//! AVX-512 BW intrinsics (`core/stdarch/crates/core_arch/src/x86/avx512bw.rs`).

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    ptr,
};

#[cfg(test)]
use stdarch_test::assert_instr;

10/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst.
11///
12/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi16&expand=30)
13#[inline]
14#[target_feature(enable = "avx512bw")]
15#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16#[cfg_attr(test, assert_instr(vpabsw))]
17#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
18pub const fn _mm512_abs_epi16(a: __m512i) -> __m512i {
19    unsafe {
20        let a = a.as_i16x32();
21        let cmp: i16x32 = simd_gt(a, i16x32::ZERO);
22        transmute(simd_select(cmp, a, simd_neg(a)))
23    }
24}
25
26/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27///
28/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi16&expand=31)
29#[inline]
30#[target_feature(enable = "avx512bw")]
31#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32#[cfg_attr(test, assert_instr(vpabsw))]
33#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34pub const fn _mm512_mask_abs_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
35    unsafe {
36        let abs = _mm512_abs_epi16(a).as_i16x32();
37        transmute(simd_select_bitmask(k, abs, src.as_i16x32()))
38    }
39}
40
41/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
42///
43/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi16&expand=32)
44#[inline]
45#[target_feature(enable = "avx512bw")]
46#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
47#[cfg_attr(test, assert_instr(vpabsw))]
48#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
49pub const fn _mm512_maskz_abs_epi16(k: __mmask32, a: __m512i) -> __m512i {
50    unsafe {
51        let abs = _mm512_abs_epi16(a).as_i16x32();
52        transmute(simd_select_bitmask(k, abs, i16x32::ZERO))
53    }
54}
55
56/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
57///
58/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi16&expand=28)
59#[inline]
60#[target_feature(enable = "avx512bw,avx512vl")]
61#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
62#[cfg_attr(test, assert_instr(vpabsw))]
63#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
64pub const fn _mm256_mask_abs_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
65    unsafe {
66        let abs = _mm256_abs_epi16(a).as_i16x16();
67        transmute(simd_select_bitmask(k, abs, src.as_i16x16()))
68    }
69}
70
71/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
72///
73/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi16&expand=29)
74#[inline]
75#[target_feature(enable = "avx512bw,avx512vl")]
76#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
77#[cfg_attr(test, assert_instr(vpabsw))]
78#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
79pub const fn _mm256_maskz_abs_epi16(k: __mmask16, a: __m256i) -> __m256i {
80    unsafe {
81        let abs = _mm256_abs_epi16(a).as_i16x16();
82        transmute(simd_select_bitmask(k, abs, i16x16::ZERO))
83    }
84}
85
86/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
87///
88/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi16&expand=25)
89#[inline]
90#[target_feature(enable = "avx512bw,avx512vl")]
91#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
92#[cfg_attr(test, assert_instr(vpabsw))]
93#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
94pub const fn _mm_mask_abs_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
95    unsafe {
96        let abs = _mm_abs_epi16(a).as_i16x8();
97        transmute(simd_select_bitmask(k, abs, src.as_i16x8()))
98    }
99}
100
101/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
102///
103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi16&expand=26)
104#[inline]
105#[target_feature(enable = "avx512bw,avx512vl")]
106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
107#[cfg_attr(test, assert_instr(vpabsw))]
108#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
109pub const fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i {
110    unsafe {
111        let abs = _mm_abs_epi16(a).as_i16x8();
112        transmute(simd_select_bitmask(k, abs, i16x8::ZERO))
113    }
114}
115
116/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst.
117///
118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi8&expand=57)
119#[inline]
120#[target_feature(enable = "avx512bw")]
121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
122#[cfg_attr(test, assert_instr(vpabsb))]
123#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
124pub const fn _mm512_abs_epi8(a: __m512i) -> __m512i {
125    unsafe {
126        let a = a.as_i8x64();
127        let cmp: i8x64 = simd_gt(a, i8x64::ZERO);
128        transmute(simd_select(cmp, a, simd_neg(a)))
129    }
130}
131
132/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
133///
134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi8&expand=58)
135#[inline]
136#[target_feature(enable = "avx512bw")]
137#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
138#[cfg_attr(test, assert_instr(vpabsb))]
139#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
140pub const fn _mm512_mask_abs_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
141    unsafe {
142        let abs = _mm512_abs_epi8(a).as_i8x64();
143        transmute(simd_select_bitmask(k, abs, src.as_i8x64()))
144    }
145}
146
147/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
148///
149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi8&expand=59)
150#[inline]
151#[target_feature(enable = "avx512bw")]
152#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
153#[cfg_attr(test, assert_instr(vpabsb))]
154#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
155pub const fn _mm512_maskz_abs_epi8(k: __mmask64, a: __m512i) -> __m512i {
156    unsafe {
157        let abs = _mm512_abs_epi8(a).as_i8x64();
158        transmute(simd_select_bitmask(k, abs, i8x64::ZERO))
159    }
160}
161
162/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
163///
164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi8&expand=55)
165#[inline]
166#[target_feature(enable = "avx512bw,avx512vl")]
167#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
168#[cfg_attr(test, assert_instr(vpabsb))]
169#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
170pub const fn _mm256_mask_abs_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
171    unsafe {
172        let abs = _mm256_abs_epi8(a).as_i8x32();
173        transmute(simd_select_bitmask(k, abs, src.as_i8x32()))
174    }
175}
176
177/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
178///
179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi8&expand=56)
180#[inline]
181#[target_feature(enable = "avx512bw,avx512vl")]
182#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
183#[cfg_attr(test, assert_instr(vpabsb))]
184#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
185pub const fn _mm256_maskz_abs_epi8(k: __mmask32, a: __m256i) -> __m256i {
186    unsafe {
187        let abs = _mm256_abs_epi8(a).as_i8x32();
188        transmute(simd_select_bitmask(k, abs, i8x32::ZERO))
189    }
190}
191
192/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set)
193///
194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi8&expand=52)
195#[inline]
196#[target_feature(enable = "avx512bw,avx512vl")]
197#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
198#[cfg_attr(test, assert_instr(vpabsb))]
199#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
200pub const fn _mm_mask_abs_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
201    unsafe {
202        let abs = _mm_abs_epi8(a).as_i8x16();
203        transmute(simd_select_bitmask(k, abs, src.as_i8x16()))
204    }
205}
206
207/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
208///
209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi8&expand=53)
210#[inline]
211#[target_feature(enable = "avx512bw,avx512vl")]
212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
213#[cfg_attr(test, assert_instr(vpabsb))]
214#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
215pub const fn _mm_maskz_abs_epi8(k: __mmask16, a: __m128i) -> __m128i {
216    unsafe {
217        let abs = _mm_abs_epi8(a).as_i8x16();
218        transmute(simd_select_bitmask(k, abs, i8x16::ZERO))
219    }
220}
221
222/// Add packed 16-bit integers in a and b, and store the results in dst.
223///
224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi16&expand=91)
225#[inline]
226#[target_feature(enable = "avx512bw")]
227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
228#[cfg_attr(test, assert_instr(vpaddw))]
229#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
230pub const fn _mm512_add_epi16(a: __m512i, b: __m512i) -> __m512i {
231    unsafe { transmute(simd_add(a.as_i16x32(), b.as_i16x32())) }
232}
233
234/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
235///
236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi16&expand=92)
237#[inline]
238#[target_feature(enable = "avx512bw")]
239#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
240#[cfg_attr(test, assert_instr(vpaddw))]
241#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
242pub const fn _mm512_mask_add_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
243    unsafe {
244        let add = _mm512_add_epi16(a, b).as_i16x32();
245        transmute(simd_select_bitmask(k, add, src.as_i16x32()))
246    }
247}
248
249/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
250///
251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi16&expand=93)
252#[inline]
253#[target_feature(enable = "avx512bw")]
254#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
255#[cfg_attr(test, assert_instr(vpaddw))]
256#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
257pub const fn _mm512_maskz_add_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
258    unsafe {
259        let add = _mm512_add_epi16(a, b).as_i16x32();
260        transmute(simd_select_bitmask(k, add, i16x32::ZERO))
261    }
262}
263
264/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
265///
266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi16&expand=89)
267#[inline]
268#[target_feature(enable = "avx512bw,avx512vl")]
269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
270#[cfg_attr(test, assert_instr(vpaddw))]
271#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
272pub const fn _mm256_mask_add_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
273    unsafe {
274        let add = _mm256_add_epi16(a, b).as_i16x16();
275        transmute(simd_select_bitmask(k, add, src.as_i16x16()))
276    }
277}
278
279/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
280///
281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi16&expand=90)
282#[inline]
283#[target_feature(enable = "avx512bw,avx512vl")]
284#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
285#[cfg_attr(test, assert_instr(vpaddw))]
286#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
287pub const fn _mm256_maskz_add_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
288    unsafe {
289        let add = _mm256_add_epi16(a, b).as_i16x16();
290        transmute(simd_select_bitmask(k, add, i16x16::ZERO))
291    }
292}
293
294/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
295///
296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi16&expand=86)
297#[inline]
298#[target_feature(enable = "avx512bw,avx512vl")]
299#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
300#[cfg_attr(test, assert_instr(vpaddw))]
301#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
302pub const fn _mm_mask_add_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
303    unsafe {
304        let add = _mm_add_epi16(a, b).as_i16x8();
305        transmute(simd_select_bitmask(k, add, src.as_i16x8()))
306    }
307}
308
309/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
310///
311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi16&expand=87)
312#[inline]
313#[target_feature(enable = "avx512bw,avx512vl")]
314#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
315#[cfg_attr(test, assert_instr(vpaddw))]
316#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
317pub const fn _mm_maskz_add_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
318    unsafe {
319        let add = _mm_add_epi16(a, b).as_i16x8();
320        transmute(simd_select_bitmask(k, add, i16x8::ZERO))
321    }
322}
323
324/// Add packed 8-bit integers in a and b, and store the results in dst.
325///
326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi8&expand=118)
327#[inline]
328#[target_feature(enable = "avx512bw")]
329#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
330#[cfg_attr(test, assert_instr(vpaddb))]
331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
332pub const fn _mm512_add_epi8(a: __m512i, b: __m512i) -> __m512i {
333    unsafe { transmute(simd_add(a.as_i8x64(), b.as_i8x64())) }
334}
335
336/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
337///
338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi8&expand=119)
339#[inline]
340#[target_feature(enable = "avx512bw")]
341#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
342#[cfg_attr(test, assert_instr(vpaddb))]
343#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
344pub const fn _mm512_mask_add_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
345    unsafe {
346        let add = _mm512_add_epi8(a, b).as_i8x64();
347        transmute(simd_select_bitmask(k, add, src.as_i8x64()))
348    }
349}
350
351/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
352///
353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi8&expand=120)
354#[inline]
355#[target_feature(enable = "avx512bw")]
356#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
357#[cfg_attr(test, assert_instr(vpaddb))]
358#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
359pub const fn _mm512_maskz_add_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
360    unsafe {
361        let add = _mm512_add_epi8(a, b).as_i8x64();
362        transmute(simd_select_bitmask(k, add, i8x64::ZERO))
363    }
364}
365
366/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
367///
368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi8&expand=116)
369#[inline]
370#[target_feature(enable = "avx512bw,avx512vl")]
371#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
372#[cfg_attr(test, assert_instr(vpaddb))]
373#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
374pub const fn _mm256_mask_add_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
375    unsafe {
376        let add = _mm256_add_epi8(a, b).as_i8x32();
377        transmute(simd_select_bitmask(k, add, src.as_i8x32()))
378    }
379}
380
381/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
382///
383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi8&expand=117)
384#[inline]
385#[target_feature(enable = "avx512bw,avx512vl")]
386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
387#[cfg_attr(test, assert_instr(vpaddb))]
388#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
389pub const fn _mm256_maskz_add_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
390    unsafe {
391        let add = _mm256_add_epi8(a, b).as_i8x32();
392        transmute(simd_select_bitmask(k, add, i8x32::ZERO))
393    }
394}
395
396/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
397///
398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi8&expand=113)
399#[inline]
400#[target_feature(enable = "avx512bw,avx512vl")]
401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
402#[cfg_attr(test, assert_instr(vpaddb))]
403#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
404pub const fn _mm_mask_add_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
405    unsafe {
406        let add = _mm_add_epi8(a, b).as_i8x16();
407        transmute(simd_select_bitmask(k, add, src.as_i8x16()))
408    }
409}
410
411/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
412///
413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi8&expand=114)
414#[inline]
415#[target_feature(enable = "avx512bw,avx512vl")]
416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
417#[cfg_attr(test, assert_instr(vpaddb))]
418#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
419pub const fn _mm_maskz_add_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
420    unsafe {
421        let add = _mm_add_epi8(a, b).as_i8x16();
422        transmute(simd_select_bitmask(k, add, i8x16::ZERO))
423    }
424}
425
426/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst.
427///
428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu16&expand=197)
429#[inline]
430#[target_feature(enable = "avx512bw")]
431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
432#[cfg_attr(test, assert_instr(vpaddusw))]
433#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
434pub const fn _mm512_adds_epu16(a: __m512i, b: __m512i) -> __m512i {
435    unsafe { transmute(simd_saturating_add(a.as_u16x32(), b.as_u16x32())) }
436}
437
438/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
439///
440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu16&expand=198)
441#[inline]
442#[target_feature(enable = "avx512bw")]
443#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
444#[cfg_attr(test, assert_instr(vpaddusw))]
445#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
446pub const fn _mm512_mask_adds_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
447    unsafe {
448        let add = _mm512_adds_epu16(a, b).as_u16x32();
449        transmute(simd_select_bitmask(k, add, src.as_u16x32()))
450    }
451}
452
453/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
454///
455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu16&expand=199)
456#[inline]
457#[target_feature(enable = "avx512bw")]
458#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
459#[cfg_attr(test, assert_instr(vpaddusw))]
460#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
461pub const fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
462    unsafe {
463        let add = _mm512_adds_epu16(a, b).as_u16x32();
464        transmute(simd_select_bitmask(k, add, u16x32::ZERO))
465    }
466}
467
468/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
469///
470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu16&expand=195)
471#[inline]
472#[target_feature(enable = "avx512bw,avx512vl")]
473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
474#[cfg_attr(test, assert_instr(vpaddusw))]
475#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
476pub const fn _mm256_mask_adds_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
477    unsafe {
478        let add = _mm256_adds_epu16(a, b).as_u16x16();
479        transmute(simd_select_bitmask(k, add, src.as_u16x16()))
480    }
481}
482
483/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
484///
485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu16&expand=196)
486#[inline]
487#[target_feature(enable = "avx512bw,avx512vl")]
488#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
489#[cfg_attr(test, assert_instr(vpaddusw))]
490#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
491pub const fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
492    unsafe {
493        let add = _mm256_adds_epu16(a, b).as_u16x16();
494        transmute(simd_select_bitmask(k, add, u16x16::ZERO))
495    }
496}
497
498/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
499///
500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu16&expand=192)
501#[inline]
502#[target_feature(enable = "avx512bw,avx512vl")]
503#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
504#[cfg_attr(test, assert_instr(vpaddusw))]
505#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
506pub const fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
507    unsafe {
508        let add = _mm_adds_epu16(a, b).as_u16x8();
509        transmute(simd_select_bitmask(k, add, src.as_u16x8()))
510    }
511}
512
513/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
514///
515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu16&expand=193)
516#[inline]
517#[target_feature(enable = "avx512bw,avx512vl")]
518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
519#[cfg_attr(test, assert_instr(vpaddusw))]
520#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
521pub const fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
522    unsafe {
523        let add = _mm_adds_epu16(a, b).as_u16x8();
524        transmute(simd_select_bitmask(k, add, u16x8::ZERO))
525    }
526}
527
528/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst.
529///
530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu8&expand=206)
531#[inline]
532#[target_feature(enable = "avx512bw")]
533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
534#[cfg_attr(test, assert_instr(vpaddusb))]
535#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
536pub const fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i {
537    unsafe { transmute(simd_saturating_add(a.as_u8x64(), b.as_u8x64())) }
538}
539
540/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
541///
542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu8&expand=207)
543#[inline]
544#[target_feature(enable = "avx512bw")]
545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
546#[cfg_attr(test, assert_instr(vpaddusb))]
547#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
548pub const fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
549    unsafe {
550        let add = _mm512_adds_epu8(a, b).as_u8x64();
551        transmute(simd_select_bitmask(k, add, src.as_u8x64()))
552    }
553}
554
555/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
556///
557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu8&expand=208)
558#[inline]
559#[target_feature(enable = "avx512bw")]
560#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
561#[cfg_attr(test, assert_instr(vpaddusb))]
562#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
563pub const fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
564    unsafe {
565        let add = _mm512_adds_epu8(a, b).as_u8x64();
566        transmute(simd_select_bitmask(k, add, u8x64::ZERO))
567    }
568}
569
570/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
571///
572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu8&expand=204)
573#[inline]
574#[target_feature(enable = "avx512bw,avx512vl")]
575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
576#[cfg_attr(test, assert_instr(vpaddusb))]
577#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
578pub const fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
579    unsafe {
580        let add = _mm256_adds_epu8(a, b).as_u8x32();
581        transmute(simd_select_bitmask(k, add, src.as_u8x32()))
582    }
583}
584
585/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
586///
587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu8&expand=205)
588#[inline]
589#[target_feature(enable = "avx512bw,avx512vl")]
590#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
591#[cfg_attr(test, assert_instr(vpaddusb))]
592#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
593pub const fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
594    unsafe {
595        let add = _mm256_adds_epu8(a, b).as_u8x32();
596        transmute(simd_select_bitmask(k, add, u8x32::ZERO))
597    }
598}
599
600/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
601///
602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu8&expand=201)
603#[inline]
604#[target_feature(enable = "avx512bw,avx512vl")]
605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
606#[cfg_attr(test, assert_instr(vpaddusb))]
607#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
608pub const fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
609    unsafe {
610        let add = _mm_adds_epu8(a, b).as_u8x16();
611        transmute(simd_select_bitmask(k, add, src.as_u8x16()))
612    }
613}
614
615/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
616///
617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu8&expand=202)
618#[inline]
619#[target_feature(enable = "avx512bw,avx512vl")]
620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
621#[cfg_attr(test, assert_instr(vpaddusb))]
622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
623pub const fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
624    unsafe {
625        let add = _mm_adds_epu8(a, b).as_u8x16();
626        transmute(simd_select_bitmask(k, add, u8x16::ZERO))
627    }
628}
629
630/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst.
631///
632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi16&expand=179)
633#[inline]
634#[target_feature(enable = "avx512bw")]
635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
636#[cfg_attr(test, assert_instr(vpaddsw))]
637#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
638pub const fn _mm512_adds_epi16(a: __m512i, b: __m512i) -> __m512i {
639    unsafe { transmute(simd_saturating_add(a.as_i16x32(), b.as_i16x32())) }
640}
641
642/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
643///
644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi16&expand=180)
645#[inline]
646#[target_feature(enable = "avx512bw")]
647#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
648#[cfg_attr(test, assert_instr(vpaddsw))]
649#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
650pub const fn _mm512_mask_adds_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
651    unsafe {
652        let add = _mm512_adds_epi16(a, b).as_i16x32();
653        transmute(simd_select_bitmask(k, add, src.as_i16x32()))
654    }
655}
656
657/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
658///
659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi16&expand=181)
660#[inline]
661#[target_feature(enable = "avx512bw")]
662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
663#[cfg_attr(test, assert_instr(vpaddsw))]
664#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
665pub const fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
666    unsafe {
667        let add = _mm512_adds_epi16(a, b).as_i16x32();
668        transmute(simd_select_bitmask(k, add, i16x32::ZERO))
669    }
670}
671
672/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
673///
674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi16&expand=177)
675#[inline]
676#[target_feature(enable = "avx512bw,avx512vl")]
677#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
678#[cfg_attr(test, assert_instr(vpaddsw))]
679#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
680pub const fn _mm256_mask_adds_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
681    unsafe {
682        let add = _mm256_adds_epi16(a, b).as_i16x16();
683        transmute(simd_select_bitmask(k, add, src.as_i16x16()))
684    }
685}
686
687/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
688///
689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi16&expand=178)
690#[inline]
691#[target_feature(enable = "avx512bw,avx512vl")]
692#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
693#[cfg_attr(test, assert_instr(vpaddsw))]
694#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
695pub const fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
696    unsafe {
697        let add = _mm256_adds_epi16(a, b).as_i16x16();
698        transmute(simd_select_bitmask(k, add, i16x16::ZERO))
699    }
700}
701
702/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
703///
704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi16&expand=174)
705#[inline]
706#[target_feature(enable = "avx512bw,avx512vl")]
707#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
708#[cfg_attr(test, assert_instr(vpaddsw))]
709#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
710pub const fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
711    unsafe {
712        let add = _mm_adds_epi16(a, b).as_i16x8();
713        transmute(simd_select_bitmask(k, add, src.as_i16x8()))
714    }
715}
716
717/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
718///
719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi16&expand=175)
720#[inline]
721#[target_feature(enable = "avx512bw,avx512vl")]
722#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
723#[cfg_attr(test, assert_instr(vpaddsw))]
724#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
725pub const fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
726    unsafe {
727        let add = _mm_adds_epi16(a, b).as_i16x8();
728        transmute(simd_select_bitmask(k, add, i16x8::ZERO))
729    }
730}
731
732/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst.
733///
734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi8&expand=188)
735#[inline]
736#[target_feature(enable = "avx512bw")]
737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
738#[cfg_attr(test, assert_instr(vpaddsb))]
739#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
740pub const fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i {
741    unsafe { transmute(simd_saturating_add(a.as_i8x64(), b.as_i8x64())) }
742}
743
744/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
745///
746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi8&expand=189)
747#[inline]
748#[target_feature(enable = "avx512bw")]
749#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
750#[cfg_attr(test, assert_instr(vpaddsb))]
751#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
752pub const fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
753    unsafe {
754        let add = _mm512_adds_epi8(a, b).as_i8x64();
755        transmute(simd_select_bitmask(k, add, src.as_i8x64()))
756    }
757}
758
759/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
760///
761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi8&expand=190)
762#[inline]
763#[target_feature(enable = "avx512bw")]
764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
765#[cfg_attr(test, assert_instr(vpaddsb))]
766#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
767pub const fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
768    unsafe {
769        let add = _mm512_adds_epi8(a, b).as_i8x64();
770        transmute(simd_select_bitmask(k, add, i8x64::ZERO))
771    }
772}
773
774/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
775///
776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi8&expand=186)
777#[inline]
778#[target_feature(enable = "avx512bw,avx512vl")]
779#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
780#[cfg_attr(test, assert_instr(vpaddsb))]
781#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
782pub const fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
783    unsafe {
784        let add = _mm256_adds_epi8(a, b).as_i8x32();
785        transmute(simd_select_bitmask(k, add, src.as_i8x32()))
786    }
787}
788
789/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
790///
791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi8&expand=187)
792#[inline]
793#[target_feature(enable = "avx512bw,avx512vl")]
794#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
795#[cfg_attr(test, assert_instr(vpaddsb))]
796#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
797pub const fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
798    unsafe {
799        let add = _mm256_adds_epi8(a, b).as_i8x32();
800        transmute(simd_select_bitmask(k, add, i8x32::ZERO))
801    }
802}
803
804/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
805///
806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi8&expand=183)
807#[inline]
808#[target_feature(enable = "avx512bw,avx512vl")]
809#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
810#[cfg_attr(test, assert_instr(vpaddsb))]
811#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
812pub const fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
813    unsafe {
814        let add = _mm_adds_epi8(a, b).as_i8x16();
815        transmute(simd_select_bitmask(k, add, src.as_i8x16()))
816    }
817}
818
819/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
820///
821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi8&expand=184)
822#[inline]
823#[target_feature(enable = "avx512bw,avx512vl")]
824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
825#[cfg_attr(test, assert_instr(vpaddsb))]
826#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
827pub const fn _mm_maskz_adds_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
828    unsafe {
829        let add = _mm_adds_epi8(a, b).as_i8x16();
830        transmute(simd_select_bitmask(k, add, i8x16::ZERO))
831    }
832}
833
834/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst.
835///
836/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi16&expand=5685)
837#[inline]
838#[target_feature(enable = "avx512bw")]
839#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
840#[cfg_attr(test, assert_instr(vpsubw))]
841#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
842pub const fn _mm512_sub_epi16(a: __m512i, b: __m512i) -> __m512i {
843    unsafe { transmute(simd_sub(a.as_i16x32(), b.as_i16x32())) }
844}
845
846/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
847///
848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi16&expand=5683)
849#[inline]
850#[target_feature(enable = "avx512bw")]
851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
852#[cfg_attr(test, assert_instr(vpsubw))]
853#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
854pub const fn _mm512_mask_sub_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
855    unsafe {
856        let sub = _mm512_sub_epi16(a, b).as_i16x32();
857        transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
858    }
859}
860
861/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
862///
863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi16&expand=5684)
864#[inline]
865#[target_feature(enable = "avx512bw")]
866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
867#[cfg_attr(test, assert_instr(vpsubw))]
868#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
869pub const fn _mm512_maskz_sub_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
870    unsafe {
871        let sub = _mm512_sub_epi16(a, b).as_i16x32();
872        transmute(simd_select_bitmask(k, sub, i16x32::ZERO))
873    }
874}
875
876/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
877///
878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi16&expand=5680)
879#[inline]
880#[target_feature(enable = "avx512bw,avx512vl")]
881#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
882#[cfg_attr(test, assert_instr(vpsubw))]
883#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
884pub const fn _mm256_mask_sub_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
885    unsafe {
886        let sub = _mm256_sub_epi16(a, b).as_i16x16();
887        transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
888    }
889}
890
891/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
892///
893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi16&expand=5681)
894#[inline]
895#[target_feature(enable = "avx512bw,avx512vl")]
896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
897#[cfg_attr(test, assert_instr(vpsubw))]
898#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
899pub const fn _mm256_maskz_sub_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
900    unsafe {
901        let sub = _mm256_sub_epi16(a, b).as_i16x16();
902        transmute(simd_select_bitmask(k, sub, i16x16::ZERO))
903    }
904}
905
906/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
907///
908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi16&expand=5677)
909#[inline]
910#[target_feature(enable = "avx512bw,avx512vl")]
911#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
912#[cfg_attr(test, assert_instr(vpsubw))]
913#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
914pub const fn _mm_mask_sub_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
915    unsafe {
916        let sub = _mm_sub_epi16(a, b).as_i16x8();
917        transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
918    }
919}
920
921/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
922///
923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi16&expand=5678)
924#[inline]
925#[target_feature(enable = "avx512bw,avx512vl")]
926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
927#[cfg_attr(test, assert_instr(vpsubw))]
928#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
929pub const fn _mm_maskz_sub_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
930    unsafe {
931        let sub = _mm_sub_epi16(a, b).as_i16x8();
932        transmute(simd_select_bitmask(k, sub, i16x8::ZERO))
933    }
934}
935
936/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst.
937///
938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi8&expand=5712)
939#[inline]
940#[target_feature(enable = "avx512bw")]
941#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
942#[cfg_attr(test, assert_instr(vpsubb))]
943#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
944pub const fn _mm512_sub_epi8(a: __m512i, b: __m512i) -> __m512i {
945    unsafe { transmute(simd_sub(a.as_i8x64(), b.as_i8x64())) }
946}
947
948/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
949///
950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi8&expand=5710)
951#[inline]
952#[target_feature(enable = "avx512bw")]
953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
954#[cfg_attr(test, assert_instr(vpsubb))]
955#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
956pub const fn _mm512_mask_sub_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
957    unsafe {
958        let sub = _mm512_sub_epi8(a, b).as_i8x64();
959        transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
960    }
961}
962
963/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
964///
965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi8&expand=5711)
966#[inline]
967#[target_feature(enable = "avx512bw")]
968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
969#[cfg_attr(test, assert_instr(vpsubb))]
970#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
971pub const fn _mm512_maskz_sub_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
972    unsafe {
973        let sub = _mm512_sub_epi8(a, b).as_i8x64();
974        transmute(simd_select_bitmask(k, sub, i8x64::ZERO))
975    }
976}
977
978/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
979///
980/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi8&expand=5707)
981#[inline]
982#[target_feature(enable = "avx512bw,avx512vl")]
983#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
984#[cfg_attr(test, assert_instr(vpsubb))]
985#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
986pub const fn _mm256_mask_sub_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
987    unsafe {
988        let sub = _mm256_sub_epi8(a, b).as_i8x32();
989        transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
990    }
991}
992
993/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
994///
995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi8&expand=5708)
996#[inline]
997#[target_feature(enable = "avx512bw,avx512vl")]
998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
999#[cfg_attr(test, assert_instr(vpsubb))]
1000#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1001pub const fn _mm256_maskz_sub_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1002    unsafe {
1003        let sub = _mm256_sub_epi8(a, b).as_i8x32();
1004        transmute(simd_select_bitmask(k, sub, i8x32::ZERO))
1005    }
1006}
1007
1008/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1009///
1010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi8&expand=5704)
1011#[inline]
1012#[target_feature(enable = "avx512bw,avx512vl")]
1013#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1014#[cfg_attr(test, assert_instr(vpsubb))]
1015#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1016pub const fn _mm_mask_sub_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1017    unsafe {
1018        let sub = _mm_sub_epi8(a, b).as_i8x16();
1019        transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
1020    }
1021}
1022
1023/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1024///
1025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi8&expand=5705)
1026#[inline]
1027#[target_feature(enable = "avx512bw,avx512vl")]
1028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1029#[cfg_attr(test, assert_instr(vpsubb))]
1030#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1031pub const fn _mm_maskz_sub_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1032    unsafe {
1033        let sub = _mm_sub_epi8(a, b).as_i8x16();
1034        transmute(simd_select_bitmask(k, sub, i8x16::ZERO))
1035    }
1036}
1037
1038/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst.
1039///
1040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu16&expand=5793)
1041#[inline]
1042#[target_feature(enable = "avx512bw")]
1043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1044#[cfg_attr(test, assert_instr(vpsubusw))]
1045#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1046pub const fn _mm512_subs_epu16(a: __m512i, b: __m512i) -> __m512i {
1047    unsafe { transmute(simd_saturating_sub(a.as_u16x32(), b.as_u16x32())) }
1048}
1049
1050/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1051///
1052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu16&expand=5791)
1053#[inline]
1054#[target_feature(enable = "avx512bw")]
1055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1056#[cfg_attr(test, assert_instr(vpsubusw))]
1057#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1058pub const fn _mm512_mask_subs_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1059    unsafe {
1060        let sub = _mm512_subs_epu16(a, b).as_u16x32();
1061        transmute(simd_select_bitmask(k, sub, src.as_u16x32()))
1062    }
1063}
1064
1065/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1066///
1067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu16&expand=5792)
1068#[inline]
1069#[target_feature(enable = "avx512bw")]
1070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1071#[cfg_attr(test, assert_instr(vpsubusw))]
1072#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1073pub const fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1074    unsafe {
1075        let sub = _mm512_subs_epu16(a, b).as_u16x32();
1076        transmute(simd_select_bitmask(k, sub, u16x32::ZERO))
1077    }
1078}
1079
1080/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1081///
1082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu16&expand=5788)
1083#[inline]
1084#[target_feature(enable = "avx512bw,avx512vl")]
1085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1086#[cfg_attr(test, assert_instr(vpsubusw))]
1087#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1088pub const fn _mm256_mask_subs_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1089    unsafe {
1090        let sub = _mm256_subs_epu16(a, b).as_u16x16();
1091        transmute(simd_select_bitmask(k, sub, src.as_u16x16()))
1092    }
1093}
1094
1095/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1096///
1097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu16&expand=5789)
1098#[inline]
1099#[target_feature(enable = "avx512bw,avx512vl")]
1100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1101#[cfg_attr(test, assert_instr(vpsubusw))]
1102#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1103pub const fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1104    unsafe {
1105        let sub = _mm256_subs_epu16(a, b).as_u16x16();
1106        transmute(simd_select_bitmask(k, sub, u16x16::ZERO))
1107    }
1108}
1109
1110/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1111///
1112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu16&expand=5785)
1113#[inline]
1114#[target_feature(enable = "avx512bw,avx512vl")]
1115#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1116#[cfg_attr(test, assert_instr(vpsubusw))]
1117#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1118pub const fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1119    unsafe {
1120        let sub = _mm_subs_epu16(a, b).as_u16x8();
1121        transmute(simd_select_bitmask(k, sub, src.as_u16x8()))
1122    }
1123}
1124
1125/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1126///
1127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu16&expand=5786)
1128#[inline]
1129#[target_feature(enable = "avx512bw,avx512vl")]
1130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1131#[cfg_attr(test, assert_instr(vpsubusw))]
1132#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1133pub const fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1134    unsafe {
1135        let sub = _mm_subs_epu16(a, b).as_u16x8();
1136        transmute(simd_select_bitmask(k, sub, u16x8::ZERO))
1137    }
1138}
1139
1140/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst.
1141///
1142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu8&expand=5802)
1143#[inline]
1144#[target_feature(enable = "avx512bw")]
1145#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1146#[cfg_attr(test, assert_instr(vpsubusb))]
1147#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1148pub const fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i {
1149    unsafe { transmute(simd_saturating_sub(a.as_u8x64(), b.as_u8x64())) }
1150}
1151
1152/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1153///
1154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu8&expand=5800)
1155#[inline]
1156#[target_feature(enable = "avx512bw")]
1157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1158#[cfg_attr(test, assert_instr(vpsubusb))]
1159#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1160pub const fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1161    unsafe {
1162        let sub = _mm512_subs_epu8(a, b).as_u8x64();
1163        transmute(simd_select_bitmask(k, sub, src.as_u8x64()))
1164    }
1165}
1166
/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu8&expand=5801)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epu8(a, b).as_u8x64();
        // Lane i: bit i of k set -> saturated difference, clear -> zeroed.
        transmute(simd_select_bitmask(k, sub, u8x64::ZERO))
    }
}
1181
/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu8&expand=5797)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // 256-bit variant builds on the AVX2 intrinsic, then applies the writemask.
        let sub = _mm256_subs_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, sub, src.as_u8x32()))
    }
}
1196
/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu8&expand=5798)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epu8(a, b).as_u8x32();
        // Unselected lanes are zeroed rather than copied from a source vector.
        transmute(simd_select_bitmask(k, sub, u8x32::ZERO))
    }
}
1211
/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu8&expand=5794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // 128-bit variant builds on the SSE2 intrinsic, then applies the writemask.
        let sub = _mm_subs_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, sub, src.as_u8x16()))
    }
}
1226
/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu8&expand=5795)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epu8(a, b).as_u8x16();
        // Unselected lanes are zeroed rather than copied from a source vector.
        transmute(simd_select_bitmask(k, sub, u8x16::ZERO))
    }
}
1241
/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi16&expand=5775)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_subs_epi16(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise signed saturating subtract over 32 i16 lanes: clamps to i16::MIN/MAX.
    unsafe { transmute(simd_saturating_sub(a.as_i16x32(), b.as_i16x32())) }
}
1253
/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi16&expand=5773)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_subs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epi16(a, b).as_i16x32();
        // Lane i: bit i of k set -> saturated difference, clear -> copied from src.
        transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
    }
}
1268
/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi16&expand=5774)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epi16(a, b).as_i16x32();
        // Lane i: bit i of k set -> saturated difference, clear -> zeroed.
        transmute(simd_select_bitmask(k, sub, i16x32::ZERO))
    }
}
1283
/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi16&expand=5770)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_subs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // 256-bit variant builds on the AVX2 intrinsic, then applies the writemask.
        let sub = _mm256_subs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
    }
}
1298
/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi16&expand=5771)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epi16(a, b).as_i16x16();
        // Unselected lanes are zeroed rather than copied from a source vector.
        transmute(simd_select_bitmask(k, sub, i16x16::ZERO))
    }
}
1313
/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi16&expand=5767)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // 128-bit variant builds on the SSE2 intrinsic, then applies the writemask.
        let sub = _mm_subs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
    }
}
1328
/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi16&expand=5768)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epi16(a, b).as_i16x8();
        // Unselected lanes are zeroed rather than copied from a source vector.
        transmute(simd_select_bitmask(k, sub, i16x8::ZERO))
    }
}
1343
/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi8&expand=5784)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise signed saturating subtract over 64 i8 lanes: clamps to i8::MIN/MAX.
    unsafe { transmute(simd_saturating_sub(a.as_i8x64(), b.as_i8x64())) }
}
1355
/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi8&expand=5782)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epi8(a, b).as_i8x64();
        // Lane i: bit i of k set -> saturated difference, clear -> copied from src.
        transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
    }
}
1370
/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi8&expand=5783)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epi8(a, b).as_i8x64();
        // Lane i: bit i of k set -> saturated difference, clear -> zeroed.
        transmute(simd_select_bitmask(k, sub, i8x64::ZERO))
    }
}
1385
/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi8&expand=5779)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // 256-bit variant builds on the AVX2 intrinsic, then applies the writemask.
        let sub = _mm256_subs_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
    }
}
1400
/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi8&expand=5780)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epi8(a, b).as_i8x32();
        // Unselected lanes are zeroed rather than copied from a source vector.
        transmute(simd_select_bitmask(k, sub, i8x32::ZERO))
    }
}
1415
/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi8&expand=5776)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // 128-bit variant builds on the SSE2 intrinsic, then applies the writemask.
        let sub = _mm_subs_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
    }
}
1430
/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi8&expand=5777)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epi8(a, b).as_i8x16();
        // Unselected lanes are zeroed rather than copied from a source vector.
        transmute(simd_select_bitmask(k, sub, i8x16::ZERO))
    }
}
1445
/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epu16&expand=3973)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mulhi_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Widen each u16 lane to u32 so the full 32-bit product is representable,
        // then shift right by 16 and narrow back, keeping only the high half.
        let a = simd_cast::<_, u32x32>(a.as_u16x32());
        let b = simd_cast::<_, u32x32>(b.as_u16x32());
        let r = simd_shr(simd_mul(a, b), u32x32::splat(16));
        transmute(simd_cast::<u32x32, u16x32>(r))
    }
}
1462
/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epu16&expand=3971)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mulhi_epu16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
        // Lane i: bit i of k set -> high-half product, clear -> copied from src.
        transmute(simd_select_bitmask(k, mul, src.as_u16x32()))
    }
}
1482
/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epu16&expand=3972)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mulhi_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
        // Lane i: bit i of k set -> high-half product, clear -> zeroed.
        transmute(simd_select_bitmask(k, mul, u16x32::ZERO))
    }
}
1497
/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epu16&expand=3968)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mulhi_epu16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        // 256-bit variant builds on the AVX2 intrinsic, then applies the writemask.
        let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, mul, src.as_u16x16()))
    }
}
1517
/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epu16&expand=3969)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mulhi_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
        // Unselected lanes are zeroed rather than copied from a source vector.
        transmute(simd_select_bitmask(k, mul, u16x16::ZERO))
    }
}
1532
/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epu16&expand=3965)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mulhi_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // 128-bit variant builds on the SSE2 intrinsic, then applies the writemask.
        let mul = _mm_mulhi_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, mul, src.as_u16x8()))
    }
}
1547
/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epu16&expand=3966)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mulhi_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhi_epu16(a, b).as_u16x8();
        // Unselected lanes are zeroed rather than copied from a source vector.
        transmute(simd_select_bitmask(k, mul, u16x8::ZERO))
    }
}
1562
/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epi16&expand=3962)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mulhi_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Sign-extend each i16 lane to i32 so the full 32-bit product is exact,
        // then arithmetic-shift right by 16 and narrow back to the high half.
        let a = simd_cast::<_, i32x32>(a.as_i16x32());
        let b = simd_cast::<_, i32x32>(b.as_i16x32());
        let r = simd_shr(simd_mul(a, b), i32x32::splat(16));
        transmute(simd_cast::<i32x32, i16x32>(r))
    }
}
1579
/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epi16&expand=3960)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mulhi_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
        // Lane i: bit i of k set -> high-half product, clear -> copied from src.
        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
    }
}
1599
/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epi16&expand=3961)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mulhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
        // Lane i: bit i of k set -> high-half product, clear -> zeroed.
        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
    }
}
1614
/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epi16&expand=3957)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mulhi_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        // 256-bit variant builds on the AVX2 intrinsic, then applies the writemask.
        let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
    }
}
1634
/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epi16&expand=3958)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mulhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
        // Unselected lanes are zeroed rather than copied from a source vector.
        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
    }
}
1649
/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epi16&expand=3954)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mulhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // 128-bit variant builds on the SSE2 intrinsic, then applies the writemask.
        let mul = _mm_mulhi_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
    }
}
1664
/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epi16&expand=3955)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mulhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhi_epi16(a, b).as_i16x8();
        // Unselected lanes are zeroed rather than copied from a source vector.
        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
    }
}
1679
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhrs_epi16&expand=3986)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm512_mulhrs_epi16(a: __m512i, b: __m512i) -> __m512i {
    // Delegates to the vpmulhrsw LLVM intrinsic for the rounding high multiply
    // (note: unlike the simd_*-based wrappers above, this one is not `const fn`).
    unsafe { transmute(vpmulhrsw(a.as_i16x32(), b.as_i16x32())) }
}
1690
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhrs_epi16&expand=3984)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm512_mask_mulhrs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
        // Lane i: bit i of k set -> rounded high product, clear -> copied from src.
        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
    }
}
1704
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhrs_epi16&expand=3985)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm512_maskz_mulhrs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
        // Lane i: bit i of k set -> rounded high product, clear -> zeroed.
        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
    }
}
1718
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhrs_epi16&expand=3981)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm256_mask_mulhrs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // 256-bit variant builds on the AVX2 intrinsic, then applies the writemask.
        let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
    }
}
1732
1733/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1734///
1735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhrs_epi16&expand=3982)
1736#[inline]
1737#[target_feature(enable = "avx512bw,avx512vl")]
1738#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1739#[cfg_attr(test, assert_instr(vpmulhrsw))]
1740pub fn _mm256_maskz_mulhrs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
        // Per-lane merge: rounded-high product where the k-bit is set, else zero.
        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
    }
}
1746
1747/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1748///
1749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhrs_epi16&expand=3978)
1750#[inline]
1751#[target_feature(enable = "avx512bw,avx512vl")]
1752#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1753#[cfg_attr(test, assert_instr(vpmulhrsw))]
1754pub fn _mm_mask_mulhrs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
        // Per-lane merge: rounded-high product where the k-bit is set, else the `src` lane.
        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
    }
}
1760
1761/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1762///
1763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhrs_epi16&expand=3979)
1764#[inline]
1765#[target_feature(enable = "avx512bw,avx512vl")]
1766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1767#[cfg_attr(test, assert_instr(vpmulhrsw))]
1768pub fn _mm_maskz_mulhrs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
        // Per-lane merge: rounded-high product where the k-bit is set, else zero.
        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
    }
}
1774
1775/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst.
1776///
1777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi16&expand=3996)
1778#[inline]
1779#[target_feature(enable = "avx512bw")]
1780#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1781#[cfg_attr(test, assert_instr(vpmullw))]
1782#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1783pub const fn _mm512_mullo_epi16(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise wrapping i16 multiply: equivalent to keeping the low 16 bits
    // of each 32-bit product, as the doc comment describes.
    unsafe { transmute(simd_mul(a.as_i16x32(), b.as_i16x32())) }
}
1786
1787/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1788///
1789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi16&expand=3994)
1790#[inline]
1791#[target_feature(enable = "avx512bw")]
1792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1793#[cfg_attr(test, assert_instr(vpmullw))]
1794#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1795pub const fn _mm512_mask_mullo_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let mul = _mm512_mullo_epi16(a, b).as_i16x32();
        // Per-lane merge: low product where the k-bit is set, else the `src` lane.
        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
    }
}
1806
1807/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1808///
1809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi16&expand=3995)
1810#[inline]
1811#[target_feature(enable = "avx512bw")]
1812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1813#[cfg_attr(test, assert_instr(vpmullw))]
1814#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1815pub const fn _mm512_maskz_mullo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mullo_epi16(a, b).as_i16x32();
        // Per-lane merge: low product where the k-bit is set, else zero.
        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
    }
}
1821
1822/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1823///
1824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi16&expand=3991)
1825#[inline]
1826#[target_feature(enable = "avx512bw,avx512vl")]
1827#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1828#[cfg_attr(test, assert_instr(vpmullw))]
1829#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1830pub const fn _mm256_mask_mullo_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let mul = _mm256_mullo_epi16(a, b).as_i16x16();
        // Per-lane merge: low product where the k-bit is set, else the `src` lane.
        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
    }
}
1841
1842/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1843///
1844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi16&expand=3992)
1845#[inline]
1846#[target_feature(enable = "avx512bw,avx512vl")]
1847#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1848#[cfg_attr(test, assert_instr(vpmullw))]
1849#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1850pub const fn _mm256_maskz_mullo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mullo_epi16(a, b).as_i16x16();
        // Per-lane merge: low product where the k-bit is set, else zero.
        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
    }
}
1856
1857/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1858///
1859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi16&expand=3988)
1860#[inline]
1861#[target_feature(enable = "avx512bw,avx512vl")]
1862#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1863#[cfg_attr(test, assert_instr(vpmullw))]
1864#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1865pub const fn _mm_mask_mullo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mullo_epi16(a, b).as_i16x8();
        // Per-lane merge: low product where the k-bit is set, else the `src` lane.
        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
    }
}
1871
1872/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1873///
1874/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi16&expand=3989)
1875#[inline]
1876#[target_feature(enable = "avx512bw,avx512vl")]
1877#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1878#[cfg_attr(test, assert_instr(vpmullw))]
1879#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1880pub const fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mullo_epi16(a, b).as_i16x8();
        // Per-lane merge: low product where the k-bit is set, else zero.
        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
    }
}
1886
1887/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst.
1888///
1889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu16&expand=3609)
1890#[inline]
1891#[target_feature(enable = "avx512bw")]
1892#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1893#[cfg_attr(test, assert_instr(vpmaxuw))]
1894#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1895pub const fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise unsigned max; codegen is pinned to `vpmaxuw` by `assert_instr` above.
    unsafe { simd_imax(a.as_u16x32(), b.as_u16x32()).as_m512i() }
}
1898
1899/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1900///
1901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu16&expand=3607)
1902#[inline]
1903#[target_feature(enable = "avx512bw")]
1904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1905#[cfg_attr(test, assert_instr(vpmaxuw))]
1906#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1907pub const fn _mm512_mask_max_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu16(a, b).as_u16x32();
        // Per-lane merge: maximum where the k-bit is set, else the `src` lane.
        transmute(simd_select_bitmask(k, max, src.as_u16x32()))
    }
}
1913
1914/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1915///
1916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu16&expand=3608)
1917#[inline]
1918#[target_feature(enable = "avx512bw")]
1919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1920#[cfg_attr(test, assert_instr(vpmaxuw))]
1921#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1922pub const fn _mm512_maskz_max_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu16(a, b).as_u16x32();
        // Per-lane merge: maximum where the k-bit is set, else zero.
        transmute(simd_select_bitmask(k, max, u16x32::ZERO))
    }
}
1928
1929/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1930///
1931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu16&expand=3604)
1932#[inline]
1933#[target_feature(enable = "avx512bw,avx512vl")]
1934#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1935#[cfg_attr(test, assert_instr(vpmaxuw))]
1936#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1937pub const fn _mm256_mask_max_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu16(a, b).as_u16x16();
        // Per-lane merge: maximum where the k-bit is set, else the `src` lane.
        transmute(simd_select_bitmask(k, max, src.as_u16x16()))
    }
}
1943
1944/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1945///
1946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu16&expand=3605)
1947#[inline]
1948#[target_feature(enable = "avx512bw,avx512vl")]
1949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1950#[cfg_attr(test, assert_instr(vpmaxuw))]
1951#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1952pub const fn _mm256_maskz_max_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu16(a, b).as_u16x16();
        // Per-lane merge: maximum where the k-bit is set, else zero.
        transmute(simd_select_bitmask(k, max, u16x16::ZERO))
    }
}
1958
1959/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1960///
1961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu16&expand=3601)
1962#[inline]
1963#[target_feature(enable = "avx512bw,avx512vl")]
1964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1965#[cfg_attr(test, assert_instr(vpmaxuw))]
1966#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1967pub const fn _mm_mask_max_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu16(a, b).as_u16x8();
        // Per-lane merge: maximum where the k-bit is set, else the `src` lane.
        transmute(simd_select_bitmask(k, max, src.as_u16x8()))
    }
}
1973
1974/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1975///
1976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu16&expand=3602)
1977#[inline]
1978#[target_feature(enable = "avx512bw,avx512vl")]
1979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1980#[cfg_attr(test, assert_instr(vpmaxuw))]
1981#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1982pub const fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu16(a, b).as_u16x8();
        // Per-lane merge: maximum where the k-bit is set, else zero.
        transmute(simd_select_bitmask(k, max, u16x8::ZERO))
    }
}
1988
1989/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst.
1990///
1991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu8&expand=3636)
1992#[inline]
1993#[target_feature(enable = "avx512bw")]
1994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1995#[cfg_attr(test, assert_instr(vpmaxub))]
1996#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1997pub const fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise unsigned max; codegen is pinned to `vpmaxub` by `assert_instr` above.
    unsafe { simd_imax(a.as_u8x64(), b.as_u8x64()).as_m512i() }
}
2000
2001/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2002///
2003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu8&expand=3634)
2004#[inline]
2005#[target_feature(enable = "avx512bw")]
2006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2007#[cfg_attr(test, assert_instr(vpmaxub))]
2008#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2009pub const fn _mm512_mask_max_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu8(a, b).as_u8x64();
        // Per-lane merge: maximum where the k-bit is set, else the `src` lane.
        transmute(simd_select_bitmask(k, max, src.as_u8x64()))
    }
}
2015
2016/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2017///
2018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu8&expand=3635)
2019#[inline]
2020#[target_feature(enable = "avx512bw")]
2021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2022#[cfg_attr(test, assert_instr(vpmaxub))]
2023#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2024pub const fn _mm512_maskz_max_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu8(a, b).as_u8x64();
        // Per-lane merge: maximum where the k-bit is set, else zero.
        transmute(simd_select_bitmask(k, max, u8x64::ZERO))
    }
}
2030
2031/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2032///
2033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu8&expand=3631)
2034#[inline]
2035#[target_feature(enable = "avx512bw,avx512vl")]
2036#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2037#[cfg_attr(test, assert_instr(vpmaxub))]
2038#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2039pub const fn _mm256_mask_max_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu8(a, b).as_u8x32();
        // Per-lane merge: maximum where the k-bit is set, else the `src` lane.
        transmute(simd_select_bitmask(k, max, src.as_u8x32()))
    }
}
2045
2046/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2047///
2048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu8&expand=3632)
2049#[inline]
2050#[target_feature(enable = "avx512bw,avx512vl")]
2051#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2052#[cfg_attr(test, assert_instr(vpmaxub))]
2053#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2054pub const fn _mm256_maskz_max_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu8(a, b).as_u8x32();
        // Per-lane merge: maximum where the k-bit is set, else zero.
        transmute(simd_select_bitmask(k, max, u8x32::ZERO))
    }
}
2060
2061/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2062///
2063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu8&expand=3628)
2064#[inline]
2065#[target_feature(enable = "avx512bw,avx512vl")]
2066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2067#[cfg_attr(test, assert_instr(vpmaxub))]
2068#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2069pub const fn _mm_mask_max_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu8(a, b).as_u8x16();
        // Per-lane merge: maximum where the k-bit is set, else the `src` lane.
        transmute(simd_select_bitmask(k, max, src.as_u8x16()))
    }
}
2075
2076/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2077///
2078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu8&expand=3629)
2079#[inline]
2080#[target_feature(enable = "avx512bw,avx512vl")]
2081#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2082#[cfg_attr(test, assert_instr(vpmaxub))]
2083#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2084pub const fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu8(a, b).as_u8x16();
        // Per-lane merge: maximum where the k-bit is set, else zero.
        transmute(simd_select_bitmask(k, max, u8x16::ZERO))
    }
}
2090
2091/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst.
2092///
2093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi16&expand=3573)
2094#[inline]
2095#[target_feature(enable = "avx512bw")]
2096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2097#[cfg_attr(test, assert_instr(vpmaxsw))]
2098#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2099pub const fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise signed max; codegen is pinned to `vpmaxsw` by `assert_instr` above.
    unsafe { simd_imax(a.as_i16x32(), b.as_i16x32()).as_m512i() }
}
2102
2103/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2104///
2105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi16&expand=3571)
2106#[inline]
2107#[target_feature(enable = "avx512bw")]
2108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2109#[cfg_attr(test, assert_instr(vpmaxsw))]
2110#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2111pub const fn _mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi16(a, b).as_i16x32();
        // Per-lane merge: maximum where the k-bit is set, else the `src` lane.
        transmute(simd_select_bitmask(k, max, src.as_i16x32()))
    }
}
2117
2118/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2119///
2120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi16&expand=3572)
2121#[inline]
2122#[target_feature(enable = "avx512bw")]
2123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2124#[cfg_attr(test, assert_instr(vpmaxsw))]
2125#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2126pub const fn _mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi16(a, b).as_i16x32();
        // Per-lane merge: maximum where the k-bit is set, else zero.
        transmute(simd_select_bitmask(k, max, i16x32::ZERO))
    }
}
2132
2133/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2134///
2135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi16&expand=3568)
2136#[inline]
2137#[target_feature(enable = "avx512bw,avx512vl")]
2138#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2139#[cfg_attr(test, assert_instr(vpmaxsw))]
2140#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2141pub const fn _mm256_mask_max_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi16(a, b).as_i16x16();
        // Per-lane merge: maximum where the k-bit is set, else the `src` lane.
        transmute(simd_select_bitmask(k, max, src.as_i16x16()))
    }
}
2147
2148/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2149///
2150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi16&expand=3569)
2151#[inline]
2152#[target_feature(enable = "avx512bw,avx512vl")]
2153#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2154#[cfg_attr(test, assert_instr(vpmaxsw))]
2155#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2156pub const fn _mm256_maskz_max_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi16(a, b).as_i16x16();
        // Per-lane merge: maximum where the k-bit is set, else zero.
        transmute(simd_select_bitmask(k, max, i16x16::ZERO))
    }
}
2162
2163/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2164///
2165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi16&expand=3565)
2166#[inline]
2167#[target_feature(enable = "avx512bw,avx512vl")]
2168#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2169#[cfg_attr(test, assert_instr(vpmaxsw))]
2170#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2171pub const fn _mm_mask_max_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi16(a, b).as_i16x8();
        // Per-lane merge: maximum where the k-bit is set, else the `src` lane.
        transmute(simd_select_bitmask(k, max, src.as_i16x8()))
    }
}
2177
2178/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2179///
2180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi16&expand=3566)
2181#[inline]
2182#[target_feature(enable = "avx512bw,avx512vl")]
2183#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2184#[cfg_attr(test, assert_instr(vpmaxsw))]
2185#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2186pub const fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi16(a, b).as_i16x8();
        // Per-lane merge: maximum where the k-bit is set, else zero.
        transmute(simd_select_bitmask(k, max, i16x8::ZERO))
    }
}
2192
2193/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst.
2194///
2195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi8&expand=3600)
2196#[inline]
2197#[target_feature(enable = "avx512bw")]
2198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2199#[cfg_attr(test, assert_instr(vpmaxsb))]
2200#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2201pub const fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise signed max; codegen is pinned to `vpmaxsb` by `assert_instr` above.
    unsafe { simd_imax(a.as_i8x64(), b.as_i8x64()).as_m512i() }
}
2204
2205/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2206///
2207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi8&expand=3598)
2208#[inline]
2209#[target_feature(enable = "avx512bw")]
2210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2211#[cfg_attr(test, assert_instr(vpmaxsb))]
2212#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2213pub const fn _mm512_mask_max_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi8(a, b).as_i8x64();
        // Per-lane merge: maximum where the k-bit is set, else the `src` lane.
        transmute(simd_select_bitmask(k, max, src.as_i8x64()))
    }
}
2219
2220/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2221///
2222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi8&expand=3599)
2223#[inline]
2224#[target_feature(enable = "avx512bw")]
2225#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2226#[cfg_attr(test, assert_instr(vpmaxsb))]
2227#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2228pub const fn _mm512_maskz_max_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi8(a, b).as_i8x64();
        // Per-lane merge: maximum where the k-bit is set, else zero.
        transmute(simd_select_bitmask(k, max, i8x64::ZERO))
    }
}
2234
2235/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2236///
2237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi8&expand=3595)
2238#[inline]
2239#[target_feature(enable = "avx512bw,avx512vl")]
2240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2241#[cfg_attr(test, assert_instr(vpmaxsb))]
2242#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2243pub const fn _mm256_mask_max_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi8(a, b).as_i8x32();
        // Per-lane merge: maximum where the k-bit is set, else the `src` lane.
        transmute(simd_select_bitmask(k, max, src.as_i8x32()))
    }
}
2249
2250/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2251///
2252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi8&expand=3596)
2253#[inline]
2254#[target_feature(enable = "avx512bw,avx512vl")]
2255#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2256#[cfg_attr(test, assert_instr(vpmaxsb))]
2257#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2258pub const fn _mm256_maskz_max_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi8(a, b).as_i8x32();
        // Per-lane merge: maximum where the k-bit is set, else zero.
        transmute(simd_select_bitmask(k, max, i8x32::ZERO))
    }
}
2264
2265/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2266///
2267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi8&expand=3592)
2268#[inline]
2269#[target_feature(enable = "avx512bw,avx512vl")]
2270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2271#[cfg_attr(test, assert_instr(vpmaxsb))]
2272#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2273pub const fn _mm_mask_max_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi8(a, b).as_i8x16();
        // Per-lane merge: maximum where the k-bit is set, else the `src` lane.
        transmute(simd_select_bitmask(k, max, src.as_i8x16()))
    }
}
2279
2280/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2281///
2282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi8&expand=3593)
2283#[inline]
2284#[target_feature(enable = "avx512bw,avx512vl")]
2285#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2286#[cfg_attr(test, assert_instr(vpmaxsb))]
2287#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2288pub const fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi8(a, b).as_i8x16();
        // Per-lane merge: maximum where the k-bit is set, else zero.
        transmute(simd_select_bitmask(k, max, i8x16::ZERO))
    }
}
2294
2295/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst.
2296///
2297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu16&expand=3723)
2298#[inline]
2299#[target_feature(enable = "avx512bw")]
2300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2301#[cfg_attr(test, assert_instr(vpminuw))]
2302#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise unsigned 16-bit minimum via the element-wise integer-min intrinsic.
    unsafe { simd_imin(a.as_u16x32(), b.as_u16x32()).as_m512i() }
}
2306
2307/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2308///
2309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu16&expand=3721)
2310#[inline]
2311#[target_feature(enable = "avx512bw")]
2312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2313#[cfg_attr(test, assert_instr(vpminuw))]
2314#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_min_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu16(a, b).as_u16x32();
        // Bit i of `k` set: keep the minimum lane; clear: copy lane i from `src`.
        transmute(simd_select_bitmask(k, min, src.as_u16x32()))
    }
}
2321
2322/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2323///
2324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu16&expand=3722)
2325#[inline]
2326#[target_feature(enable = "avx512bw")]
2327#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2328#[cfg_attr(test, assert_instr(vpminuw))]
2329#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_min_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu16(a, b).as_u16x32();
        // Bit i of `k` set: keep the minimum lane; clear: zero lane i.
        transmute(simd_select_bitmask(k, min, u16x32::ZERO))
    }
}
2336
2337/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2338///
2339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu16&expand=3718)
2340#[inline]
2341#[target_feature(enable = "avx512bw,avx512vl")]
2342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2343#[cfg_attr(test, assert_instr(vpminuw))]
2344#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_min_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu16(a, b).as_u16x16();
        // Bit i of `k` set: keep the minimum lane; clear: copy lane i from `src`.
        transmute(simd_select_bitmask(k, min, src.as_u16x16()))
    }
}
2351
2352/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2353///
2354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu16&expand=3719)
2355#[inline]
2356#[target_feature(enable = "avx512bw,avx512vl")]
2357#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2358#[cfg_attr(test, assert_instr(vpminuw))]
2359#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_min_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu16(a, b).as_u16x16();
        // Bit i of `k` set: keep the minimum lane; clear: zero lane i.
        transmute(simd_select_bitmask(k, min, u16x16::ZERO))
    }
}
2366
2367/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2368///
2369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu16&expand=3715)
2370#[inline]
2371#[target_feature(enable = "avx512bw,avx512vl")]
2372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2373#[cfg_attr(test, assert_instr(vpminuw))]
2374#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_min_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu16(a, b).as_u16x8();
        // Bit i of `k` set: keep the minimum lane; clear: copy lane i from `src`.
        transmute(simd_select_bitmask(k, min, src.as_u16x8()))
    }
}
2381
2382/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2383///
2384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu16&expand=3716)
2385#[inline]
2386#[target_feature(enable = "avx512bw,avx512vl")]
2387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2388#[cfg_attr(test, assert_instr(vpminuw))]
2389#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu16(a, b).as_u16x8();
        // Bit i of `k` set: keep the minimum lane; clear: zero lane i.
        transmute(simd_select_bitmask(k, min, u16x8::ZERO))
    }
}
2396
2397/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst.
2398///
2399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu8&expand=3750)
2400#[inline]
2401#[target_feature(enable = "avx512bw")]
2402#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2403#[cfg_attr(test, assert_instr(vpminub))]
2404#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise unsigned 8-bit minimum via the element-wise integer-min intrinsic.
    unsafe { simd_imin(a.as_u8x64(), b.as_u8x64()).as_m512i() }
}
2408
2409/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2410///
2411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu8&expand=3748)
2412#[inline]
2413#[target_feature(enable = "avx512bw")]
2414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2415#[cfg_attr(test, assert_instr(vpminub))]
2416#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_min_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu8(a, b).as_u8x64();
        // Bit i of `k` set: keep the minimum lane; clear: copy lane i from `src`.
        transmute(simd_select_bitmask(k, min, src.as_u8x64()))
    }
}
2423
2424/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2425///
2426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu8&expand=3749)
2427#[inline]
2428#[target_feature(enable = "avx512bw")]
2429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2430#[cfg_attr(test, assert_instr(vpminub))]
2431#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_min_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu8(a, b).as_u8x64();
        // Bit i of `k` set: keep the minimum lane; clear: zero lane i.
        transmute(simd_select_bitmask(k, min, u8x64::ZERO))
    }
}
2438
2439/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2440///
2441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu8&expand=3745)
2442#[inline]
2443#[target_feature(enable = "avx512bw,avx512vl")]
2444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2445#[cfg_attr(test, assert_instr(vpminub))]
2446#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_min_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu8(a, b).as_u8x32();
        // Bit i of `k` set: keep the minimum lane; clear: copy lane i from `src`.
        transmute(simd_select_bitmask(k, min, src.as_u8x32()))
    }
}
2453
2454/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2455///
2456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu8&expand=3746)
2457#[inline]
2458#[target_feature(enable = "avx512bw,avx512vl")]
2459#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2460#[cfg_attr(test, assert_instr(vpminub))]
2461#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_min_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu8(a, b).as_u8x32();
        // Bit i of `k` set: keep the minimum lane; clear: zero lane i.
        transmute(simd_select_bitmask(k, min, u8x32::ZERO))
    }
}
2468
2469/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2470///
2471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu8&expand=3742)
2472#[inline]
2473#[target_feature(enable = "avx512bw,avx512vl")]
2474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2475#[cfg_attr(test, assert_instr(vpminub))]
2476#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_min_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu8(a, b).as_u8x16();
        // Bit i of `k` set: keep the minimum lane; clear: copy lane i from `src`.
        transmute(simd_select_bitmask(k, min, src.as_u8x16()))
    }
}
2483
2484/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2485///
2486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu8&expand=3743)
2487#[inline]
2488#[target_feature(enable = "avx512bw,avx512vl")]
2489#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2490#[cfg_attr(test, assert_instr(vpminub))]
2491#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu8(a, b).as_u8x16();
        // Bit i of `k` set: keep the minimum lane; clear: zero lane i.
        transmute(simd_select_bitmask(k, min, u8x16::ZERO))
    }
}
2498
2499/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst.
2500///
2501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi16&expand=3687)
2502#[inline]
2503#[target_feature(enable = "avx512bw")]
2504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2505#[cfg_attr(test, assert_instr(vpminsw))]
2506#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise signed 16-bit minimum via the element-wise integer-min intrinsic.
    unsafe { simd_imin(a.as_i16x32(), b.as_i16x32()).as_m512i() }
}
2510
2511/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2512///
2513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi16&expand=3685)
2514#[inline]
2515#[target_feature(enable = "avx512bw")]
2516#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2517#[cfg_attr(test, assert_instr(vpminsw))]
2518#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_min_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi16(a, b).as_i16x32();
        // Bit i of `k` set: keep the minimum lane; clear: copy lane i from `src`.
        transmute(simd_select_bitmask(k, min, src.as_i16x32()))
    }
}
2525
2526/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2527///
2528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi16&expand=3686)
2529#[inline]
2530#[target_feature(enable = "avx512bw")]
2531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2532#[cfg_attr(test, assert_instr(vpminsw))]
2533#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_min_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi16(a, b).as_i16x32();
        // Bit i of `k` set: keep the minimum lane; clear: zero lane i.
        transmute(simd_select_bitmask(k, min, i16x32::ZERO))
    }
}
2540
2541/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2542///
2543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi16&expand=3682)
2544#[inline]
2545#[target_feature(enable = "avx512bw,avx512vl")]
2546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2547#[cfg_attr(test, assert_instr(vpminsw))]
2548#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_min_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi16(a, b).as_i16x16();
        // Bit i of `k` set: keep the minimum lane; clear: copy lane i from `src`.
        transmute(simd_select_bitmask(k, min, src.as_i16x16()))
    }
}
2555
2556/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2557///
2558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi16&expand=3683)
2559#[inline]
2560#[target_feature(enable = "avx512bw,avx512vl")]
2561#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2562#[cfg_attr(test, assert_instr(vpminsw))]
2563#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_min_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi16(a, b).as_i16x16();
        // Bit i of `k` set: keep the minimum lane; clear: zero lane i.
        transmute(simd_select_bitmask(k, min, i16x16::ZERO))
    }
}
2570
2571/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2572///
2573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi16&expand=3679)
2574#[inline]
2575#[target_feature(enable = "avx512bw,avx512vl")]
2576#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2577#[cfg_attr(test, assert_instr(vpminsw))]
2578#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_min_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi16(a, b).as_i16x8();
        // Bit i of `k` set: keep the minimum lane; clear: copy lane i from `src`.
        transmute(simd_select_bitmask(k, min, src.as_i16x8()))
    }
}
2585
2586/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2587///
2588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi16&expand=3680)
2589#[inline]
2590#[target_feature(enable = "avx512bw,avx512vl")]
2591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2592#[cfg_attr(test, assert_instr(vpminsw))]
2593#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi16(a, b).as_i16x8();
        // Bit i of `k` set: keep the minimum lane; clear: zero lane i.
        transmute(simd_select_bitmask(k, min, i16x8::ZERO))
    }
}
2600
2601/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst.
2602///
2603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi8&expand=3714)
2604#[inline]
2605#[target_feature(enable = "avx512bw")]
2606#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2607#[cfg_attr(test, assert_instr(vpminsb))]
2608#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise signed 8-bit minimum via the element-wise integer-min intrinsic.
    unsafe { simd_imin(a.as_i8x64(), b.as_i8x64()).as_m512i() }
}
2612
2613/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2614///
2615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi8&expand=3712)
2616#[inline]
2617#[target_feature(enable = "avx512bw")]
2618#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2619#[cfg_attr(test, assert_instr(vpminsb))]
2620#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_min_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi8(a, b).as_i8x64();
        // Bit i of `k` set: keep the minimum lane; clear: copy lane i from `src`.
        transmute(simd_select_bitmask(k, min, src.as_i8x64()))
    }
}
2627
2628/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2629///
2630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi8&expand=3713)
2631#[inline]
2632#[target_feature(enable = "avx512bw")]
2633#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2634#[cfg_attr(test, assert_instr(vpminsb))]
2635#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_min_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi8(a, b).as_i8x64();
        // Bit i of `k` set: keep the minimum lane; clear: zero lane i.
        transmute(simd_select_bitmask(k, min, i8x64::ZERO))
    }
}
2642
2643/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2644///
2645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi8&expand=3709)
2646#[inline]
2647#[target_feature(enable = "avx512bw,avx512vl")]
2648#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2649#[cfg_attr(test, assert_instr(vpminsb))]
2650#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_min_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi8(a, b).as_i8x32();
        // Bit i of `k` set: keep the minimum lane; clear: copy lane i from `src`.
        transmute(simd_select_bitmask(k, min, src.as_i8x32()))
    }
}
2657
2658/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2659///
2660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi8&expand=3710)
2661#[inline]
2662#[target_feature(enable = "avx512bw,avx512vl")]
2663#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2664#[cfg_attr(test, assert_instr(vpminsb))]
2665#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_min_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi8(a, b).as_i8x32();
        // Bit i of `k` set: keep the minimum lane; clear: zero lane i.
        transmute(simd_select_bitmask(k, min, i8x32::ZERO))
    }
}
2672
2673/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2674///
2675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi8&expand=3706)
2676#[inline]
2677#[target_feature(enable = "avx512bw,avx512vl")]
2678#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2679#[cfg_attr(test, assert_instr(vpminsb))]
2680#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_min_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi8(a, b).as_i8x16();
        // Bit i of `k` set: keep the minimum lane; clear: copy lane i from `src`.
        transmute(simd_select_bitmask(k, min, src.as_i8x16()))
    }
}
2687
2688/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2689///
2690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi8&expand=3707)
2691#[inline]
2692#[target_feature(enable = "avx512bw,avx512vl")]
2693#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2694#[cfg_attr(test, assert_instr(vpminsb))]
2695#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_min_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi8(a, b).as_i8x16();
        // Bit i of `k` set: keep the minimum lane; clear: zero lane i.
        transmute(simd_select_bitmask(k, min, i8x16::ZERO))
    }
}
2702
2703/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
2704///
2705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu16_mask&expand=1050)
2706#[inline]
2707#[target_feature(enable = "avx512bw")]
2708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2709#[cfg_attr(test, assert_instr(vpcmp))]
2710#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmplt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // simd_lt yields an all-ones/all-zeros lane mask; simd_bitmask packs one bit per lane.
    unsafe { simd_bitmask::<u16x32, _>(simd_lt(a.as_u16x32(), b.as_u16x32())) }
}
2714
2715/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2716///
2717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu16_mask&expand=1051)
2718#[inline]
2719#[target_feature(enable = "avx512bw")]
2720#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2721#[cfg_attr(test, assert_instr(vpcmp))]
2722#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmplt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Delegate to the generic compare with the LT predicate; `k1` zeroes masked-out lanes.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2726
2727/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
2728///
2729/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu16_mask&expand=1050)
2730#[inline]
2731#[target_feature(enable = "avx512bw,avx512vl")]
2732#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2733#[cfg_attr(test, assert_instr(vpcmp))]
2734#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmplt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // simd_lt yields an all-ones/all-zeros lane mask; simd_bitmask packs one bit per lane.
    unsafe { simd_bitmask::<u16x16, _>(simd_lt(a.as_u16x16(), b.as_u16x16())) }
}
2738
2739/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2740///
2741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu16_mask&expand=1049)
2742#[inline]
2743#[target_feature(enable = "avx512bw,avx512vl")]
2744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2745#[cfg_attr(test, assert_instr(vpcmp))]
2746#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmplt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Delegate to the generic compare with the LT predicate; `k1` zeroes masked-out lanes.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2750
2751/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
2752///
2753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu16_mask&expand=1018)
2754#[inline]
2755#[target_feature(enable = "avx512bw,avx512vl")]
2756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2757#[cfg_attr(test, assert_instr(vpcmp))]
2758#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmplt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // simd_lt yields an all-ones/all-zeros lane mask; simd_bitmask packs one bit per lane.
    unsafe { simd_bitmask::<u16x8, _>(simd_lt(a.as_u16x8(), b.as_u16x8())) }
}
2762
2763/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2764///
2765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu16_mask&expand=1019)
2766#[inline]
2767#[target_feature(enable = "avx512bw,avx512vl")]
2768#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2769#[cfg_attr(test, assert_instr(vpcmp))]
2770#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmplt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic compare with the LT predicate; `k1` zeroes masked-out lanes.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2774
2775/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
2776///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu8_mask&expand=1068)
2778#[inline]
2779#[target_feature(enable = "avx512bw")]
2780#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2781#[cfg_attr(test, assert_instr(vpcmp))]
2782#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmplt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // simd_lt yields an all-ones/all-zeros lane mask; simd_bitmask packs one bit per lane.
    unsafe { simd_bitmask::<u8x64, _>(simd_lt(a.as_u8x64(), b.as_u8x64())) }
}
2786
2787/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2788///
2789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu8_mask&expand=1069)
2790#[inline]
2791#[target_feature(enable = "avx512bw")]
2792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2793#[cfg_attr(test, assert_instr(vpcmp))]
2794#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmplt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegate to the generic compare with the LT predicate; `k1` zeroes masked-out lanes.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2798
2799/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
2800///
2801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu8_mask&expand=1066)
2802#[inline]
2803#[target_feature(enable = "avx512bw,avx512vl")]
2804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2805#[cfg_attr(test, assert_instr(vpcmp))]
2806#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmplt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // simd_lt yields an all-ones/all-zeros lane mask; simd_bitmask packs one bit per lane.
    unsafe { simd_bitmask::<u8x32, _>(simd_lt(a.as_u8x32(), b.as_u8x32())) }
}
2810
2811/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2812///
2813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu8_mask&expand=1067)
2814#[inline]
2815#[target_feature(enable = "avx512bw,avx512vl")]
2816#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2817#[cfg_attr(test, assert_instr(vpcmp))]
2818#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmplt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Delegate to the generic compare with the LT predicate; `k1` zeroes masked-out lanes.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2822
2823/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
2824///
2825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu8_mask&expand=1064)
2826#[inline]
2827#[target_feature(enable = "avx512bw,avx512vl")]
2828#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2829#[cfg_attr(test, assert_instr(vpcmp))]
2830#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmplt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // simd_lt yields an all-ones/all-zeros lane mask; simd_bitmask packs one bit per lane.
    unsafe { simd_bitmask::<u8x16, _>(simd_lt(a.as_u8x16(), b.as_u8x16())) }
}
2834
2835/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2836///
2837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu8_mask&expand=1065)
2838#[inline]
2839#[target_feature(enable = "avx512bw,avx512vl")]
2840#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2841#[cfg_attr(test, assert_instr(vpcmp))]
2842#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmplt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Delegate to the generic compare with the LT predicate; `k1` zeroes masked-out lanes.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2846
2847/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
2848///
2849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi16_mask&expand=1022)
2850#[inline]
2851#[target_feature(enable = "avx512bw")]
2852#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2853#[cfg_attr(test, assert_instr(vpcmp))]
2854#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmplt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // simd_lt yields an all-ones/all-zeros lane mask; simd_bitmask packs one bit per lane.
    unsafe { simd_bitmask::<i16x32, _>(simd_lt(a.as_i16x32(), b.as_i16x32())) }
}
2858
2859/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2860///
2861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi16_mask&expand=1023)
2862#[inline]
2863#[target_feature(enable = "avx512bw")]
2864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2865#[cfg_attr(test, assert_instr(vpcmp))]
2866#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2867pub const fn _mm512_mask_cmplt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
2868    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
2869}
2870
2871/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
2872///
2873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi16_mask&expand=1020)
2874#[inline]
2875#[target_feature(enable = "avx512bw,avx512vl")]
2876#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2877#[cfg_attr(test, assert_instr(vpcmp))]
2878#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2879pub const fn _mm256_cmplt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
2880    unsafe { simd_bitmask::<i16x16, _>(simd_lt(a.as_i16x16(), b.as_i16x16())) }
2881}
2882
2883/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2884///
2885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi16_mask&expand=1021)
2886#[inline]
2887#[target_feature(enable = "avx512bw,avx512vl")]
2888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2889#[cfg_attr(test, assert_instr(vpcmp))]
2890#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2891pub const fn _mm256_mask_cmplt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
2892    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
2893}
2894
2895/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
2896///
2897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16_mask&expand=1018)
2898#[inline]
2899#[target_feature(enable = "avx512bw,avx512vl")]
2900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2901#[cfg_attr(test, assert_instr(vpcmp))]
2902#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2903pub const fn _mm_cmplt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
2904    unsafe { simd_bitmask::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
2905}
2906
2907/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2908///
2909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi16_mask&expand=1019)
2910#[inline]
2911#[target_feature(enable = "avx512bw,avx512vl")]
2912#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2913#[cfg_attr(test, assert_instr(vpcmp))]
2914#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2915pub const fn _mm_mask_cmplt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
2916    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
2917}
2918
2919/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
2920///
2921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi8_mask&expand=1044)
2922#[inline]
2923#[target_feature(enable = "avx512bw")]
2924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2925#[cfg_attr(test, assert_instr(vpcmp))]
2926#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2927pub const fn _mm512_cmplt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
2928    unsafe { simd_bitmask::<i8x64, _>(simd_lt(a.as_i8x64(), b.as_i8x64())) }
2929}
2930
2931/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2932///
2933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi8_mask&expand=1045)
2934#[inline]
2935#[target_feature(enable = "avx512bw")]
2936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2937#[cfg_attr(test, assert_instr(vpcmp))]
2938#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2939pub const fn _mm512_mask_cmplt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
2940    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
2941}
2942
2943/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
2944///
2945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi8_mask&expand=1042)
2946#[inline]
2947#[target_feature(enable = "avx512bw,avx512vl")]
2948#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2949#[cfg_attr(test, assert_instr(vpcmp))]
2950#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2951pub const fn _mm256_cmplt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
2952    unsafe { simd_bitmask::<i8x32, _>(simd_lt(a.as_i8x32(), b.as_i8x32())) }
2953}
2954
2955/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2956///
2957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi8_mask&expand=1043)
2958#[inline]
2959#[target_feature(enable = "avx512bw,avx512vl")]
2960#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2961#[cfg_attr(test, assert_instr(vpcmp))]
2962#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2963pub const fn _mm256_mask_cmplt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
2964    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
2965}
2966
2967/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
2968///
2969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8_mask&expand=1040)
2970#[inline]
2971#[target_feature(enable = "avx512bw,avx512vl")]
2972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2973#[cfg_attr(test, assert_instr(vpcmp))]
2974#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2975pub const fn _mm_cmplt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
2976    unsafe { simd_bitmask::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
2977}
2978
2979/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2980///
2981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi8_mask&expand=1041)
2982#[inline]
2983#[target_feature(enable = "avx512bw,avx512vl")]
2984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2985#[cfg_attr(test, assert_instr(vpcmp))]
2986#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2987pub const fn _mm_mask_cmplt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
2988    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
2989}
2990
2991/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
2992///
2993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu16_mask&expand=927)
2994#[inline]
2995#[target_feature(enable = "avx512bw")]
2996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2997#[cfg_attr(test, assert_instr(vpcmp))]
2998#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2999pub const fn _mm512_cmpgt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3000    unsafe { simd_bitmask::<u16x32, _>(simd_gt(a.as_u16x32(), b.as_u16x32())) }
3001}
3002
3003/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3004///
3005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu16_mask&expand=928)
3006#[inline]
3007#[target_feature(enable = "avx512bw")]
3008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3009#[cfg_attr(test, assert_instr(vpcmp))]
3010#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3011pub const fn _mm512_mask_cmpgt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
3012    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
3013}
3014
3015/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
3016///
3017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu16_mask&expand=925)
3018#[inline]
3019#[target_feature(enable = "avx512bw,avx512vl")]
3020#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3021#[cfg_attr(test, assert_instr(vpcmp))]
3022#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3023pub const fn _mm256_cmpgt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3024    unsafe { simd_bitmask::<u16x16, _>(simd_gt(a.as_u16x16(), b.as_u16x16())) }
3025}
3026
3027/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3028///
3029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu16_mask&expand=926)
3030#[inline]
3031#[target_feature(enable = "avx512bw,avx512vl")]
3032#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3033#[cfg_attr(test, assert_instr(vpcmp))]
3034#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3035pub const fn _mm256_mask_cmpgt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
3036    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
3037}
3038
3039/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
3040///
3041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu16_mask&expand=923)
3042#[inline]
3043#[target_feature(enable = "avx512bw,avx512vl")]
3044#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3045#[cfg_attr(test, assert_instr(vpcmp))]
3046#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3047pub const fn _mm_cmpgt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3048    unsafe { simd_bitmask::<u16x8, _>(simd_gt(a.as_u16x8(), b.as_u16x8())) }
3049}
3050
3051/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3052///
3053/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu16_mask&expand=924)
3054#[inline]
3055#[target_feature(enable = "avx512bw,avx512vl")]
3056#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3057#[cfg_attr(test, assert_instr(vpcmp))]
3058#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3059pub const fn _mm_mask_cmpgt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
3060    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
3061}
3062
3063/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
3064///
3065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu8_mask&expand=945)
3066#[inline]
3067#[target_feature(enable = "avx512bw")]
3068#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3069#[cfg_attr(test, assert_instr(vpcmp))]
3070#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3071pub const fn _mm512_cmpgt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3072    unsafe { simd_bitmask::<u8x64, _>(simd_gt(a.as_u8x64(), b.as_u8x64())) }
3073}
3074
3075/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3076///
3077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu8_mask&expand=946)
3078#[inline]
3079#[target_feature(enable = "avx512bw")]
3080#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3081#[cfg_attr(test, assert_instr(vpcmp))]
3082#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3083pub const fn _mm512_mask_cmpgt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
3084    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
3085}
3086
3087/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
3088///
3089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu8_mask&expand=943)
3090#[inline]
3091#[target_feature(enable = "avx512bw,avx512vl")]
3092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3093#[cfg_attr(test, assert_instr(vpcmp))]
3094#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3095pub const fn _mm256_cmpgt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3096    unsafe { simd_bitmask::<u8x32, _>(simd_gt(a.as_u8x32(), b.as_u8x32())) }
3097}
3098
3099/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3100///
3101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu8_mask&expand=944)
3102#[inline]
3103#[target_feature(enable = "avx512bw,avx512vl")]
3104#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3105#[cfg_attr(test, assert_instr(vpcmp))]
3106#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3107pub const fn _mm256_mask_cmpgt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
3108    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
3109}
3110
3111/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
3112///
3113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu8_mask&expand=941)
3114#[inline]
3115#[target_feature(enable = "avx512bw,avx512vl")]
3116#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3117#[cfg_attr(test, assert_instr(vpcmp))]
3118#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3119pub const fn _mm_cmpgt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3120    unsafe { simd_bitmask::<u8x16, _>(simd_gt(a.as_u8x16(), b.as_u8x16())) }
3121}
3122
3123/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3124///
3125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu8_mask&expand=942)
3126#[inline]
3127#[target_feature(enable = "avx512bw,avx512vl")]
3128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3129#[cfg_attr(test, assert_instr(vpcmp))]
3130#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3131pub const fn _mm_mask_cmpgt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
3132    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
3133}
3134
3135/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
3136///
3137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi16_mask&expand=897)
3138#[inline]
3139#[target_feature(enable = "avx512bw")]
3140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3141#[cfg_attr(test, assert_instr(vpcmp))]
3142#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3143pub const fn _mm512_cmpgt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3144    unsafe { simd_bitmask::<i16x32, _>(simd_gt(a.as_i16x32(), b.as_i16x32())) }
3145}
3146
3147/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3148///
3149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi16_mask&expand=898)
3150#[inline]
3151#[target_feature(enable = "avx512bw")]
3152#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3153#[cfg_attr(test, assert_instr(vpcmp))]
3154#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3155pub const fn _mm512_mask_cmpgt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
3156    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
3157}
3158
3159/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
3160///
3161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi16_mask&expand=895)
3162#[inline]
3163#[target_feature(enable = "avx512bw,avx512vl")]
3164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3165#[cfg_attr(test, assert_instr(vpcmp))]
3166#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3167pub const fn _mm256_cmpgt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3168    unsafe { simd_bitmask::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
3169}
3170
3171/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3172///
3173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi16_mask&expand=896)
3174#[inline]
3175#[target_feature(enable = "avx512bw,avx512vl")]
3176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3177#[cfg_attr(test, assert_instr(vpcmp))]
3178#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3179pub const fn _mm256_mask_cmpgt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
3180    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
3181}
3182
3183/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
3184///
3185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16_mask&expand=893)
3186#[inline]
3187#[target_feature(enable = "avx512bw,avx512vl")]
3188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3189#[cfg_attr(test, assert_instr(vpcmp))]
3190#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3191pub const fn _mm_cmpgt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3192    unsafe { simd_bitmask::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
3193}
3194
3195/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3196///
3197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi16_mask&expand=894)
3198#[inline]
3199#[target_feature(enable = "avx512bw,avx512vl")]
3200#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3201#[cfg_attr(test, assert_instr(vpcmp))]
3202#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3203pub const fn _mm_mask_cmpgt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
3204    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
3205}
3206
3207/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
3208///
3209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi8_mask&expand=921)
3210#[inline]
3211#[target_feature(enable = "avx512bw")]
3212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3213#[cfg_attr(test, assert_instr(vpcmp))]
3214#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3215pub const fn _mm512_cmpgt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3216    unsafe { simd_bitmask::<i8x64, _>(simd_gt(a.as_i8x64(), b.as_i8x64())) }
3217}
3218
3219/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3220///
3221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi8_mask&expand=922)
3222#[inline]
3223#[target_feature(enable = "avx512bw")]
3224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3225#[cfg_attr(test, assert_instr(vpcmp))]
3226#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3227pub const fn _mm512_mask_cmpgt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
3228    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
3229}
3230
3231/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
3232///
3233/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi8_mask&expand=919)
3234#[inline]
3235#[target_feature(enable = "avx512bw,avx512vl")]
3236#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3237#[cfg_attr(test, assert_instr(vpcmp))]
3238#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3239pub const fn _mm256_cmpgt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3240    unsafe { simd_bitmask::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
3241}
3242
3243/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3244///
3245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi8_mask&expand=920)
3246#[inline]
3247#[target_feature(enable = "avx512bw,avx512vl")]
3248#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3249#[cfg_attr(test, assert_instr(vpcmp))]
3250#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3251pub const fn _mm256_mask_cmpgt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
3252    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
3253}
3254
3255/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
3256///
3257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8_mask&expand=917)
3258#[inline]
3259#[target_feature(enable = "avx512bw,avx512vl")]
3260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3261#[cfg_attr(test, assert_instr(vpcmp))]
3262#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3263pub const fn _mm_cmpgt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3264    unsafe { simd_bitmask::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
3265}
3266
3267/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3268///
3269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi8_mask&expand=918)
3270#[inline]
3271#[target_feature(enable = "avx512bw,avx512vl")]
3272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3273#[cfg_attr(test, assert_instr(vpcmp))]
3274#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3275pub const fn _mm_mask_cmpgt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
3276    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
3277}
3278
3279/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3280///
3281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu16_mask&expand=989)
3282#[inline]
3283#[target_feature(enable = "avx512bw")]
3284#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3285#[cfg_attr(test, assert_instr(vpcmp))]
3286#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3287pub const fn _mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3288    unsafe { simd_bitmask::<u16x32, _>(simd_le(a.as_u16x32(), b.as_u16x32())) }
3289}
3290
3291/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3292///
3293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu16_mask&expand=990)
3294#[inline]
3295#[target_feature(enable = "avx512bw")]
3296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3297#[cfg_attr(test, assert_instr(vpcmp))]
3298#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3299pub const fn _mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
3300    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
3301}
3302
3303/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3304///
3305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu16_mask&expand=987)
3306#[inline]
3307#[target_feature(enable = "avx512bw,avx512vl")]
3308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3309#[cfg_attr(test, assert_instr(vpcmp))]
3310#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3311pub const fn _mm256_cmple_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3312    unsafe { simd_bitmask::<u16x16, _>(simd_le(a.as_u16x16(), b.as_u16x16())) }
3313}
3314
3315/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3316///
3317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu16_mask&expand=988)
3318#[inline]
3319#[target_feature(enable = "avx512bw,avx512vl")]
3320#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3321#[cfg_attr(test, assert_instr(vpcmp))]
3322#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3323pub const fn _mm256_mask_cmple_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
3324    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
3325}
3326
3327/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3328///
3329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu16_mask&expand=985)
3330#[inline]
3331#[target_feature(enable = "avx512bw,avx512vl")]
3332#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3333#[cfg_attr(test, assert_instr(vpcmp))]
3334#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3335pub const fn _mm_cmple_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3336    unsafe { simd_bitmask::<u16x8, _>(simd_le(a.as_u16x8(), b.as_u16x8())) }
3337}
3338
3339/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3340///
3341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu16_mask&expand=986)
3342#[inline]
3343#[target_feature(enable = "avx512bw,avx512vl")]
3344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3345#[cfg_attr(test, assert_instr(vpcmp))]
3346#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3347pub const fn _mm_mask_cmple_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
3348    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
3349}
3350
3351/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.   
3352///
3353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu8_mask&expand=1007)
3354#[inline]
3355#[target_feature(enable = "avx512bw")]
3356#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3357#[cfg_attr(test, assert_instr(vpcmp))]
3358#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3359pub const fn _mm512_cmple_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3360    unsafe { simd_bitmask::<u8x64, _>(simd_le(a.as_u8x64(), b.as_u8x64())) }
3361}
3362
3363/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3364///
3365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu8_mask&expand=1008)
3366#[inline]
3367#[target_feature(enable = "avx512bw")]
3368#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3369#[cfg_attr(test, assert_instr(vpcmp))]
3370#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3371pub const fn _mm512_mask_cmple_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
3372    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
3373}
3374
3375/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.   
3376///
3377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu8_mask&expand=1005)
3378#[inline]
3379#[target_feature(enable = "avx512bw,avx512vl")]
3380#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3381#[cfg_attr(test, assert_instr(vpcmp))]
3382#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3383pub const fn _mm256_cmple_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3384    unsafe { simd_bitmask::<u8x32, _>(simd_le(a.as_u8x32(), b.as_u8x32())) }
3385}
3386
/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu8_mask&expand=1006)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // `_MM_CMPINT_LE` selects the `<=` predicate; the generic helper applies zeromask `k1`.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3398
3399/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.   
3400///
3401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu8_mask&expand=1003)
3402#[inline]
3403#[target_feature(enable = "avx512bw,avx512vl")]
3404#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3405#[cfg_attr(test, assert_instr(vpcmp))]
3406#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3407pub const fn _mm_cmple_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3408    unsafe { simd_bitmask::<u8x16, _>(simd_le(a.as_u8x16(), b.as_u8x16())) }
3409}
3410
/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu8_mask&expand=1004)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // `_MM_CMPINT_LE` selects the `<=` predicate; the generic helper applies zeromask `k1`.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3422
3423/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3424///
3425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi16_mask&expand=965)
3426#[inline]
3427#[target_feature(enable = "avx512bw")]
3428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3429#[cfg_attr(test, assert_instr(vpcmp))]
3430#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3431pub const fn _mm512_cmple_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3432    unsafe { simd_bitmask::<i16x32, _>(simd_le(a.as_i16x32(), b.as_i16x32())) }
3433}
3434
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi16_mask&expand=966)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmple_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // `_MM_CMPINT_LE` selects the `<=` predicate; the generic helper applies zeromask `k1`.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3446
3447/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3448///
3449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi16_mask&expand=963)
3450#[inline]
3451#[target_feature(enable = "avx512bw,avx512vl")]
3452#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3453#[cfg_attr(test, assert_instr(vpcmp))]
3454#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3455pub const fn _mm256_cmple_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3456    unsafe { simd_bitmask::<i16x16, _>(simd_le(a.as_i16x16(), b.as_i16x16())) }
3457}
3458
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi16_mask&expand=964)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // `_MM_CMPINT_LE` selects the `<=` predicate; the generic helper applies zeromask `k1`.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3470
3471/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3472///
3473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi16_mask&expand=961)
3474#[inline]
3475#[target_feature(enable = "avx512bw,avx512vl")]
3476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3477#[cfg_attr(test, assert_instr(vpcmp))]
3478#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3479pub const fn _mm_cmple_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3480    unsafe { simd_bitmask::<i16x8, _>(simd_le(a.as_i16x8(), b.as_i16x8())) }
3481}
3482
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi16_mask&expand=962)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // `_MM_CMPINT_LE` selects the `<=` predicate; the generic helper applies zeromask `k1`.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3494
3495/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3496///
3497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi8_mask&expand=983)
3498#[inline]
3499#[target_feature(enable = "avx512bw")]
3500#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3501#[cfg_attr(test, assert_instr(vpcmp))]
3502#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3503pub const fn _mm512_cmple_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3504    unsafe { simd_bitmask::<i8x64, _>(simd_le(a.as_i8x64(), b.as_i8x64())) }
3505}
3506
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi8_mask&expand=984)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmple_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // `_MM_CMPINT_LE` selects the `<=` predicate; the generic helper applies zeromask `k1`.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3518
3519/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3520///
3521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi8_mask&expand=981)
3522#[inline]
3523#[target_feature(enable = "avx512bw,avx512vl")]
3524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3525#[cfg_attr(test, assert_instr(vpcmp))]
3526#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3527pub const fn _mm256_cmple_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3528    unsafe { simd_bitmask::<i8x32, _>(simd_le(a.as_i8x32(), b.as_i8x32())) }
3529}
3530
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi8_mask&expand=982)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // `_MM_CMPINT_LE` selects the `<=` predicate; the generic helper applies zeromask `k1`.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3542
3543/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3544///
3545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi8_mask&expand=979)
3546#[inline]
3547#[target_feature(enable = "avx512bw,avx512vl")]
3548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3549#[cfg_attr(test, assert_instr(vpcmp))]
3550#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3551pub const fn _mm_cmple_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3552    unsafe { simd_bitmask::<i8x16, _>(simd_le(a.as_i8x16(), b.as_i8x16())) }
3553}
3554
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi8_mask&expand=980)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // `_MM_CMPINT_LE` selects the `<=` predicate; the generic helper applies zeromask `k1`.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3566
3567/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3568///
3569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu16_mask&expand=867)
3570#[inline]
3571#[target_feature(enable = "avx512bw")]
3572#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3573#[cfg_attr(test, assert_instr(vpcmp))]
3574#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3575pub const fn _mm512_cmpge_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3576    unsafe { simd_bitmask::<u16x32, _>(simd_ge(a.as_u16x32(), b.as_u16x32())) }
3577}
3578
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu16_mask&expand=868)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // `_MM_CMPINT_NLT` ("not less than") encodes `>=`; the generic helper applies zeromask `k1`.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3590
3591/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3592///
3593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu16_mask&expand=865)
3594#[inline]
3595#[target_feature(enable = "avx512bw,avx512vl")]
3596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3597#[cfg_attr(test, assert_instr(vpcmp))]
3598#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3599pub const fn _mm256_cmpge_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3600    unsafe { simd_bitmask::<u16x16, _>(simd_ge(a.as_u16x16(), b.as_u16x16())) }
3601}
3602
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu16_mask&expand=866)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // `_MM_CMPINT_NLT` ("not less than") encodes `>=`; the generic helper applies zeromask `k1`.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3614
3615/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3616///
3617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu16_mask&expand=863)
3618#[inline]
3619#[target_feature(enable = "avx512bw,avx512vl")]
3620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3621#[cfg_attr(test, assert_instr(vpcmp))]
3622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3623pub const fn _mm_cmpge_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3624    unsafe { simd_bitmask::<u16x8, _>(simd_ge(a.as_u16x8(), b.as_u16x8())) }
3625}
3626
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu16_mask&expand=864)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // `_MM_CMPINT_NLT` ("not less than") encodes `>=`; the generic helper applies zeromask `k1`.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3638
3639/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3640///
3641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu8_mask&expand=885)
3642#[inline]
3643#[target_feature(enable = "avx512bw")]
3644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3645#[cfg_attr(test, assert_instr(vpcmp))]
3646#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3647pub const fn _mm512_cmpge_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3648    unsafe { simd_bitmask::<u8x64, _>(simd_ge(a.as_u8x64(), b.as_u8x64())) }
3649}
3650
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu8_mask&expand=886)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // `_MM_CMPINT_NLT` ("not less than") encodes `>=`; the generic helper applies zeromask `k1`.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3662
3663/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3664///
3665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu8_mask&expand=883)
3666#[inline]
3667#[target_feature(enable = "avx512bw,avx512vl")]
3668#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3669#[cfg_attr(test, assert_instr(vpcmp))]
3670#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3671pub const fn _mm256_cmpge_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3672    unsafe { simd_bitmask::<u8x32, _>(simd_ge(a.as_u8x32(), b.as_u8x32())) }
3673}
3674
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu8_mask&expand=884)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // `_MM_CMPINT_NLT` ("not less than") encodes `>=`; the generic helper applies zeromask `k1`.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3686
3687/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3688///
3689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu8_mask&expand=881)
3690#[inline]
3691#[target_feature(enable = "avx512bw,avx512vl")]
3692#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3693#[cfg_attr(test, assert_instr(vpcmp))]
3694#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3695pub const fn _mm_cmpge_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3696    unsafe { simd_bitmask::<u8x16, _>(simd_ge(a.as_u8x16(), b.as_u8x16())) }
3697}
3698
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu8_mask&expand=882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // `_MM_CMPINT_NLT` ("not less than") encodes `>=`; the generic helper applies zeromask `k1`.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3710
3711/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3712///
3713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi16_mask&expand=843)
3714#[inline]
3715#[target_feature(enable = "avx512bw")]
3716#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3717#[cfg_attr(test, assert_instr(vpcmp))]
3718#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3719pub const fn _mm512_cmpge_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3720    unsafe { simd_bitmask::<i16x32, _>(simd_ge(a.as_i16x32(), b.as_i16x32())) }
3721}
3722
/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi16_mask&expand=844)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // `_MM_CMPINT_NLT` ("not less than") encodes `>=`; the generic helper applies zeromask `k1`.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3734
3735/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3736///
3737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi16_mask&expand=841)
3738#[inline]
3739#[target_feature(enable = "avx512bw,avx512vl")]
3740#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3741#[cfg_attr(test, assert_instr(vpcmp))]
3742#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3743pub const fn _mm256_cmpge_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3744    unsafe { simd_bitmask::<i16x16, _>(simd_ge(a.as_i16x16(), b.as_i16x16())) }
3745}
3746
/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi16_mask&expand=842)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // `_MM_CMPINT_NLT` ("not less than") encodes `>=`; the generic helper applies zeromask `k1`.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3758
3759/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3760///
3761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi16_mask&expand=839)
3762#[inline]
3763#[target_feature(enable = "avx512bw,avx512vl")]
3764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3765#[cfg_attr(test, assert_instr(vpcmp))]
3766#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3767pub const fn _mm_cmpge_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3768    unsafe { simd_bitmask::<i16x8, _>(simd_ge(a.as_i16x8(), b.as_i16x8())) }
3769}
3770
/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi16_mask&expand=840)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // `_MM_CMPINT_NLT` ("not less than") encodes `>=`; the generic helper applies zeromask `k1`.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3782
3783/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3784///
3785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi8_mask&expand=861)
3786#[inline]
3787#[target_feature(enable = "avx512bw")]
3788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3789#[cfg_attr(test, assert_instr(vpcmp))]
3790#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3791pub const fn _mm512_cmpge_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3792    unsafe { simd_bitmask::<i8x64, _>(simd_ge(a.as_i8x64(), b.as_i8x64())) }
3793}
3794
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi8_mask&expand=862)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // `_MM_CMPINT_NLT` ("not less than") encodes `>=`; the generic helper applies zeromask `k1`.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3806
3807/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3808///
3809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi8_mask&expand=859)
3810#[inline]
3811#[target_feature(enable = "avx512bw,avx512vl")]
3812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3813#[cfg_attr(test, assert_instr(vpcmp))]
3814#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3815pub const fn _mm256_cmpge_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3816    unsafe { simd_bitmask::<i8x32, _>(simd_ge(a.as_i8x32(), b.as_i8x32())) }
3817}
3818
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi8_mask&expand=860)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // `_MM_CMPINT_NLT` ("not less than") encodes `>=`; the generic helper applies zeromask `k1`.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3830
3831/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3832///
3833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi8_mask&expand=857)
3834#[inline]
3835#[target_feature(enable = "avx512bw,avx512vl")]
3836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3837#[cfg_attr(test, assert_instr(vpcmp))]
3838#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3839pub const fn _mm_cmpge_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3840    unsafe { simd_bitmask::<i8x16, _>(simd_ge(a.as_i8x16(), b.as_i8x16())) }
3841}
3842
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi8_mask&expand=858)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // `_MM_CMPINT_NLT` ("not less than") encodes `>=`; the generic helper applies zeromask `k1`.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3854
3855/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
3856///
3857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu16_mask&expand=801)
3858#[inline]
3859#[target_feature(enable = "avx512bw")]
3860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3861#[cfg_attr(test, assert_instr(vpcmp))]
3862#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3863pub const fn _mm512_cmpeq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3864    unsafe { simd_bitmask::<u16x32, _>(simd_eq(a.as_u16x32(), b.as_u16x32())) }
3865}
3866
/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu16_mask&expand=802)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // `_MM_CMPINT_EQ` selects the equality predicate; the generic helper applies zeromask `k1`.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3878
3879/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
3880///
3881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu16_mask&expand=799)
3882#[inline]
3883#[target_feature(enable = "avx512bw,avx512vl")]
3884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3885#[cfg_attr(test, assert_instr(vpcmp))]
3886#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3887pub const fn _mm256_cmpeq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3888    unsafe { simd_bitmask::<u16x16, _>(simd_eq(a.as_u16x16(), b.as_u16x16())) }
3889}
3890
/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu16_mask&expand=800)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpeq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // `_MM_CMPINT_EQ` selects the equality predicate; the generic helper applies zeromask `k1`.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3902
3903/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
3904///
3905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu16_mask&expand=797)
3906#[inline]
3907#[target_feature(enable = "avx512bw,avx512vl")]
3908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3909#[cfg_attr(test, assert_instr(vpcmp))]
3910#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3911pub const fn _mm_cmpeq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3912    unsafe { simd_bitmask::<u16x8, _>(simd_eq(a.as_u16x8(), b.as_u16x8())) }
3913}
3914
/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu16_mask&expand=798)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpeq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic predicate compare with the EQ encoding; result
    // bits whose k1 bit is clear come back zeroed.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3926
3927/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
3928///
3929/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu8_mask&expand=819)
3930#[inline]
3931#[target_feature(enable = "avx512bw")]
3932#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3933#[cfg_attr(test, assert_instr(vpcmp))]
3934#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3935pub const fn _mm512_cmpeq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3936    unsafe { simd_bitmask::<u8x64, _>(simd_eq(a.as_u8x64(), b.as_u8x64())) }
3937}
3938
/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu8_mask&expand=820)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegate to the generic predicate compare with the EQ encoding; result
    // bits whose k1 bit is clear come back zeroed.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3950
3951/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
3952///
3953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu8_mask&expand=817)
3954#[inline]
3955#[target_feature(enable = "avx512bw,avx512vl")]
3956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3957#[cfg_attr(test, assert_instr(vpcmp))]
3958#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3959pub const fn _mm256_cmpeq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3960    unsafe { simd_bitmask::<u8x32, _>(simd_eq(a.as_u8x32(), b.as_u8x32())) }
3961}
3962
/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu8_mask&expand=818)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpeq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Delegate to the generic predicate compare with the EQ encoding; result
    // bits whose k1 bit is clear come back zeroed.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3974
3975/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
3976///
3977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu8_mask&expand=815)
3978#[inline]
3979#[target_feature(enable = "avx512bw,avx512vl")]
3980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3981#[cfg_attr(test, assert_instr(vpcmp))]
3982#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3983pub const fn _mm_cmpeq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3984    unsafe { simd_bitmask::<u8x16, _>(simd_eq(a.as_u8x16(), b.as_u8x16())) }
3985}
3986
/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu8_mask&expand=816)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpeq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Delegate to the generic predicate compare with the EQ encoding; result
    // bits whose k1 bit is clear come back zeroed.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3998
3999/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
4000///
4001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi16_mask&expand=771)
4002#[inline]
4003#[target_feature(enable = "avx512bw")]
4004#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4005#[cfg_attr(test, assert_instr(vpcmp))]
4006#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4007pub const fn _mm512_cmpeq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
4008    unsafe { simd_bitmask::<i16x32, _>(simd_eq(a.as_i16x32(), b.as_i16x32())) }
4009}
4010
/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi16_mask&expand=772)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Delegate to the generic predicate compare with the EQ encoding; result
    // bits whose k1 bit is clear come back zeroed.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
4022
4023/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
4024///
4025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi16_mask&expand=769)
4026#[inline]
4027#[target_feature(enable = "avx512bw,avx512vl")]
4028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4029#[cfg_attr(test, assert_instr(vpcmp))]
4030#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4031pub const fn _mm256_cmpeq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
4032    unsafe { simd_bitmask::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
4033}
4034
/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi16_mask&expand=770)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpeq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Delegate to the generic predicate compare with the EQ encoding; result
    // bits whose k1 bit is clear come back zeroed.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
4046
4047/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
4048///
4049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16_mask&expand=767)
4050#[inline]
4051#[target_feature(enable = "avx512bw,avx512vl")]
4052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4053#[cfg_attr(test, assert_instr(vpcmp))]
4054#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4055pub const fn _mm_cmpeq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
4056    unsafe { simd_bitmask::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
4057}
4058
/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi16_mask&expand=768)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpeq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic predicate compare with the EQ encoding; result
    // bits whose k1 bit is clear come back zeroed.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
4070
4071/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
4072///
4073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi8_mask&expand=795)
4074#[inline]
4075#[target_feature(enable = "avx512bw")]
4076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4077#[cfg_attr(test, assert_instr(vpcmp))]
4078#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4079pub const fn _mm512_cmpeq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
4080    unsafe { simd_bitmask::<i8x64, _>(simd_eq(a.as_i8x64(), b.as_i8x64())) }
4081}
4082
/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi8_mask&expand=796)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegate to the generic predicate compare with the EQ encoding; result
    // bits whose k1 bit is clear come back zeroed.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
4094
4095/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
4096///
4097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi8_mask&expand=793)
4098#[inline]
4099#[target_feature(enable = "avx512bw,avx512vl")]
4100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4101#[cfg_attr(test, assert_instr(vpcmp))]
4102#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4103pub const fn _mm256_cmpeq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
4104    unsafe { simd_bitmask::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
4105}
4106
/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi8_mask&expand=794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpeq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Delegate to the generic predicate compare with the EQ encoding; result
    // bits whose k1 bit is clear come back zeroed.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
4118
4119/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
4120///
4121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8_mask&expand=791)
4122#[inline]
4123#[target_feature(enable = "avx512bw,avx512vl")]
4124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4125#[cfg_attr(test, assert_instr(vpcmp))]
4126#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4127pub const fn _mm_cmpeq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
4128    unsafe { simd_bitmask::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
4129}
4130
/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi8_mask&expand=792)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpeq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Delegate to the generic predicate compare with the EQ encoding; result
    // bits whose k1 bit is clear come back zeroed.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
4142
4143/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
4144///
4145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu16_mask&expand=1106)
4146#[inline]
4147#[target_feature(enable = "avx512bw")]
4148#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4149#[cfg_attr(test, assert_instr(vpcmp))]
4150#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4151pub const fn _mm512_cmpneq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
4152    unsafe { simd_bitmask::<u16x32, _>(simd_ne(a.as_u16x32(), b.as_u16x32())) }
4153}
4154
/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu16_mask&expand=1107)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpneq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Delegate to the generic predicate compare with the NE encoding; result
    // bits whose k1 bit is clear come back zeroed.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4166
4167/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
4168///
4169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu16_mask&expand=1104)
4170#[inline]
4171#[target_feature(enable = "avx512bw,avx512vl")]
4172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4173#[cfg_attr(test, assert_instr(vpcmp))]
4174#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4175pub const fn _mm256_cmpneq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
4176    unsafe { simd_bitmask::<u16x16, _>(simd_ne(a.as_u16x16(), b.as_u16x16())) }
4177}
4178
/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu16_mask&expand=1105)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpneq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Delegate to the generic predicate compare with the NE encoding; result
    // bits whose k1 bit is clear come back zeroed.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4190
4191/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
4192///
4193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu16_mask&expand=1102)
4194#[inline]
4195#[target_feature(enable = "avx512bw,avx512vl")]
4196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4197#[cfg_attr(test, assert_instr(vpcmp))]
4198#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4199pub const fn _mm_cmpneq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
4200    unsafe { simd_bitmask::<u16x8, _>(simd_ne(a.as_u16x8(), b.as_u16x8())) }
4201}
4202
/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu16_mask&expand=1103)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpneq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic predicate compare with the NE encoding; result
    // bits whose k1 bit is clear come back zeroed.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4214
4215/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
4216///
4217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu8_mask&expand=1124)
4218#[inline]
4219#[target_feature(enable = "avx512bw")]
4220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4221#[cfg_attr(test, assert_instr(vpcmp))]
4222#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4223pub const fn _mm512_cmpneq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
4224    unsafe { simd_bitmask::<u8x64, _>(simd_ne(a.as_u8x64(), b.as_u8x64())) }
4225}
4226
/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu8_mask&expand=1125)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpneq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegate to the generic predicate compare with the NE encoding; result
    // bits whose k1 bit is clear come back zeroed.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4238
4239/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
4240///
4241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu8_mask&expand=1122)
4242#[inline]
4243#[target_feature(enable = "avx512bw,avx512vl")]
4244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4245#[cfg_attr(test, assert_instr(vpcmp))]
4246#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4247pub const fn _mm256_cmpneq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
4248    unsafe { simd_bitmask::<u8x32, _>(simd_ne(a.as_u8x32(), b.as_u8x32())) }
4249}
4250
/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu8_mask&expand=1123)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpneq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Delegate to the generic predicate compare with the NE encoding; result
    // bits whose k1 bit is clear come back zeroed.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4262
4263/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
4264///
4265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu8_mask&expand=1120)
4266#[inline]
4267#[target_feature(enable = "avx512bw,avx512vl")]
4268#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4269#[cfg_attr(test, assert_instr(vpcmp))]
4270#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4271pub const fn _mm_cmpneq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
4272    unsafe { simd_bitmask::<u8x16, _>(simd_ne(a.as_u8x16(), b.as_u8x16())) }
4273}
4274
/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu8_mask&expand=1121)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpneq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Delegate to the generic predicate compare with the NE encoding; result
    // bits whose k1 bit is clear come back zeroed.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4286
4287/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
4288///
4289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi16_mask&expand=1082)
4290#[inline]
4291#[target_feature(enable = "avx512bw")]
4292#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4293#[cfg_attr(test, assert_instr(vpcmp))]
4294#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4295pub const fn _mm512_cmpneq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
4296    unsafe { simd_bitmask::<i16x32, _>(simd_ne(a.as_i16x32(), b.as_i16x32())) }
4297}
4298
/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi16_mask&expand=1083)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpneq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Delegate to the generic predicate compare with the NE encoding; result
    // bits whose k1 bit is clear come back zeroed.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4310
4311/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
4312///
4313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi16_mask&expand=1080)
4314#[inline]
4315#[target_feature(enable = "avx512bw,avx512vl")]
4316#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4317#[cfg_attr(test, assert_instr(vpcmp))]
4318#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4319pub const fn _mm256_cmpneq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
4320    unsafe { simd_bitmask::<i16x16, _>(simd_ne(a.as_i16x16(), b.as_i16x16())) }
4321}
4322
/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi16_mask&expand=1081)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpneq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Delegate to the generic predicate compare with the NE encoding; result
    // bits whose k1 bit is clear come back zeroed.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4334
4335/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
4336///
4337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi16_mask&expand=1078)
4338#[inline]
4339#[target_feature(enable = "avx512bw,avx512vl")]
4340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4341#[cfg_attr(test, assert_instr(vpcmp))]
4342#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4343pub const fn _mm_cmpneq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
4344    unsafe { simd_bitmask::<i16x8, _>(simd_ne(a.as_i16x8(), b.as_i16x8())) }
4345}
4346
/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi16_mask&expand=1079)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpneq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic predicate compare with the NE encoding; result
    // bits whose k1 bit is clear come back zeroed.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4358
4359/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
4360///
4361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi8_mask&expand=1100)
4362#[inline]
4363#[target_feature(enable = "avx512bw")]
4364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4365#[cfg_attr(test, assert_instr(vpcmp))]
4366#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4367pub const fn _mm512_cmpneq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
4368    unsafe { simd_bitmask::<i8x64, _>(simd_ne(a.as_i8x64(), b.as_i8x64())) }
4369}
4370
/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi8_mask&expand=1101)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegate to the generic predicate compare with the NE encoding; result
    // bits whose k1 bit is clear come back zeroed.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4382
4383/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
4384///
4385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi8_mask&expand=1098)
4386#[inline]
4387#[target_feature(enable = "avx512bw,avx512vl")]
4388#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4389#[cfg_attr(test, assert_instr(vpcmp))]
4390#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4391pub const fn _mm256_cmpneq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
4392    unsafe { simd_bitmask::<i8x32, _>(simd_ne(a.as_i8x32(), b.as_i8x32())) }
4393}
4394
/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi8_mask&expand=1099)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpneq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Delegate to the generic predicate compare with the NE encoding; result
    // bits whose k1 bit is clear come back zeroed.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4406
4407/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
4408///
4409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi8_mask&expand=1096)
4410#[inline]
4411#[target_feature(enable = "avx512bw,avx512vl")]
4412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4413#[cfg_attr(test, assert_instr(vpcmp))]
4414#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4415pub const fn _mm_cmpneq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
4416    unsafe { simd_bitmask::<i8x16, _>(simd_ne(a.as_i8x16(), b.as_i8x16())) }
4417}
4418
/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi8_mask&expand=1097)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpneq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Delegate to the generic predicate compare with the NE encoding; result
    // bits whose k1 bit is clear come back zeroed.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4430
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu16_mask&expand=715)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmp_epu16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe {
        // Only a 3-bit predicate is valid; reject anything outside 0..=7.
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u16x32();
        let b = b.as_u16x32();
        // The arms follow the _MM_CMPINT_* encoding:
        // 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x32::ZERO, // _MM_CMPINT_FALSE: every lane false
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x32::splat(-1), // 7 = _MM_CMPINT_TRUE: every lane true
        };
        // Pack the per-lane truth values into a 32-bit mask.
        simd_bitmask(r)
    }
}
4458
4459/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4460///
4461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu16_mask&expand=716)
4462#[inline]
4463#[target_feature(enable = "avx512bw")]
4464#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4465#[rustc_legacy_const_generics(3)]
4466#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4467#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4468pub const fn _mm512_mask_cmp_epu16_mask<const IMM8: i32>(
4469    k1: __mmask32,
4470    a: __m512i,
4471    b: __m512i,
4472) -> __mmask32 {
4473    unsafe {
4474        static_assert_uimm_bits!(IMM8, 3);
4475        let a = a.as_u16x32();
4476        let b = b.as_u16x32();
4477        let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO);
4478        let r = match IMM8 {
4479            0 => simd_and(k1, simd_eq(a, b)),
4480            1 => simd_and(k1, simd_lt(a, b)),
4481            2 => simd_and(k1, simd_le(a, b)),
4482            3 => i16x32::ZERO,
4483            4 => simd_and(k1, simd_ne(a, b)),
4484            5 => simd_and(k1, simd_ge(a, b)),
4485            6 => simd_and(k1, simd_gt(a, b)),
4486            _ => k1,
4487        };
4488        simd_bitmask(r)
4489    }
4490}
4491
4492/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4493///
4494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu16_mask&expand=713)
4495#[inline]
4496#[target_feature(enable = "avx512bw,avx512vl")]
4497#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4498#[rustc_legacy_const_generics(2)]
4499#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4500#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4501pub const fn _mm256_cmp_epu16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
4502    unsafe {
4503        static_assert_uimm_bits!(IMM8, 3);
4504        let a = a.as_u16x16();
4505        let b = b.as_u16x16();
4506        let r = match IMM8 {
4507            0 => simd_eq(a, b),
4508            1 => simd_lt(a, b),
4509            2 => simd_le(a, b),
4510            3 => i16x16::ZERO,
4511            4 => simd_ne(a, b),
4512            5 => simd_ge(a, b),
4513            6 => simd_gt(a, b),
4514            _ => i16x16::splat(-1),
4515        };
4516        simd_bitmask(r)
4517    }
4518}
4519
4520/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4521///
4522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu16_mask&expand=714)
4523#[inline]
4524#[target_feature(enable = "avx512bw,avx512vl")]
4525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4526#[rustc_legacy_const_generics(3)]
4527#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4528#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4529pub const fn _mm256_mask_cmp_epu16_mask<const IMM8: i32>(
4530    k1: __mmask16,
4531    a: __m256i,
4532    b: __m256i,
4533) -> __mmask16 {
4534    unsafe {
4535        static_assert_uimm_bits!(IMM8, 3);
4536        let a = a.as_u16x16();
4537        let b = b.as_u16x16();
4538        let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO);
4539        let r = match IMM8 {
4540            0 => simd_and(k1, simd_eq(a, b)),
4541            1 => simd_and(k1, simd_lt(a, b)),
4542            2 => simd_and(k1, simd_le(a, b)),
4543            3 => i16x16::ZERO,
4544            4 => simd_and(k1, simd_ne(a, b)),
4545            5 => simd_and(k1, simd_ge(a, b)),
4546            6 => simd_and(k1, simd_gt(a, b)),
4547            _ => k1,
4548        };
4549        simd_bitmask(r)
4550    }
4551}
4552
4553/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4554///
4555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu16_mask&expand=711)
4556#[inline]
4557#[target_feature(enable = "avx512bw,avx512vl")]
4558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4559#[rustc_legacy_const_generics(2)]
4560#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4561#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4562pub const fn _mm_cmp_epu16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
4563    unsafe {
4564        static_assert_uimm_bits!(IMM8, 3);
4565        let a = a.as_u16x8();
4566        let b = b.as_u16x8();
4567        let r = match IMM8 {
4568            0 => simd_eq(a, b),
4569            1 => simd_lt(a, b),
4570            2 => simd_le(a, b),
4571            3 => i16x8::ZERO,
4572            4 => simd_ne(a, b),
4573            5 => simd_ge(a, b),
4574            6 => simd_gt(a, b),
4575            _ => i16x8::splat(-1),
4576        };
4577        simd_bitmask(r)
4578    }
4579}
4580
4581/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4582///
4583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu16_mask&expand=712)
4584#[inline]
4585#[target_feature(enable = "avx512bw,avx512vl")]
4586#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4587#[rustc_legacy_const_generics(3)]
4588#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4589#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4590pub const fn _mm_mask_cmp_epu16_mask<const IMM8: i32>(
4591    k1: __mmask8,
4592    a: __m128i,
4593    b: __m128i,
4594) -> __mmask8 {
4595    unsafe {
4596        static_assert_uimm_bits!(IMM8, 3);
4597        let a = a.as_u16x8();
4598        let b = b.as_u16x8();
4599        let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO);
4600        let r = match IMM8 {
4601            0 => simd_and(k1, simd_eq(a, b)),
4602            1 => simd_and(k1, simd_lt(a, b)),
4603            2 => simd_and(k1, simd_le(a, b)),
4604            3 => i16x8::ZERO,
4605            4 => simd_and(k1, simd_ne(a, b)),
4606            5 => simd_and(k1, simd_ge(a, b)),
4607            6 => simd_and(k1, simd_gt(a, b)),
4608            _ => k1,
4609        };
4610        simd_bitmask(r)
4611    }
4612}
4613
4614/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4615///
4616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu8_mask&expand=733)
4617#[inline]
4618#[target_feature(enable = "avx512bw")]
4619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4620#[rustc_legacy_const_generics(2)]
4621#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4623pub const fn _mm512_cmp_epu8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
4624    unsafe {
4625        static_assert_uimm_bits!(IMM8, 3);
4626        let a = a.as_u8x64();
4627        let b = b.as_u8x64();
4628        let r = match IMM8 {
4629            0 => simd_eq(a, b),
4630            1 => simd_lt(a, b),
4631            2 => simd_le(a, b),
4632            3 => i8x64::ZERO,
4633            4 => simd_ne(a, b),
4634            5 => simd_ge(a, b),
4635            6 => simd_gt(a, b),
4636            _ => i8x64::splat(-1),
4637        };
4638        simd_bitmask(r)
4639    }
4640}
4641
4642/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4643///
4644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu8_mask&expand=734)
4645#[inline]
4646#[target_feature(enable = "avx512bw")]
4647#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4648#[rustc_legacy_const_generics(3)]
4649#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4650#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4651pub const fn _mm512_mask_cmp_epu8_mask<const IMM8: i32>(
4652    k1: __mmask64,
4653    a: __m512i,
4654    b: __m512i,
4655) -> __mmask64 {
4656    unsafe {
4657        static_assert_uimm_bits!(IMM8, 3);
4658        let a = a.as_u8x64();
4659        let b = b.as_u8x64();
4660        let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO);
4661        let r = match IMM8 {
4662            0 => simd_and(k1, simd_eq(a, b)),
4663            1 => simd_and(k1, simd_lt(a, b)),
4664            2 => simd_and(k1, simd_le(a, b)),
4665            3 => i8x64::ZERO,
4666            4 => simd_and(k1, simd_ne(a, b)),
4667            5 => simd_and(k1, simd_ge(a, b)),
4668            6 => simd_and(k1, simd_gt(a, b)),
4669            _ => k1,
4670        };
4671        simd_bitmask(r)
4672    }
4673}
4674
4675/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4676///
4677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu8_mask&expand=731)
4678#[inline]
4679#[target_feature(enable = "avx512bw,avx512vl")]
4680#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4681#[rustc_legacy_const_generics(2)]
4682#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4683#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4684pub const fn _mm256_cmp_epu8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
4685    unsafe {
4686        static_assert_uimm_bits!(IMM8, 3);
4687        let a = a.as_u8x32();
4688        let b = b.as_u8x32();
4689        let r = match IMM8 {
4690            0 => simd_eq(a, b),
4691            1 => simd_lt(a, b),
4692            2 => simd_le(a, b),
4693            3 => i8x32::ZERO,
4694            4 => simd_ne(a, b),
4695            5 => simd_ge(a, b),
4696            6 => simd_gt(a, b),
4697            _ => i8x32::splat(-1),
4698        };
4699        simd_bitmask(r)
4700    }
4701}
4702
4703/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4704///
4705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu8_mask&expand=732)
4706#[inline]
4707#[target_feature(enable = "avx512bw,avx512vl")]
4708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4709#[rustc_legacy_const_generics(3)]
4710#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4711#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4712pub const fn _mm256_mask_cmp_epu8_mask<const IMM8: i32>(
4713    k1: __mmask32,
4714    a: __m256i,
4715    b: __m256i,
4716) -> __mmask32 {
4717    unsafe {
4718        static_assert_uimm_bits!(IMM8, 3);
4719        let a = a.as_u8x32();
4720        let b = b.as_u8x32();
4721        let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO);
4722        let r = match IMM8 {
4723            0 => simd_and(k1, simd_eq(a, b)),
4724            1 => simd_and(k1, simd_lt(a, b)),
4725            2 => simd_and(k1, simd_le(a, b)),
4726            3 => i8x32::ZERO,
4727            4 => simd_and(k1, simd_ne(a, b)),
4728            5 => simd_and(k1, simd_ge(a, b)),
4729            6 => simd_and(k1, simd_gt(a, b)),
4730            _ => k1,
4731        };
4732        simd_bitmask(r)
4733    }
4734}
4735
4736/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4737///
4738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu8_mask&expand=729)
4739#[inline]
4740#[target_feature(enable = "avx512bw,avx512vl")]
4741#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4742#[rustc_legacy_const_generics(2)]
4743#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4744#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4745pub const fn _mm_cmp_epu8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
4746    unsafe {
4747        static_assert_uimm_bits!(IMM8, 3);
4748        let a = a.as_u8x16();
4749        let b = b.as_u8x16();
4750        let r = match IMM8 {
4751            0 => simd_eq(a, b),
4752            1 => simd_lt(a, b),
4753            2 => simd_le(a, b),
4754            3 => i8x16::ZERO,
4755            4 => simd_ne(a, b),
4756            5 => simd_ge(a, b),
4757            6 => simd_gt(a, b),
4758            _ => i8x16::splat(-1),
4759        };
4760        simd_bitmask(r)
4761    }
4762}
4763
4764/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4765///
4766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu8_mask&expand=730)
4767#[inline]
4768#[target_feature(enable = "avx512bw,avx512vl")]
4769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4770#[rustc_legacy_const_generics(3)]
4771#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4772#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4773pub const fn _mm_mask_cmp_epu8_mask<const IMM8: i32>(
4774    k1: __mmask16,
4775    a: __m128i,
4776    b: __m128i,
4777) -> __mmask16 {
4778    unsafe {
4779        static_assert_uimm_bits!(IMM8, 3);
4780        let a = a.as_u8x16();
4781        let b = b.as_u8x16();
4782        let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO);
4783        let r = match IMM8 {
4784            0 => simd_and(k1, simd_eq(a, b)),
4785            1 => simd_and(k1, simd_lt(a, b)),
4786            2 => simd_and(k1, simd_le(a, b)),
4787            3 => i8x16::ZERO,
4788            4 => simd_and(k1, simd_ne(a, b)),
4789            5 => simd_and(k1, simd_ge(a, b)),
4790            6 => simd_and(k1, simd_gt(a, b)),
4791            _ => k1,
4792        };
4793        simd_bitmask(r)
4794    }
4795}
4796
4797/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4798///
4799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi16_mask&expand=691)
4800#[inline]
4801#[target_feature(enable = "avx512bw")]
4802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4803#[rustc_legacy_const_generics(2)]
4804#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4805#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4806pub const fn _mm512_cmp_epi16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
4807    unsafe {
4808        static_assert_uimm_bits!(IMM8, 3);
4809        let a = a.as_i16x32();
4810        let b = b.as_i16x32();
4811        let r = match IMM8 {
4812            0 => simd_eq(a, b),
4813            1 => simd_lt(a, b),
4814            2 => simd_le(a, b),
4815            3 => i16x32::ZERO,
4816            4 => simd_ne(a, b),
4817            5 => simd_ge(a, b),
4818            6 => simd_gt(a, b),
4819            _ => i16x32::splat(-1),
4820        };
4821        simd_bitmask(r)
4822    }
4823}
4824
4825/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4826///
4827/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi16_mask&expand=692)
4828#[inline]
4829#[target_feature(enable = "avx512bw")]
4830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4831#[rustc_legacy_const_generics(3)]
4832#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4833#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4834pub const fn _mm512_mask_cmp_epi16_mask<const IMM8: i32>(
4835    k1: __mmask32,
4836    a: __m512i,
4837    b: __m512i,
4838) -> __mmask32 {
4839    unsafe {
4840        static_assert_uimm_bits!(IMM8, 3);
4841        let a = a.as_i16x32();
4842        let b = b.as_i16x32();
4843        let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO);
4844        let r = match IMM8 {
4845            0 => simd_and(k1, simd_eq(a, b)),
4846            1 => simd_and(k1, simd_lt(a, b)),
4847            2 => simd_and(k1, simd_le(a, b)),
4848            3 => i16x32::ZERO,
4849            4 => simd_and(k1, simd_ne(a, b)),
4850            5 => simd_and(k1, simd_ge(a, b)),
4851            6 => simd_and(k1, simd_gt(a, b)),
4852            _ => k1,
4853        };
4854        simd_bitmask(r)
4855    }
4856}
4857
4858/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4859///
4860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi16_mask&expand=689)
4861#[inline]
4862#[target_feature(enable = "avx512bw,avx512vl")]
4863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4864#[rustc_legacy_const_generics(2)]
4865#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4866#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4867pub const fn _mm256_cmp_epi16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
4868    unsafe {
4869        static_assert_uimm_bits!(IMM8, 3);
4870        let a = a.as_i16x16();
4871        let b = b.as_i16x16();
4872        let r = match IMM8 {
4873            0 => simd_eq(a, b),
4874            1 => simd_lt(a, b),
4875            2 => simd_le(a, b),
4876            3 => i16x16::ZERO,
4877            4 => simd_ne(a, b),
4878            5 => simd_ge(a, b),
4879            6 => simd_gt(a, b),
4880            _ => i16x16::splat(-1),
4881        };
4882        simd_bitmask(r)
4883    }
4884}
4885
4886/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4887///
4888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi16_mask&expand=690)
4889#[inline]
4890#[target_feature(enable = "avx512bw,avx512vl")]
4891#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4892#[rustc_legacy_const_generics(3)]
4893#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4894#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4895pub const fn _mm256_mask_cmp_epi16_mask<const IMM8: i32>(
4896    k1: __mmask16,
4897    a: __m256i,
4898    b: __m256i,
4899) -> __mmask16 {
4900    unsafe {
4901        static_assert_uimm_bits!(IMM8, 3);
4902        let a = a.as_i16x16();
4903        let b = b.as_i16x16();
4904        let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO);
4905        let r = match IMM8 {
4906            0 => simd_and(k1, simd_eq(a, b)),
4907            1 => simd_and(k1, simd_lt(a, b)),
4908            2 => simd_and(k1, simd_le(a, b)),
4909            3 => i16x16::ZERO,
4910            4 => simd_and(k1, simd_ne(a, b)),
4911            5 => simd_and(k1, simd_ge(a, b)),
4912            6 => simd_and(k1, simd_gt(a, b)),
4913            _ => k1,
4914        };
4915        simd_bitmask(r)
4916    }
4917}
4918
4919/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4920///
4921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi16_mask&expand=687)
4922#[inline]
4923#[target_feature(enable = "avx512bw,avx512vl")]
4924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4925#[rustc_legacy_const_generics(2)]
4926#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4927#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4928pub const fn _mm_cmp_epi16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
4929    unsafe {
4930        static_assert_uimm_bits!(IMM8, 3);
4931        let a = a.as_i16x8();
4932        let b = b.as_i16x8();
4933        let r = match IMM8 {
4934            0 => simd_eq(a, b),
4935            1 => simd_lt(a, b),
4936            2 => simd_le(a, b),
4937            3 => i16x8::ZERO,
4938            4 => simd_ne(a, b),
4939            5 => simd_ge(a, b),
4940            6 => simd_gt(a, b),
4941            _ => i16x8::splat(-1),
4942        };
4943        simd_bitmask(r)
4944    }
4945}
4946
4947/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4948///
4949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi16_mask&expand=688)
4950#[inline]
4951#[target_feature(enable = "avx512bw,avx512vl")]
4952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4953#[rustc_legacy_const_generics(3)]
4954#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4955#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4956pub const fn _mm_mask_cmp_epi16_mask<const IMM8: i32>(
4957    k1: __mmask8,
4958    a: __m128i,
4959    b: __m128i,
4960) -> __mmask8 {
4961    unsafe {
4962        static_assert_uimm_bits!(IMM8, 3);
4963        let a = a.as_i16x8();
4964        let b = b.as_i16x8();
4965        let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO);
4966        let r = match IMM8 {
4967            0 => simd_and(k1, simd_eq(a, b)),
4968            1 => simd_and(k1, simd_lt(a, b)),
4969            2 => simd_and(k1, simd_le(a, b)),
4970            3 => i16x8::ZERO,
4971            4 => simd_and(k1, simd_ne(a, b)),
4972            5 => simd_and(k1, simd_ge(a, b)),
4973            6 => simd_and(k1, simd_gt(a, b)),
4974            _ => k1,
4975        };
4976        simd_bitmask(r)
4977    }
4978}
4979
4980/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4981///
4982/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi8_mask&expand=709)
4983#[inline]
4984#[target_feature(enable = "avx512bw")]
4985#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4986#[rustc_legacy_const_generics(2)]
4987#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4988#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4989pub const fn _mm512_cmp_epi8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
4990    unsafe {
4991        static_assert_uimm_bits!(IMM8, 3);
4992        let a = a.as_i8x64();
4993        let b = b.as_i8x64();
4994        let r = match IMM8 {
4995            0 => simd_eq(a, b),
4996            1 => simd_lt(a, b),
4997            2 => simd_le(a, b),
4998            3 => i8x64::ZERO,
4999            4 => simd_ne(a, b),
5000            5 => simd_ge(a, b),
5001            6 => simd_gt(a, b),
5002            _ => i8x64::splat(-1),
5003        };
5004        simd_bitmask(r)
5005    }
5006}
5007
5008/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
5009///
5010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi8_mask&expand=710)
5011#[inline]
5012#[target_feature(enable = "avx512bw")]
5013#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5014#[rustc_legacy_const_generics(3)]
5015#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
5016#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5017pub const fn _mm512_mask_cmp_epi8_mask<const IMM8: i32>(
5018    k1: __mmask64,
5019    a: __m512i,
5020    b: __m512i,
5021) -> __mmask64 {
5022    unsafe {
5023        static_assert_uimm_bits!(IMM8, 3);
5024        let a = a.as_i8x64();
5025        let b = b.as_i8x64();
5026        let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO);
5027        let r = match IMM8 {
5028            0 => simd_and(k1, simd_eq(a, b)),
5029            1 => simd_and(k1, simd_lt(a, b)),
5030            2 => simd_and(k1, simd_le(a, b)),
5031            3 => i8x64::ZERO,
5032            4 => simd_and(k1, simd_ne(a, b)),
5033            5 => simd_and(k1, simd_ge(a, b)),
5034            6 => simd_and(k1, simd_gt(a, b)),
5035            _ => k1,
5036        };
5037        simd_bitmask(r)
5038    }
5039}
5040
5041/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
5042///
5043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi8_mask&expand=707)
5044#[inline]
5045#[target_feature(enable = "avx512bw,avx512vl")]
5046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5047#[rustc_legacy_const_generics(2)]
5048#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
5049#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5050pub const fn _mm256_cmp_epi8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
5051    unsafe {
5052        static_assert_uimm_bits!(IMM8, 3);
5053        let a = a.as_i8x32();
5054        let b = b.as_i8x32();
5055        let r = match IMM8 {
5056            0 => simd_eq(a, b),
5057            1 => simd_lt(a, b),
5058            2 => simd_le(a, b),
5059            3 => i8x32::ZERO,
5060            4 => simd_ne(a, b),
5061            5 => simd_ge(a, b),
5062            6 => simd_gt(a, b),
5063            _ => i8x32::splat(-1),
5064        };
5065        simd_bitmask(r)
5066    }
5067}
5068
5069/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
5070///
5071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi8_mask&expand=708)
5072#[inline]
5073#[target_feature(enable = "avx512bw,avx512vl")]
5074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5075#[rustc_legacy_const_generics(3)]
5076#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
5077#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5078pub const fn _mm256_mask_cmp_epi8_mask<const IMM8: i32>(
5079    k1: __mmask32,
5080    a: __m256i,
5081    b: __m256i,
5082) -> __mmask32 {
5083    unsafe {
5084        static_assert_uimm_bits!(IMM8, 3);
5085        let a = a.as_i8x32();
5086        let b = b.as_i8x32();
5087        let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO);
5088        let r = match IMM8 {
5089            0 => simd_and(k1, simd_eq(a, b)),
5090            1 => simd_and(k1, simd_lt(a, b)),
5091            2 => simd_and(k1, simd_le(a, b)),
5092            3 => i8x32::ZERO,
5093            4 => simd_and(k1, simd_ne(a, b)),
5094            5 => simd_and(k1, simd_ge(a, b)),
5095            6 => simd_and(k1, simd_gt(a, b)),
5096            _ => k1,
5097        };
5098        simd_bitmask(r)
5099    }
5100}
5101
5102/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
5103///
5104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi8_mask&expand=705)
5105#[inline]
5106#[target_feature(enable = "avx512bw,avx512vl")]
5107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5108#[rustc_legacy_const_generics(2)]
5109#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
5110#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5111pub const fn _mm_cmp_epi8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
5112    unsafe {
5113        static_assert_uimm_bits!(IMM8, 3);
5114        let a = a.as_i8x16();
5115        let b = b.as_i8x16();
5116        let r = match IMM8 {
5117            0 => simd_eq(a, b),
5118            1 => simd_lt(a, b),
5119            2 => simd_le(a, b),
5120            3 => i8x16::ZERO,
5121            4 => simd_ne(a, b),
5122            5 => simd_ge(a, b),
5123            6 => simd_gt(a, b),
5124            _ => i8x16::splat(-1),
5125        };
5126        simd_bitmask(r)
5127    }
5128}
5129
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi8_mask&expand=706)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmp_epi8_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __mmask16 {
    unsafe {
        // IMM8 must fit in 3 bits: it selects one of the 8 AVX-512 compare predicates.
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i8x16();
        let b = b.as_i8x16();
        // Expand the bitmask k1 into a per-lane vector mask (-1 active, 0 inactive)
        // so it can be ANDed with the compare result lane-wise.
        let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO);
        // Predicate encoding matches vpcmpb: EQ, LT, LE, FALSE, NE, NLT(GE), NLE(GT), TRUE.
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            // 3 (_CMP_FALSE): result is all zeros regardless of k1.
            3 => i8x16::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            // 7 (_CMP_TRUE): every lane is true, so the result is just k1.
            _ => k1,
        };
        // Collapse the per-lane boolean vector into a 16-bit mask.
        simd_bitmask(r)
    }
}
5162
5163/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
5164///
5165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi16)
5166#[inline]
5167#[target_feature(enable = "avx512bw,avx512vl")]
5168#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5169#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5170pub const fn _mm256_reduce_add_epi16(a: __m256i) -> i16 {
5171    unsafe { simd_reduce_add_ordered(a.as_i16x16(), 0) }
5172}
5173
5174/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
5175///
5176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi16)
5177#[inline]
5178#[target_feature(enable = "avx512bw,avx512vl")]
5179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5180#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5181pub const fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 {
5182    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO), 0) }
5183}
5184
5185/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
5186///
5187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi16)
5188#[inline]
5189#[target_feature(enable = "avx512bw,avx512vl")]
5190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5191#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5192pub const fn _mm_reduce_add_epi16(a: __m128i) -> i16 {
5193    unsafe { simd_reduce_add_ordered(a.as_i16x8(), 0) }
5194}
5195
5196/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
5197///
5198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi16)
5199#[inline]
5200#[target_feature(enable = "avx512bw,avx512vl")]
5201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5202#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5203pub const fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 {
5204    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO), 0) }
5205}
5206
5207/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
5208///
5209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi8)
5210#[inline]
5211#[target_feature(enable = "avx512bw,avx512vl")]
5212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5213#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5214pub const fn _mm256_reduce_add_epi8(a: __m256i) -> i8 {
5215    unsafe { simd_reduce_add_ordered(a.as_i8x32(), 0) }
5216}
5217
5218/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
5219///
5220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi8)
5221#[inline]
5222#[target_feature(enable = "avx512bw,avx512vl")]
5223#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5224#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5225pub const fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 {
5226    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO), 0) }
5227}
5228
5229/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
5230///
5231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi8)
5232#[inline]
5233#[target_feature(enable = "avx512bw,avx512vl")]
5234#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5235#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5236pub const fn _mm_reduce_add_epi8(a: __m128i) -> i8 {
5237    unsafe { simd_reduce_add_ordered(a.as_i8x16(), 0) }
5238}
5239
5240/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
5241///
5242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi8)
5243#[inline]
5244#[target_feature(enable = "avx512bw,avx512vl")]
5245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5246#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5247pub const fn _mm_mask_reduce_add_epi8(k: __mmask16, a: __m128i) -> i8 {
5248    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO), 0) }
5249}
5250
5251/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
5252///
5253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi16)
5254#[inline]
5255#[target_feature(enable = "avx512bw,avx512vl")]
5256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5257#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5258pub const fn _mm256_reduce_and_epi16(a: __m256i) -> i16 {
5259    unsafe { simd_reduce_and(a.as_i16x16()) }
5260}
5261
5262/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
5263///
5264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi16)
5265#[inline]
5266#[target_feature(enable = "avx512bw,avx512vl")]
5267#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5268#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5269pub const fn _mm256_mask_reduce_and_epi16(k: __mmask16, a: __m256i) -> i16 {
5270    unsafe {
5271        simd_reduce_and(simd_select_bitmask(
5272            k,
5273            a.as_i16x16(),
5274            _mm256_set1_epi64x(-1).as_i16x16(),
5275        ))
5276    }
5277}
5278
5279/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
5280///
5281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi16)
5282#[inline]
5283#[target_feature(enable = "avx512bw,avx512vl")]
5284#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5285#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5286pub const fn _mm_reduce_and_epi16(a: __m128i) -> i16 {
5287    unsafe { simd_reduce_and(a.as_i16x8()) }
5288}
5289
5290/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
5291///
5292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi16)
5293#[inline]
5294#[target_feature(enable = "avx512bw,avx512vl")]
5295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5296#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5297pub const fn _mm_mask_reduce_and_epi16(k: __mmask8, a: __m128i) -> i16 {
5298    unsafe {
5299        simd_reduce_and(simd_select_bitmask(
5300            k,
5301            a.as_i16x8(),
5302            _mm_set1_epi64x(-1).as_i16x8(),
5303        ))
5304    }
5305}
5306
5307/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
5308///
5309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi8)
5310#[inline]
5311#[target_feature(enable = "avx512bw,avx512vl")]
5312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5313#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5314pub const fn _mm256_reduce_and_epi8(a: __m256i) -> i8 {
5315    unsafe { simd_reduce_and(a.as_i8x32()) }
5316}
5317
5318/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
5319///
5320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi8)
5321#[inline]
5322#[target_feature(enable = "avx512bw,avx512vl")]
5323#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5324#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5325pub const fn _mm256_mask_reduce_and_epi8(k: __mmask32, a: __m256i) -> i8 {
5326    unsafe {
5327        simd_reduce_and(simd_select_bitmask(
5328            k,
5329            a.as_i8x32(),
5330            _mm256_set1_epi64x(-1).as_i8x32(),
5331        ))
5332    }
5333}
5334
5335/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
5336///
5337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi8)
5338#[inline]
5339#[target_feature(enable = "avx512bw,avx512vl")]
5340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5341#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5342pub const fn _mm_reduce_and_epi8(a: __m128i) -> i8 {
5343    unsafe { simd_reduce_and(a.as_i8x16()) }
5344}
5345
5346/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
5347///
5348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi8)
5349#[inline]
5350#[target_feature(enable = "avx512bw,avx512vl")]
5351#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5352#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5353pub const fn _mm_mask_reduce_and_epi8(k: __mmask16, a: __m128i) -> i8 {
5354    unsafe {
5355        simd_reduce_and(simd_select_bitmask(
5356            k,
5357            a.as_i8x16(),
5358            _mm_set1_epi64x(-1).as_i8x16(),
5359        ))
5360    }
5361}
5362
5363/// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a.
5364///
5365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi16)
5366#[inline]
5367#[target_feature(enable = "avx512bw,avx512vl")]
5368#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5369#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5370pub const fn _mm256_reduce_max_epi16(a: __m256i) -> i16 {
5371    unsafe { simd_reduce_max(a.as_i16x16()) }
5372}
5373
5374/// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5375///
5376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi16)
5377#[inline]
5378#[target_feature(enable = "avx512bw,avx512vl")]
5379#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5380#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5381pub const fn _mm256_mask_reduce_max_epi16(k: __mmask16, a: __m256i) -> i16 {
5382    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(-32768))) }
5383}
5384
5385/// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a.
5386///
5387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi16)
5388#[inline]
5389#[target_feature(enable = "avx512bw,avx512vl")]
5390#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5391#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5392pub const fn _mm_reduce_max_epi16(a: __m128i) -> i16 {
5393    unsafe { simd_reduce_max(a.as_i16x8()) }
5394}
5395
5396/// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5397///
5398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi16)
5399#[inline]
5400#[target_feature(enable = "avx512bw,avx512vl")]
5401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5402#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5403pub const fn _mm_mask_reduce_max_epi16(k: __mmask8, a: __m128i) -> i16 {
5404    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(-32768))) }
5405}
5406
5407/// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a.
5408///
5409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi8)
5410#[inline]
5411#[target_feature(enable = "avx512bw,avx512vl")]
5412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5413#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5414pub const fn _mm256_reduce_max_epi8(a: __m256i) -> i8 {
5415    unsafe { simd_reduce_max(a.as_i8x32()) }
5416}
5417
5418/// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5419///
5420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi8)
5421#[inline]
5422#[target_feature(enable = "avx512bw,avx512vl")]
5423#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5424#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5425pub const fn _mm256_mask_reduce_max_epi8(k: __mmask32, a: __m256i) -> i8 {
5426    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(-128))) }
5427}
5428
5429/// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a.
5430///
5431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi8)
5432#[inline]
5433#[target_feature(enable = "avx512bw,avx512vl")]
5434#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5435#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5436pub const fn _mm_reduce_max_epi8(a: __m128i) -> i8 {
5437    unsafe { simd_reduce_max(a.as_i8x16()) }
5438}
5439
5440/// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5441///
5442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi8)
5443#[inline]
5444#[target_feature(enable = "avx512bw,avx512vl")]
5445#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5446#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5447pub const fn _mm_mask_reduce_max_epi8(k: __mmask16, a: __m128i) -> i8 {
5448    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(-128))) }
5449}
5450
5451/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a.
5452///
5453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu16)
5454#[inline]
5455#[target_feature(enable = "avx512bw,avx512vl")]
5456#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5457#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5458pub const fn _mm256_reduce_max_epu16(a: __m256i) -> u16 {
5459    unsafe { simd_reduce_max(a.as_u16x16()) }
5460}
5461
5462/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5463///
5464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu16)
5465#[inline]
5466#[target_feature(enable = "avx512bw,avx512vl")]
5467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5468#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5469pub const fn _mm256_mask_reduce_max_epu16(k: __mmask16, a: __m256i) -> u16 {
5470    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x16(), u16x16::ZERO)) }
5471}
5472
5473/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a.
5474///
5475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu16)
5476#[inline]
5477#[target_feature(enable = "avx512bw,avx512vl")]
5478#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5479#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5480pub const fn _mm_reduce_max_epu16(a: __m128i) -> u16 {
5481    unsafe { simd_reduce_max(a.as_u16x8()) }
5482}
5483
5484/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5485///
5486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu16)
5487#[inline]
5488#[target_feature(enable = "avx512bw,avx512vl")]
5489#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5490#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5491pub const fn _mm_mask_reduce_max_epu16(k: __mmask8, a: __m128i) -> u16 {
5492    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x8(), u16x8::ZERO)) }
5493}
5494
5495/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a.
5496///
5497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu8)
5498#[inline]
5499#[target_feature(enable = "avx512bw,avx512vl")]
5500#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5501#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5502pub const fn _mm256_reduce_max_epu8(a: __m256i) -> u8 {
5503    unsafe { simd_reduce_max(a.as_u8x32()) }
5504}
5505
5506/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5507///
5508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu8)
5509#[inline]
5510#[target_feature(enable = "avx512bw,avx512vl")]
5511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5512#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5513pub const fn _mm256_mask_reduce_max_epu8(k: __mmask32, a: __m256i) -> u8 {
5514    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x32(), u8x32::ZERO)) }
5515}
5516
5517/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a.
5518///
5519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu8)
5520#[inline]
5521#[target_feature(enable = "avx512bw,avx512vl")]
5522#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5523#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5524pub const fn _mm_reduce_max_epu8(a: __m128i) -> u8 {
5525    unsafe { simd_reduce_max(a.as_u8x16()) }
5526}
5527
5528/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5529///
5530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu8)
5531#[inline]
5532#[target_feature(enable = "avx512bw,avx512vl")]
5533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5534#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5535pub const fn _mm_mask_reduce_max_epu8(k: __mmask16, a: __m128i) -> u8 {
5536    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x16(), u8x16::ZERO)) }
5537}
5538
5539/// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a.
5540///
5541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi16)
5542#[inline]
5543#[target_feature(enable = "avx512bw,avx512vl")]
5544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5545#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5546pub const fn _mm256_reduce_min_epi16(a: __m256i) -> i16 {
5547    unsafe { simd_reduce_min(a.as_i16x16()) }
5548}
5549
5550/// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5551///
5552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi16)
5553#[inline]
5554#[target_feature(enable = "avx512bw,avx512vl")]
5555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5556#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5557pub const fn _mm256_mask_reduce_min_epi16(k: __mmask16, a: __m256i) -> i16 {
5558    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(0x7fff))) }
5559}
5560
5561/// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a.
5562///
5563/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi16)
5564#[inline]
5565#[target_feature(enable = "avx512bw,avx512vl")]
5566#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5567#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5568pub const fn _mm_reduce_min_epi16(a: __m128i) -> i16 {
5569    unsafe { simd_reduce_min(a.as_i16x8()) }
5570}
5571
5572/// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5573///
5574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi16)
5575#[inline]
5576#[target_feature(enable = "avx512bw,avx512vl")]
5577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5578#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5579pub const fn _mm_mask_reduce_min_epi16(k: __mmask8, a: __m128i) -> i16 {
5580    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(0x7fff))) }
5581}
5582
5583/// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a.
5584///
5585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi8)
5586#[inline]
5587#[target_feature(enable = "avx512bw,avx512vl")]
5588#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5589#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5590pub const fn _mm256_reduce_min_epi8(a: __m256i) -> i8 {
5591    unsafe { simd_reduce_min(a.as_i8x32()) }
5592}
5593
5594/// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5595///
5596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi8)
5597#[inline]
5598#[target_feature(enable = "avx512bw,avx512vl")]
5599#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5600#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5601pub const fn _mm256_mask_reduce_min_epi8(k: __mmask32, a: __m256i) -> i8 {
5602    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(0x7f))) }
5603}
5604
5605/// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a.
5606///
5607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi8)
5608#[inline]
5609#[target_feature(enable = "avx512bw,avx512vl")]
5610#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5611#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5612pub const fn _mm_reduce_min_epi8(a: __m128i) -> i8 {
5613    unsafe { simd_reduce_min(a.as_i8x16()) }
5614}
5615
5616/// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5617///
5618/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi8)
5619#[inline]
5620#[target_feature(enable = "avx512bw,avx512vl")]
5621#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5623pub const fn _mm_mask_reduce_min_epi8(k: __mmask16, a: __m128i) -> i8 {
5624    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(0x7f))) }
5625}
5626
5627/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a.
5628///
5629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu16)
5630#[inline]
5631#[target_feature(enable = "avx512bw,avx512vl")]
5632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5633#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5634pub const fn _mm256_reduce_min_epu16(a: __m256i) -> u16 {
5635    unsafe { simd_reduce_min(a.as_u16x16()) }
5636}
5637
5638/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5639///
5640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu16)
5641#[inline]
5642#[target_feature(enable = "avx512bw,avx512vl")]
5643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5644#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5645pub const fn _mm256_mask_reduce_min_epu16(k: __mmask16, a: __m256i) -> u16 {
5646    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x16(), u16x16::splat(0xffff))) }
5647}
5648
5649/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a.
5650///
5651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu16)
5652#[inline]
5653#[target_feature(enable = "avx512bw,avx512vl")]
5654#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5655#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5656pub const fn _mm_reduce_min_epu16(a: __m128i) -> u16 {
5657    unsafe { simd_reduce_min(a.as_u16x8()) }
5658}
5659
5660/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5661///
5662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu16)
5663#[inline]
5664#[target_feature(enable = "avx512bw,avx512vl")]
5665#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5666#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5667pub const fn _mm_mask_reduce_min_epu16(k: __mmask8, a: __m128i) -> u16 {
5668    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x8(), u16x8::splat(0xffff))) }
5669}
5670
5671/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a.
5672///
5673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu8)
5674#[inline]
5675#[target_feature(enable = "avx512bw,avx512vl")]
5676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5677#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5678pub const fn _mm256_reduce_min_epu8(a: __m256i) -> u8 {
5679    unsafe { simd_reduce_min(a.as_u8x32()) }
5680}
5681
5682/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5683///
5684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu8)
5685#[inline]
5686#[target_feature(enable = "avx512bw,avx512vl")]
5687#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5688#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5689pub const fn _mm256_mask_reduce_min_epu8(k: __mmask32, a: __m256i) -> u8 {
5690    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x32(), u8x32::splat(0xff))) }
5691}
5692
5693/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a.
5694///
5695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu8)
5696#[inline]
5697#[target_feature(enable = "avx512bw,avx512vl")]
5698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5699#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5700pub const fn _mm_reduce_min_epu8(a: __m128i) -> u8 {
5701    unsafe { simd_reduce_min(a.as_u8x16()) }
5702}
5703
5704/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5705///
5706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu8)
5707#[inline]
5708#[target_feature(enable = "avx512bw,avx512vl")]
5709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5710#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5711pub const fn _mm_mask_reduce_min_epu8(k: __mmask16, a: __m128i) -> u8 {
5712    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x16(), u8x16::splat(0xff))) }
5713}
5714
5715/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
5716///
5717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi16)
5718#[inline]
5719#[target_feature(enable = "avx512bw,avx512vl")]
5720#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5721#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5722pub const fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 {
5723    unsafe { simd_reduce_mul_ordered(a.as_i16x16(), 1) }
5724}
5725
5726/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
5727///
5728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi16)
5729#[inline]
5730#[target_feature(enable = "avx512bw,avx512vl")]
5731#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5732#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5733pub const fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 {
5734    unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(1)), 1) }
5735}
5736
5737/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
5738///
5739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi16)
5740#[inline]
5741#[target_feature(enable = "avx512bw,avx512vl")]
5742#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5743#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5744pub const fn _mm_reduce_mul_epi16(a: __m128i) -> i16 {
5745    unsafe { simd_reduce_mul_ordered(a.as_i16x8(), 1) }
5746}
5747
/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 {
    // Inactive lanes are replaced with 1 (the multiplicative identity) so they
    // do not affect the product.
    unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(1)), 1) }
}
5758
/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 {
    // Ordered horizontal product of all 32 lanes; 1 is the initial accumulator.
    unsafe { simd_reduce_mul_ordered(a.as_i8x32(), 1) }
}
5769
/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 {
    // Inactive lanes are replaced with 1 (the multiplicative identity) so they
    // do not affect the product.
    unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(1)), 1) }
}
5780
/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_mul_epi8(a: __m128i) -> i8 {
    // Ordered horizontal product of all 16 lanes; 1 is the initial accumulator.
    unsafe { simd_reduce_mul_ordered(a.as_i8x16(), 1) }
}
5791
/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_mul_epi8(k: __mmask16, a: __m128i) -> i8 {
    // Inactive lanes are replaced with 1 (the multiplicative identity) so they
    // do not affect the product.
    unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(1)), 1) }
}
5802
/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_or_epi16(a: __m256i) -> i16 {
    // Horizontal bitwise OR of all 16 lanes.
    unsafe { simd_reduce_or(a.as_i16x16()) }
}
5813
/// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_or_epi16(k: __mmask16, a: __m256i) -> i16 {
    // Inactive lanes are replaced with 0 (the identity for OR) before reducing.
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) }
}
5824
/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_or_epi16(a: __m128i) -> i16 {
    // Horizontal bitwise OR of all 8 lanes.
    unsafe { simd_reduce_or(a.as_i16x8()) }
}
5835
/// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_or_epi16(k: __mmask8, a: __m128i) -> i16 {
    // Inactive lanes are replaced with 0 (the identity for OR) before reducing.
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) }
}
5846
/// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_or_epi8(a: __m256i) -> i8 {
    // Horizontal bitwise OR of all 32 lanes.
    unsafe { simd_reduce_or(a.as_i8x32()) }
}
5857
/// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_or_epi8(k: __mmask32, a: __m256i) -> i8 {
    // Inactive lanes are replaced with 0 (the identity for OR) before reducing.
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) }
}
5868
/// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_or_epi8(a: __m128i) -> i8 {
    // Horizontal bitwise OR of all 16 lanes.
    unsafe { simd_reduce_or(a.as_i8x16()) }
}
5879
/// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_or_epi8(k: __mmask16, a: __m128i) -> i8 {
    // Inactive lanes are replaced with 0 (the identity for OR) before reducing.
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) }
}
5890
/// Load 512-bits (composed of 32 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi16&expand=3368)
///
/// # Safety
///
/// `mem_addr` must be valid for an unaligned read of 512 bits (64 bytes).
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_loadu_epi16(mem_addr: *const i16) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
}
5902
/// Load 256-bits (composed of 16 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi16&expand=3365)
///
/// # Safety
///
/// `mem_addr` must be valid for an unaligned read of 256 bits (32 bytes).
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_loadu_epi16(mem_addr: *const i16) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
}
5914
/// Load 128-bits (composed of 8 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi16&expand=3362)
///
/// # Safety
///
/// `mem_addr` must be valid for an unaligned read of 128 bits (16 bytes).
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_loadu_epi16(mem_addr: *const i16) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
}
5926
/// Load 512-bits (composed of 64 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi8&expand=3395)
///
/// # Safety
///
/// `mem_addr` must be valid for an unaligned read of 512 bits (64 bytes).
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_loadu_epi8(mem_addr: *const i8) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
}
5938
/// Load 256-bits (composed of 32 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi8&expand=3392)
///
/// # Safety
///
/// `mem_addr` must be valid for an unaligned read of 256 bits (32 bytes).
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_loadu_epi8(mem_addr: *const i8) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
}
5950
/// Load 128-bits (composed of 16 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi8&expand=3389)
///
/// # Safety
///
/// `mem_addr` must be valid for an unaligned read of 128 bits (16 bytes).
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_loadu_epi8(mem_addr: *const i8) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
}
5962
/// Store 512-bits (composed of 32 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi16&expand=5622)
///
/// # Safety
///
/// `mem_addr` must be valid for an unaligned write of 512 bits (64 bytes).
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_storeu_epi16(mem_addr: *mut i16, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}
5974
/// Store 256-bits (composed of 16 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi16&expand=5620)
///
/// # Safety
///
/// `mem_addr` must be valid for an unaligned write of 256 bits (32 bytes).
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_storeu_epi16(mem_addr: *mut i16, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}
5986
/// Store 128-bits (composed of 8 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi16&expand=5618)
///
/// # Safety
///
/// `mem_addr` must be valid for an unaligned write of 128 bits (16 bytes).
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_storeu_epi16(mem_addr: *mut i16, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}
5998
/// Store 512-bits (composed of 64 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi8&expand=5640)
///
/// # Safety
///
/// `mem_addr` must be valid for an unaligned write of 512 bits (64 bytes).
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_storeu_epi8(mem_addr: *mut i8, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}
6010
/// Store 256-bits (composed of 32 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi8&expand=5638)
///
/// # Safety
///
/// `mem_addr` must be valid for an unaligned write of 256 bits (32 bytes).
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_storeu_epi8(mem_addr: *mut i8, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}
6022
/// Store 128-bits (composed of 16 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi8&expand=5636)
///
/// # Safety
///
/// `mem_addr` must be valid for an unaligned write of 128 bits (16 bytes).
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}
6034
/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi16)
///
/// # Safety
///
/// `mem_addr` must be valid for reading the 16-bit elements whose mask bits are
/// set; per the masked-load semantics, lanes whose mask bit is clear are not
/// read from memory (NOTE(review): confirm against `simd_masked_load` contract).
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_loadu_epi16(
    src: __m512i,
    k: __mmask32,
    mem_addr: *const i16,
) -> __m512i {
    // Expand the bitmask into a full-width lane mask: !0 for active lanes, 0 otherwise.
    let mask = simd_select_bitmask(k, i16x32::splat(!0), i16x32::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x32()).as_m512i()
}
6053
/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi16)
///
/// # Safety
///
/// Same requirements as [`_mm512_mask_loadu_epi16`].
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
    // Zero-masking is the write-masked load with an all-zeros source vector.
    _mm512_mask_loadu_epi16(_mm512_setzero_si512(), k, mem_addr)
}
6067
/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi8)
///
/// # Safety
///
/// `mem_addr` must be valid for reading the 8-bit elements whose mask bits are
/// set; per the masked-load semantics, lanes whose mask bit is clear are not
/// read from memory (NOTE(review): confirm against `simd_masked_load` contract).
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_loadu_epi8(
    src: __m512i,
    k: __mmask64,
    mem_addr: *const i8,
) -> __m512i {
    // Expand the bitmask into a full-width lane mask: !0 for active lanes, 0 otherwise.
    let mask = simd_select_bitmask(k, i8x64::splat(!0), i8x64::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x64()).as_m512i()
}
6086
/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi8)
///
/// # Safety
///
/// Same requirements as [`_mm512_mask_loadu_epi8`].
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
    // Zero-masking is the write-masked load with an all-zeros source vector.
    _mm512_mask_loadu_epi8(_mm512_setzero_si512(), k, mem_addr)
}
6100
/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi16)
///
/// # Safety
///
/// `mem_addr` must be valid for reading the 16-bit elements whose mask bits are
/// set; per the masked-load semantics, lanes whose mask bit is clear are not
/// read from memory (NOTE(review): confirm against `simd_masked_load` contract).
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_loadu_epi16(
    src: __m256i,
    k: __mmask16,
    mem_addr: *const i16,
) -> __m256i {
    // Expand the bitmask into a full-width lane mask: !0 for active lanes, 0 otherwise.
    let mask = simd_select_bitmask(k, i16x16::splat(!0), i16x16::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x16()).as_m256i()
}
6119
/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi16)
///
/// # Safety
///
/// Same requirements as [`_mm256_mask_loadu_epi16`].
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
    // Zero-masking is the write-masked load with an all-zeros source vector.
    _mm256_mask_loadu_epi16(_mm256_setzero_si256(), k, mem_addr)
}
6133
/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi8)
///
/// # Safety
///
/// `mem_addr` must be valid for reading the 8-bit elements whose mask bits are
/// set; per the masked-load semantics, lanes whose mask bit is clear are not
/// read from memory (NOTE(review): confirm against `simd_masked_load` contract).
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_loadu_epi8(
    src: __m256i,
    k: __mmask32,
    mem_addr: *const i8,
) -> __m256i {
    // Expand the bitmask into a full-width lane mask: !0 for active lanes, 0 otherwise.
    let mask = simd_select_bitmask(k, i8x32::splat(!0), i8x32::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x32()).as_m256i()
}
6152
/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi8)
///
/// # Safety
///
/// Same requirements as [`_mm256_mask_loadu_epi8`].
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
    // Zero-masking is the write-masked load with an all-zeros source vector.
    _mm256_mask_loadu_epi8(_mm256_setzero_si256(), k, mem_addr)
}
6166
/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi16)
///
/// # Safety
///
/// `mem_addr` must be valid for reading the 16-bit elements whose mask bits are
/// set; per the masked-load semantics, lanes whose mask bit is clear are not
/// read from memory (NOTE(review): confirm against `simd_masked_load` contract).
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_loadu_epi16(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i16,
) -> __m128i {
    // Expand the bitmask into a full-width lane mask: !0 for active lanes, 0 otherwise.
    let mask = simd_select_bitmask(k, i16x8::splat(!0), i16x8::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x8()).as_m128i()
}
6185
/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi16)
///
/// # Safety
///
/// Same requirements as [`_mm_mask_loadu_epi16`].
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
    // Zero-masking is the write-masked load with an all-zeros source vector.
    _mm_mask_loadu_epi16(_mm_setzero_si128(), k, mem_addr)
}
6199
/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi8)
///
/// # Safety
///
/// `mem_addr` must be valid for reading the 8-bit elements whose mask bits are
/// set; per the masked-load semantics, lanes whose mask bit is clear are not
/// read from memory (NOTE(review): confirm against `simd_masked_load` contract).
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_loadu_epi8(
    src: __m128i,
    k: __mmask16,
    mem_addr: *const i8,
) -> __m128i {
    // Expand the bitmask into a full-width lane mask: !0 for active lanes, 0 otherwise.
    let mask = simd_select_bitmask(k, i8x16::splat(!0), i8x16::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x16()).as_m128i()
}
6218
/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi8)
///
/// # Safety
///
/// Same requirements as [`_mm_mask_loadu_epi8`].
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
    // Zero-masking is the write-masked load with an all-zeros source vector.
    _mm_mask_loadu_epi8(_mm_setzero_si128(), k, mem_addr)
}
6232
/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi16)
///
/// # Safety
///
/// `mem_addr` must be valid for writing the 16-bit elements whose mask bits are
/// set; per the masked-store semantics, lanes whose mask bit is clear are not
/// written (NOTE(review): confirm against `simd_masked_store` contract).
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) {
    // Expand the bitmask into a full-width lane mask; only active lanes are stored.
    let mask = simd_select_bitmask(mask, i16x32::splat(!0), i16x32::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x32());
}
6246
/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi8)
///
/// # Safety
///
/// `mem_addr` must be valid for writing the 8-bit elements whose mask bits are
/// set; per the masked-store semantics, lanes whose mask bit is clear are not
/// written (NOTE(review): confirm against `simd_masked_store` contract).
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) {
    // Expand the bitmask into a full-width lane mask; only active lanes are stored.
    let mask = simd_select_bitmask(mask, i8x64::splat(!0), i8x64::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x64());
}
6260
/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi16)
///
/// # Safety
///
/// `mem_addr` must be valid for writing the 16-bit elements whose mask bits are
/// set; per the masked-store semantics, lanes whose mask bit is clear are not
/// written (NOTE(review): confirm against `simd_masked_store` contract).
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) {
    // Expand the bitmask into a full-width lane mask; only active lanes are stored.
    let mask = simd_select_bitmask(mask, i16x16::splat(!0), i16x16::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x16());
}
6274
/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi8)
///
/// # Safety
///
/// `mem_addr` must be valid for writing the 8-bit elements whose mask bits are
/// set; per the masked-store semantics, lanes whose mask bit is clear are not
/// written (NOTE(review): confirm against `simd_masked_store` contract).
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) {
    // Expand the bitmask into a full-width lane mask; only active lanes are stored.
    let mask = simd_select_bitmask(mask, i8x32::splat(!0), i8x32::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x32());
}
6288
/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi16)
///
/// # Safety
///
/// `mem_addr` must be valid for writing the 16-bit elements whose mask bits are
/// set; per the masked-store semantics, lanes whose mask bit is clear are not
/// written (NOTE(review): confirm against `simd_masked_store` contract).
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) {
    // Expand the bitmask into a full-width lane mask; only active lanes are stored.
    let mask = simd_select_bitmask(mask, i16x8::splat(!0), i16x8::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x8());
}
6302
/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi8)
///
/// # Safety
///
/// `mem_addr` must be valid for writing the 8-bit elements whose mask bits are
/// set; per the masked-store semantics, lanes whose mask bit is clear are not
/// written (NOTE(review): confirm against `simd_masked_store` contract).
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) {
    // Expand the bitmask into a full-width lane mask; only active lanes are stored.
    let mask = simd_select_bitmask(mask, i8x16::splat(!0), i8x16::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x16());
}
6316
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_madd_epi16&expand=3511)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i {
    // It's a trick used in the Adler-32 algorithm to perform a widening addition.
    //
    // ```rust
    // #[target_feature(enable = "avx512bw")]
    // unsafe fn widening_add(mad: __m512i) -> __m512i {
    //     _mm512_madd_epi16(mad, _mm512_set1_epi16(1))
    // }
    // ```
    //
    // If we implement this using generic vector intrinsics, the optimizer
    // will eliminate this pattern, and `vpmaddwd` will no longer be emitted.
    // For this reason, we use x86 intrinsics.
    //
    // SAFETY: `vpmaddwd` takes two i16x32 inputs and yields an i32x16 result,
    // which is transmuted back to the same-sized `__m512i`.
    unsafe { transmute(vpmaddwd(a.as_i16x32(), b.as_i16x32())) }
}
6339
6340/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6341///
6342/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_madd_epi16&expand=3512)
6343#[inline]
6344#[target_feature(enable = "avx512bw")]
6345#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6346#[cfg_attr(test, assert_instr(vpmaddwd))]
6347pub fn _mm512_mask_madd_epi16(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
6348    unsafe {
6349        let madd = _mm512_madd_epi16(a, b).as_i32x16();
6350        transmute(simd_select_bitmask(k, madd, src.as_i32x16()))
6351    }
6352}
6353
6354/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6355///
6356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_madd_epi16&expand=3513)
6357#[inline]
6358#[target_feature(enable = "avx512bw")]
6359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6360#[cfg_attr(test, assert_instr(vpmaddwd))]
6361pub fn _mm512_maskz_madd_epi16(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
6362    unsafe {
6363        let madd = _mm512_madd_epi16(a, b).as_i32x16();
6364        transmute(simd_select_bitmask(k, madd, i32x16::ZERO))
6365    }
6366}
6367
6368/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6369///
6370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_madd_epi16&expand=3509)
6371#[inline]
6372#[target_feature(enable = "avx512bw,avx512vl")]
6373#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6374#[cfg_attr(test, assert_instr(vpmaddwd))]
6375pub fn _mm256_mask_madd_epi16(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
6376    unsafe {
6377        let madd = _mm256_madd_epi16(a, b).as_i32x8();
6378        transmute(simd_select_bitmask(k, madd, src.as_i32x8()))
6379    }
6380}
6381
6382/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6383///
6384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_madd_epi16&expand=3510)
6385#[inline]
6386#[target_feature(enable = "avx512bw,avx512vl")]
6387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6388#[cfg_attr(test, assert_instr(vpmaddwd))]
6389pub fn _mm256_maskz_madd_epi16(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
6390    unsafe {
6391        let madd = _mm256_madd_epi16(a, b).as_i32x8();
6392        transmute(simd_select_bitmask(k, madd, i32x8::ZERO))
6393    }
6394}
6395
6396/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6397///
6398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_madd_epi16&expand=3506)
6399#[inline]
6400#[target_feature(enable = "avx512bw,avx512vl")]
6401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6402#[cfg_attr(test, assert_instr(vpmaddwd))]
6403pub fn _mm_mask_madd_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6404    unsafe {
6405        let madd = _mm_madd_epi16(a, b).as_i32x4();
6406        transmute(simd_select_bitmask(k, madd, src.as_i32x4()))
6407    }
6408}
6409
6410/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6411///
6412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_madd_epi16&expand=3507)
6413#[inline]
6414#[target_feature(enable = "avx512bw,avx512vl")]
6415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6416#[cfg_attr(test, assert_instr(vpmaddwd))]
6417pub fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6418    unsafe {
6419        let madd = _mm_madd_epi16(a, b).as_i32x4();
6420        transmute(simd_select_bitmask(k, madd, i32x4::ZERO))
6421    }
6422}
6423
/// Vertically multiply each unsigned 8-bit integer from a with the corresponding signed 8-bit integer from b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maddubs_epi16&expand=3539)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i {
    // Implemented via the LLVM `vpmaddubsw` intrinsic (not generic vector ops)
    // so the saturating multiply-add lowers to the single instruction checked
    // by `assert_instr` above, mirroring `_mm512_madd_epi16`.
    unsafe { transmute(vpmaddubsw(a.as_u8x64(), b.as_i8x64())) }
}
6434
6435/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6436///
6437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_maddubs_epi16&expand=3540)
6438#[inline]
6439#[target_feature(enable = "avx512bw")]
6440#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6441#[cfg_attr(test, assert_instr(vpmaddubsw))]
6442pub fn _mm512_mask_maddubs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6443    unsafe {
6444        let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
6445        transmute(simd_select_bitmask(k, madd, src.as_i16x32()))
6446    }
6447}
6448
6449/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6450///
6451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_maddubs_epi16&expand=3541)
6452#[inline]
6453#[target_feature(enable = "avx512bw")]
6454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6455#[cfg_attr(test, assert_instr(vpmaddubsw))]
6456pub fn _mm512_maskz_maddubs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6457    unsafe {
6458        let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
6459        transmute(simd_select_bitmask(k, madd, i16x32::ZERO))
6460    }
6461}
6462
6463/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6464///
6465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_maddubs_epi16&expand=3537)
6466#[inline]
6467#[target_feature(enable = "avx512bw,avx512vl")]
6468#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6469#[cfg_attr(test, assert_instr(vpmaddubsw))]
6470pub fn _mm256_mask_maddubs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6471    unsafe {
6472        let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
6473        transmute(simd_select_bitmask(k, madd, src.as_i16x16()))
6474    }
6475}
6476
6477/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6478///
6479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_maddubs_epi16&expand=3538)
6480#[inline]
6481#[target_feature(enable = "avx512bw,avx512vl")]
6482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6483#[cfg_attr(test, assert_instr(vpmaddubsw))]
6484pub fn _mm256_maskz_maddubs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6485    unsafe {
6486        let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
6487        transmute(simd_select_bitmask(k, madd, i16x16::ZERO))
6488    }
6489}
6490
6491/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6492///
6493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_maddubs_epi16&expand=3534)
6494#[inline]
6495#[target_feature(enable = "avx512bw,avx512vl")]
6496#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6497#[cfg_attr(test, assert_instr(vpmaddubsw))]
6498pub fn _mm_mask_maddubs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6499    unsafe {
6500        let madd = _mm_maddubs_epi16(a, b).as_i16x8();
6501        transmute(simd_select_bitmask(k, madd, src.as_i16x8()))
6502    }
6503}
6504
6505/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6506///
6507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_maddubs_epi16&expand=3535)
6508#[inline]
6509#[target_feature(enable = "avx512bw,avx512vl")]
6510#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6511#[cfg_attr(test, assert_instr(vpmaddubsw))]
6512pub fn _mm_maskz_maddubs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6513    unsafe {
6514        let madd = _mm_maddubs_epi16(a, b).as_i16x8();
6515        transmute(simd_select_bitmask(k, madd, i16x8::ZERO))
6516    }
6517}
6518
6519/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst.
6520///
6521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi32&expand=4091)
6522#[inline]
6523#[target_feature(enable = "avx512bw")]
6524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6525#[cfg_attr(test, assert_instr(vpackssdw))]
6526#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6527pub const fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i {
6528    unsafe {
6529        let max = simd_splat(i16::MAX as i32);
6530        let min = simd_splat(i16::MIN as i32);
6531
6532        let clamped_a = simd_imax(simd_imin(a.as_i32x16(), max), min)
6533            .as_m512i()
6534            .as_i16x32();
6535        let clamped_b = simd_imax(simd_imin(b.as_i32x16(), max), min)
6536            .as_m512i()
6537            .as_i16x32();
6538
6539        #[rustfmt::skip]
6540        const IDXS: [u32; 32] = [
6541            00, 02, 04, 06,
6542            32, 34, 36, 38,
6543            08, 10, 12, 14,
6544            40, 42, 44, 46,
6545            16, 18, 20, 22,
6546            48, 50, 52, 54,
6547            24, 26, 28, 30,
6548            56, 58, 60, 62,
6549        ];
6550        let result: i16x32 = simd_shuffle!(clamped_a, clamped_b, IDXS);
6551
6552        result.as_m512i()
6553    }
6554}
6555
6556/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6557///
6558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi32&expand=4089)
6559#[inline]
6560#[target_feature(enable = "avx512bw")]
6561#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6562#[cfg_attr(test, assert_instr(vpackssdw))]
6563#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6564pub const fn _mm512_mask_packs_epi32(
6565    src: __m512i,
6566    k: __mmask32,
6567    a: __m512i,
6568    b: __m512i,
6569) -> __m512i {
6570    unsafe {
6571        let pack = _mm512_packs_epi32(a, b).as_i16x32();
6572        transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
6573    }
6574}
6575
6576/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6577///
6578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi32&expand=4090)
6579#[inline]
6580#[target_feature(enable = "avx512bw")]
6581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6582#[cfg_attr(test, assert_instr(vpackssdw))]
6583#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6584pub const fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6585    unsafe {
6586        let pack = _mm512_packs_epi32(a, b).as_i16x32();
6587        transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
6588    }
6589}
6590
6591/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6592///
6593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi32&expand=4086)
6594#[inline]
6595#[target_feature(enable = "avx512bw,avx512vl")]
6596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6597#[cfg_attr(test, assert_instr(vpackssdw))]
6598#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6599pub const fn _mm256_mask_packs_epi32(
6600    src: __m256i,
6601    k: __mmask16,
6602    a: __m256i,
6603    b: __m256i,
6604) -> __m256i {
6605    unsafe {
6606        let pack = _mm256_packs_epi32(a, b).as_i16x16();
6607        transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
6608    }
6609}
6610
6611/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6612///
6613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packs_epi32&expand=4087)
6614#[inline]
6615#[target_feature(enable = "avx512bw,avx512vl")]
6616#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6617#[cfg_attr(test, assert_instr(vpackssdw))]
6618pub fn _mm256_maskz_packs_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6619    unsafe {
6620        let pack = _mm256_packs_epi32(a, b).as_i16x16();
6621        transmute(simd_select_bitmask(k, pack, i16x16::ZERO))
6622    }
6623}
6624
6625/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6626///
6627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi32&expand=4083)
6628#[inline]
6629#[target_feature(enable = "avx512bw,avx512vl")]
6630#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6631#[cfg_attr(test, assert_instr(vpackssdw))]
6632#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6633pub const fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6634    unsafe {
6635        let pack = _mm_packs_epi32(a, b).as_i16x8();
6636        transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
6637    }
6638}
6639
6640/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6641///
6642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi32&expand=4084)
6643#[inline]
6644#[target_feature(enable = "avx512bw,avx512vl")]
6645#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6646#[cfg_attr(test, assert_instr(vpackssdw))]
6647#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6648pub const fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6649    unsafe {
6650        let pack = _mm_packs_epi32(a, b).as_i16x8();
6651        transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
6652    }
6653}
6654
6655/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst.
6656///
6657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi16&expand=4082)
6658#[inline]
6659#[target_feature(enable = "avx512bw")]
6660#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6661#[cfg_attr(test, assert_instr(vpacksswb))]
6662#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6663pub const fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i {
6664    unsafe {
6665        let max = simd_splat(i8::MAX as i16);
6666        let min = simd_splat(i8::MIN as i16);
6667
6668        let clamped_a = simd_imax(simd_imin(a.as_i16x32(), max), min)
6669            .as_m512i()
6670            .as_i8x64();
6671        let clamped_b = simd_imax(simd_imin(b.as_i16x32(), max), min)
6672            .as_m512i()
6673            .as_i8x64();
6674
6675        #[rustfmt::skip]
6676        const IDXS: [u32; 64] = [
6677            000, 002, 004, 006, 008, 010, 012, 014,
6678            064, 066, 068, 070, 072, 074, 076, 078,
6679            016, 018, 020, 022, 024, 026, 028, 030,
6680            080, 082, 084, 086, 088, 090, 092, 094,
6681            032, 034, 036, 038, 040, 042, 044, 046,
6682            096, 098, 100, 102, 104, 106, 108, 110,
6683            048, 050, 052, 054, 056, 058, 060, 062,
6684            112, 114, 116, 118, 120, 122, 124, 126,
6685        ];
6686        let result: i8x64 = simd_shuffle!(clamped_a, clamped_b, IDXS);
6687
6688        result.as_m512i()
6689    }
6690}
6691
6692/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6693///
6694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi16&expand=4080)
6695#[inline]
6696#[target_feature(enable = "avx512bw")]
6697#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6698#[cfg_attr(test, assert_instr(vpacksswb))]
6699#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6700pub const fn _mm512_mask_packs_epi16(
6701    src: __m512i,
6702    k: __mmask64,
6703    a: __m512i,
6704    b: __m512i,
6705) -> __m512i {
6706    unsafe {
6707        let pack = _mm512_packs_epi16(a, b).as_i8x64();
6708        transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
6709    }
6710}
6711
6712/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6713///
6714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi16&expand=4081)
6715#[inline]
6716#[target_feature(enable = "avx512bw")]
6717#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6718#[cfg_attr(test, assert_instr(vpacksswb))]
6719#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6720pub const fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6721    unsafe {
6722        let pack = _mm512_packs_epi16(a, b).as_i8x64();
6723        transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
6724    }
6725}
6726
6727/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6728///
6729/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi16&expand=4077)
6730#[inline]
6731#[target_feature(enable = "avx512bw,avx512vl")]
6732#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6733#[cfg_attr(test, assert_instr(vpacksswb))]
6734#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6735pub const fn _mm256_mask_packs_epi16(
6736    src: __m256i,
6737    k: __mmask32,
6738    a: __m256i,
6739    b: __m256i,
6740) -> __m256i {
6741    unsafe {
6742        let pack = _mm256_packs_epi16(a, b).as_i8x32();
6743        transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
6744    }
6745}
6746
6747/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6748///
6749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=#text=_mm256_maskz_packs_epi16&expand=4078)
6750#[inline]
6751#[target_feature(enable = "avx512bw,avx512vl")]
6752#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6753#[cfg_attr(test, assert_instr(vpacksswb))]
6754#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6755pub const fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6756    unsafe {
6757        let pack = _mm256_packs_epi16(a, b).as_i8x32();
6758        transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
6759    }
6760}
6761
6762/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6763///
6764/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi16&expand=4074)
6765#[inline]
6766#[target_feature(enable = "avx512bw,avx512vl")]
6767#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6768#[cfg_attr(test, assert_instr(vpacksswb))]
6769#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6770pub const fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6771    unsafe {
6772        let pack = _mm_packs_epi16(a, b).as_i8x16();
6773        transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
6774    }
6775}
6776
6777/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6778///
6779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi16&expand=4075)
6780#[inline]
6781#[target_feature(enable = "avx512bw,avx512vl")]
6782#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6783#[cfg_attr(test, assert_instr(vpacksswb))]
6784#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6785pub const fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6786    unsafe {
6787        let pack = _mm_packs_epi16(a, b).as_i8x16();
6788        transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
6789    }
6790}
6791
6792/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst.
6793///
6794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi32&expand=4130)
6795#[inline]
6796#[target_feature(enable = "avx512bw")]
6797#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6798#[cfg_attr(test, assert_instr(vpackusdw))]
6799#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6800pub const fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i {
6801    unsafe {
6802        let max = simd_splat(u16::MAX as i32);
6803        let min = simd_splat(u16::MIN as i32);
6804
6805        let clamped_a = simd_imax(simd_imin(a.as_i32x16(), max), min)
6806            .as_m512i()
6807            .as_i16x32();
6808        let clamped_b = simd_imax(simd_imin(b.as_i32x16(), max), min)
6809            .as_m512i()
6810            .as_i16x32();
6811
6812        #[rustfmt::skip]
6813        const IDXS: [u32; 32] = [
6814            00, 02, 04, 06,
6815            32, 34, 36, 38,
6816            08, 10, 12, 14,
6817            40, 42, 44, 46,
6818            16, 18, 20, 22,
6819            48, 50, 52, 54,
6820            24, 26, 28, 30,
6821            56, 58, 60, 62,
6822        ];
6823        let result: i16x32 = simd_shuffle!(clamped_a, clamped_b, IDXS);
6824
6825        result.as_m512i()
6826    }
6827}
6828
6829/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6830///
6831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi32&expand=4128)
6832#[inline]
6833#[target_feature(enable = "avx512bw")]
6834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6835#[cfg_attr(test, assert_instr(vpackusdw))]
6836#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6837pub const fn _mm512_mask_packus_epi32(
6838    src: __m512i,
6839    k: __mmask32,
6840    a: __m512i,
6841    b: __m512i,
6842) -> __m512i {
6843    unsafe {
6844        let pack = _mm512_packus_epi32(a, b).as_i16x32();
6845        transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
6846    }
6847}
6848
6849/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6850///
6851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi32&expand=4129)
6852#[inline]
6853#[target_feature(enable = "avx512bw")]
6854#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6855#[cfg_attr(test, assert_instr(vpackusdw))]
6856#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6857pub const fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6858    unsafe {
6859        let pack = _mm512_packus_epi32(a, b).as_i16x32();
6860        transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
6861    }
6862}
6863
6864/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6865///
6866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi32&expand=4125)
6867#[inline]
6868#[target_feature(enable = "avx512bw,avx512vl")]
6869#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6870#[cfg_attr(test, assert_instr(vpackusdw))]
6871#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6872pub const fn _mm256_mask_packus_epi32(
6873    src: __m256i,
6874    k: __mmask16,
6875    a: __m256i,
6876    b: __m256i,
6877) -> __m256i {
6878    unsafe {
6879        let pack = _mm256_packus_epi32(a, b).as_i16x16();
6880        transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
6881    }
6882}
6883
6884/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6885///
6886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi32&expand=4126)
6887#[inline]
6888#[target_feature(enable = "avx512bw,avx512vl")]
6889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6890#[cfg_attr(test, assert_instr(vpackusdw))]
6891#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6892pub const fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6893    unsafe {
6894        let pack = _mm256_packus_epi32(a, b).as_i16x16();
6895        transmute(simd_select_bitmask(k, pack, i16x16::ZERO))
6896    }
6897}
6898
6899/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6900///
6901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi32&expand=4122)
6902#[inline]
6903#[target_feature(enable = "avx512bw,avx512vl")]
6904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6905#[cfg_attr(test, assert_instr(vpackusdw))]
6906#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6907pub const fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6908    unsafe {
6909        let pack = _mm_packus_epi32(a, b).as_i16x8();
6910        transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
6911    }
6912}
6913
6914/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6915///
6916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi32&expand=4123)
6917#[inline]
6918#[target_feature(enable = "avx512bw,avx512vl")]
6919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6920#[cfg_attr(test, assert_instr(vpackusdw))]
6921#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6922pub const fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6923    unsafe {
6924        let pack = _mm_packus_epi32(a, b).as_i16x8();
6925        transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
6926    }
6927}
6928
/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi16&expand=4121)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Unsigned-saturation bounds: every i16 lane is clamped to [0, 255].
        let max = simd_splat(u8::MAX as i16);
        let min = simd_splat(u8::MIN as i16);

        // After clamping, each 16-bit lane's value fits in its low byte, so the
        // vectors are reinterpreted as bytes and the saturated values live at
        // the even byte positions (little-endian low byte of each i16 lane).
        let clamped_a = simd_imax(simd_imin(a.as_i16x32(), max), min)
            .as_m512i()
            .as_i8x64();
        let clamped_b = simd_imax(simd_imin(b.as_i16x32(), max), min)
            .as_m512i()
            .as_i8x64();

        // Gather the low byte of every 16-bit lane. The ordering alternates
        // 8-lane groups of `a` and `b` within each 128-bit lane (indices >= 64
        // address `clamped_b`), matching vpackuswb's per-128-bit-lane packing.
        #[rustfmt::skip]
        const IDXS: [u32; 64] = [
            000, 002, 004, 006, 008, 010, 012, 014,
            064, 066, 068, 070, 072, 074, 076, 078,
            016, 018, 020, 022, 024, 026, 028, 030,
            080, 082, 084, 086, 088, 090, 092, 094,
            032, 034, 036, 038, 040, 042, 044, 046,
            096, 098, 100, 102, 104, 106, 108, 110,
            048, 050, 052, 054, 056, 058, 060, 062,
            112, 114, 116, 118, 120, 122, 124, 126,
        ];
        let result: i8x64 = simd_shuffle!(clamped_a, clamped_b, IDXS);

        result.as_m512i()
    }
}
6965
6966/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6967///
6968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi16&expand=4119)
6969#[inline]
6970#[target_feature(enable = "avx512bw")]
6971#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6972#[cfg_attr(test, assert_instr(vpackuswb))]
6973#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6974pub const fn _mm512_mask_packus_epi16(
6975    src: __m512i,
6976    k: __mmask64,
6977    a: __m512i,
6978    b: __m512i,
6979) -> __m512i {
6980    unsafe {
6981        let pack = _mm512_packus_epi16(a, b).as_i8x64();
6982        transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
6983    }
6984}
6985
6986/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6987///
6988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi16&expand=4120)
6989#[inline]
6990#[target_feature(enable = "avx512bw")]
6991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6992#[cfg_attr(test, assert_instr(vpackuswb))]
6993#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6994pub const fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6995    unsafe {
6996        let pack = _mm512_packus_epi16(a, b).as_i8x64();
6997        transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
6998    }
6999}
7000
7001/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7002///
7003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi16&expand=4116)
7004#[inline]
7005#[target_feature(enable = "avx512bw,avx512vl")]
7006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7007#[cfg_attr(test, assert_instr(vpackuswb))]
7008#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7009pub const fn _mm256_mask_packus_epi16(
7010    src: __m256i,
7011    k: __mmask32,
7012    a: __m256i,
7013    b: __m256i,
7014) -> __m256i {
7015    unsafe {
7016        let pack = _mm256_packus_epi16(a, b).as_i8x32();
7017        transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
7018    }
7019}
7020
7021/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7022///
7023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi16&expand=4117)
7024#[inline]
7025#[target_feature(enable = "avx512bw,avx512vl")]
7026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7027#[cfg_attr(test, assert_instr(vpackuswb))]
7028#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7029pub const fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
7030    unsafe {
7031        let pack = _mm256_packus_epi16(a, b).as_i8x32();
7032        transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
7033    }
7034}
7035
7036/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7037///
7038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi16&expand=4113)
7039#[inline]
7040#[target_feature(enable = "avx512bw,avx512vl")]
7041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7042#[cfg_attr(test, assert_instr(vpackuswb))]
7043#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7044pub const fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
7045    unsafe {
7046        let pack = _mm_packus_epi16(a, b).as_i8x16();
7047        transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
7048    }
7049}
7050
7051/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7052///
7053/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi16&expand=4114)
7054#[inline]
7055#[target_feature(enable = "avx512bw,avx512vl")]
7056#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7057#[cfg_attr(test, assert_instr(vpackuswb))]
7058#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7059pub const fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
7060    unsafe {
7061        let pack = _mm_packus_epi16(a, b).as_i8x16();
7062        transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
7063    }
7064}
7065
/// Average packed unsigned 16-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu16&expand=388)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_avg_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Widen to u32 so the sum plus the rounding bias cannot overflow.
        let a = simd_cast::<_, u32x32>(a.as_u16x32());
        let b = simd_cast::<_, u32x32>(b.as_u16x32());
        // Rounding average: (a + b + 1) >> 1 per lane.
        let r = simd_shr(simd_add(simd_add(a, b), u32x32::splat(1)), u32x32::splat(1));
        // Narrow back to u16; the result always fits since avg(u16, u16) <= u16::MAX.
        transmute(simd_cast::<_, u16x32>(r))
    }
}
7082
7083/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7084///
7085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu16&expand=389)
7086#[inline]
7087#[target_feature(enable = "avx512bw")]
7088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7089#[cfg_attr(test, assert_instr(vpavgw))]
7090#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7091pub const fn _mm512_mask_avg_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
7092    unsafe {
7093        let avg = _mm512_avg_epu16(a, b).as_u16x32();
7094        transmute(simd_select_bitmask(k, avg, src.as_u16x32()))
7095    }
7096}
7097
7098/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7099///
7100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu16&expand=390)
7101#[inline]
7102#[target_feature(enable = "avx512bw")]
7103#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7104#[cfg_attr(test, assert_instr(vpavgw))]
7105#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7106pub const fn _mm512_maskz_avg_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
7107    unsafe {
7108        let avg = _mm512_avg_epu16(a, b).as_u16x32();
7109        transmute(simd_select_bitmask(k, avg, u16x32::ZERO))
7110    }
7111}
7112
7113/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7114///
7115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu16&expand=386)
7116#[inline]
7117#[target_feature(enable = "avx512bw,avx512vl")]
7118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7119#[cfg_attr(test, assert_instr(vpavgw))]
7120#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7121pub const fn _mm256_mask_avg_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
7122    unsafe {
7123        let avg = _mm256_avg_epu16(a, b).as_u16x16();
7124        transmute(simd_select_bitmask(k, avg, src.as_u16x16()))
7125    }
7126}
7127
7128/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7129///
7130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu16&expand=387)
7131#[inline]
7132#[target_feature(enable = "avx512bw,avx512vl")]
7133#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7134#[cfg_attr(test, assert_instr(vpavgw))]
7135#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7136pub const fn _mm256_maskz_avg_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
7137    unsafe {
7138        let avg = _mm256_avg_epu16(a, b).as_u16x16();
7139        transmute(simd_select_bitmask(k, avg, u16x16::ZERO))
7140    }
7141}
7142
7143/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7144///
7145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu16&expand=383)
7146#[inline]
7147#[target_feature(enable = "avx512bw,avx512vl")]
7148#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7149#[cfg_attr(test, assert_instr(vpavgw))]
7150#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7151pub const fn _mm_mask_avg_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
7152    unsafe {
7153        let avg = _mm_avg_epu16(a, b).as_u16x8();
7154        transmute(simd_select_bitmask(k, avg, src.as_u16x8()))
7155    }
7156}
7157
7158/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7159///
7160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu16&expand=384)
7161#[inline]
7162#[target_feature(enable = "avx512bw,avx512vl")]
7163#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7164#[cfg_attr(test, assert_instr(vpavgw))]
7165#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7166pub const fn _mm_maskz_avg_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
7167    unsafe {
7168        let avg = _mm_avg_epu16(a, b).as_u16x8();
7169        transmute(simd_select_bitmask(k, avg, u16x8::ZERO))
7170    }
7171}
7172
/// Average packed unsigned 8-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu8&expand=397)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_avg_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Widen to u16 so the sum plus the rounding bias cannot overflow.
        let a = simd_cast::<_, u16x64>(a.as_u8x64());
        let b = simd_cast::<_, u16x64>(b.as_u8x64());
        // Rounding average: (a + b + 1) >> 1 per lane.
        let r = simd_shr(simd_add(simd_add(a, b), u16x64::splat(1)), u16x64::splat(1));
        // Narrow back to u8; the result always fits since avg(u8, u8) <= u8::MAX.
        transmute(simd_cast::<_, u8x64>(r))
    }
}
7189
7190/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7191///
7192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu8&expand=398)
7193#[inline]
7194#[target_feature(enable = "avx512bw")]
7195#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7196#[cfg_attr(test, assert_instr(vpavgb))]
7197#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7198pub const fn _mm512_mask_avg_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
7199    unsafe {
7200        let avg = _mm512_avg_epu8(a, b).as_u8x64();
7201        transmute(simd_select_bitmask(k, avg, src.as_u8x64()))
7202    }
7203}
7204
7205/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7206///
7207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu8&expand=399)
7208#[inline]
7209#[target_feature(enable = "avx512bw")]
7210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7211#[cfg_attr(test, assert_instr(vpavgb))]
7212#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7213pub const fn _mm512_maskz_avg_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
7214    unsafe {
7215        let avg = _mm512_avg_epu8(a, b).as_u8x64();
7216        transmute(simd_select_bitmask(k, avg, u8x64::ZERO))
7217    }
7218}
7219
7220/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7221///
7222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu8&expand=395)
7223#[inline]
7224#[target_feature(enable = "avx512bw,avx512vl")]
7225#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7226#[cfg_attr(test, assert_instr(vpavgb))]
7227#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7228pub const fn _mm256_mask_avg_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
7229    unsafe {
7230        let avg = _mm256_avg_epu8(a, b).as_u8x32();
7231        transmute(simd_select_bitmask(k, avg, src.as_u8x32()))
7232    }
7233}
7234
7235/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7236///
7237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu8&expand=396)
7238#[inline]
7239#[target_feature(enable = "avx512bw,avx512vl")]
7240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7241#[cfg_attr(test, assert_instr(vpavgb))]
7242#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7243pub const fn _mm256_maskz_avg_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
7244    unsafe {
7245        let avg = _mm256_avg_epu8(a, b).as_u8x32();
7246        transmute(simd_select_bitmask(k, avg, u8x32::ZERO))
7247    }
7248}
7249
7250/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7251///
7252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu8&expand=392)
7253#[inline]
7254#[target_feature(enable = "avx512bw,avx512vl")]
7255#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7256#[cfg_attr(test, assert_instr(vpavgb))]
7257#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7258pub const fn _mm_mask_avg_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
7259    unsafe {
7260        let avg = _mm_avg_epu8(a, b).as_u8x16();
7261        transmute(simd_select_bitmask(k, avg, src.as_u8x16()))
7262    }
7263}
7264
7265/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7266///
7267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu8&expand=393)
7268#[inline]
7269#[target_feature(enable = "avx512bw,avx512vl")]
7270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7271#[cfg_attr(test, assert_instr(vpavgb))]
7272#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7273pub const fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
7274    unsafe {
7275        let avg = _mm_avg_epu8(a, b).as_u8x16();
7276        transmute(simd_select_bitmask(k, avg, u8x16::ZERO))
7277    }
7278}
7279
/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi16&expand=5271)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm512_sll_epi16(a: __m512i, count: __m512i) -> __m512i {
    // Delegates directly to the LLVM `vpsllw` intrinsic; the shift amount is
    // taken from the low 64 bits of `count` and applied uniformly to all lanes.
    unsafe { transmute(vpsllw(a.as_i16x32(), count.as_i16x8())) }
}
7290
7291/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7292///
7293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi16&expand=5269)
7294#[inline]
7295#[target_feature(enable = "avx512bw")]
7296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7297#[cfg_attr(test, assert_instr(vpsllw))]
7298pub fn _mm512_mask_sll_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7299    unsafe {
7300        let shf = _mm512_sll_epi16(a, count).as_i16x32();
7301        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
7302    }
7303}
7304
7305/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7306///
7307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi16&expand=5270)
7308#[inline]
7309#[target_feature(enable = "avx512bw")]
7310#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7311#[cfg_attr(test, assert_instr(vpsllw))]
7312pub fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7313    unsafe {
7314        let shf = _mm512_sll_epi16(a, count).as_i16x32();
7315        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
7316    }
7317}
7318
7319/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7320///
7321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi16&expand=5266)
7322#[inline]
7323#[target_feature(enable = "avx512bw,avx512vl")]
7324#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7325#[cfg_attr(test, assert_instr(vpsllw))]
7326pub fn _mm256_mask_sll_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7327    unsafe {
7328        let shf = _mm256_sll_epi16(a, count).as_i16x16();
7329        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
7330    }
7331}
7332
7333/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7334///
7335/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi16&expand=5267)
7336#[inline]
7337#[target_feature(enable = "avx512bw,avx512vl")]
7338#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7339#[cfg_attr(test, assert_instr(vpsllw))]
7340pub fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7341    unsafe {
7342        let shf = _mm256_sll_epi16(a, count).as_i16x16();
7343        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
7344    }
7345}
7346
7347/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7348///
7349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi16&expand=5263)
7350#[inline]
7351#[target_feature(enable = "avx512bw,avx512vl")]
7352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7353#[cfg_attr(test, assert_instr(vpsllw))]
7354pub fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7355    unsafe {
7356        let shf = _mm_sll_epi16(a, count).as_i16x8();
7357        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
7358    }
7359}
7360
7361/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7362///
7363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi16&expand=5264)
7364#[inline]
7365#[target_feature(enable = "avx512bw,avx512vl")]
7366#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7367#[cfg_attr(test, assert_instr(vpsllw))]
7368pub fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7369    unsafe {
7370        let shf = _mm_sll_epi16(a, count).as_i16x8();
7371        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
7372    }
7373}
7374
/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi16&expand=5301)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_slli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // A shift of 16 or more moves every bit out of a 16-bit lane; the
        // branch also keeps `simd_shl` within its defined range.
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            // Uniform shift amount: splat the immediate across all lanes.
            transmute(simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
        }
    }
}
7394
/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi16&expand=5299)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_slli_epi16<const IMM8: u32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Shifts of 16 or more clear every bit of a 16-bit lane; the branch
        // also keeps `simd_shl` within its defined range.
        let shf = if IMM8 >= 16 {
            u16x32::ZERO
        } else {
            simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16))
        };
        // Lanes with a clear mask bit are taken from `src` instead.
        transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
    }
}
7419
/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi16&expand=5300)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_slli_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // A shift of 16 or more zeroes every lane regardless of the mask, so
        // the masked select can be skipped entirely in that case.
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            let shf = simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16));
            // Lanes with a clear mask bit are zeroed.
            transmute(simd_select_bitmask(k, shf, u16x32::ZERO))
        }
    }
}
7440
/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi16&expand=5296)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_slli_epi16<const IMM8: u32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Shifts of 16 or more clear every bit of a 16-bit lane; the branch
        // also keeps `simd_shl` within its defined range.
        let shf = if IMM8 >= 16 {
            u16x16::ZERO
        } else {
            simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16))
        };
        // Lanes with a clear mask bit are taken from `src` instead.
        transmute(simd_select_bitmask(k, shf, src.as_u16x16()))
    }
}
7465
/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi16&expand=5297)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_slli_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // A shift of 16 or more zeroes every lane regardless of the mask, so
        // the masked select can be skipped entirely in that case.
        if IMM8 >= 16 {
            _mm256_setzero_si256()
        } else {
            let shf = simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16));
            // Lanes with a clear mask bit are zeroed.
            transmute(simd_select_bitmask(k, shf, u16x16::ZERO))
        }
    }
}
7486
/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi16&expand=5293)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_slli_epi16<const IMM8: u32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Shifts of 16 or more clear every bit of a 16-bit lane; the branch
        // also keeps `simd_shl` within its defined range.
        let shf = if IMM8 >= 16 {
            u16x8::ZERO
        } else {
            simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16))
        };
        // Lanes with a clear mask bit are taken from `src` instead.
        transmute(simd_select_bitmask(k, shf, src.as_u16x8()))
    }
}
7511
/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi16&expand=5294)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_slli_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // A shift of 16 or more zeroes every lane regardless of the mask, so
        // the masked select can be skipped entirely in that case.
        if IMM8 >= 16 {
            _mm_setzero_si128()
        } else {
            let shf = simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16));
            // Lanes with a clear mask bit are zeroed.
            transmute(simd_select_bitmask(k, shf, u16x8::ZERO))
        }
    }
}
7532
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi16&expand=5333)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u16x32();
        // Lanes whose shift count is >= 16 must end up zero, but `simd_shl`
        // is UB for out-of-range counts: clamp those counts to 0 before
        // shifting, then zero the affected result lanes afterwards.
        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x32::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u16x32(), count), u16x32::ZERO).as_m512i()
    }
}
7549
7550/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7551///
7552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi16&expand=5331)
7553#[inline]
7554#[target_feature(enable = "avx512bw")]
7555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7556#[cfg_attr(test, assert_instr(vpsllvw))]
7557#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7558pub const fn _mm512_mask_sllv_epi16(
7559    src: __m512i,
7560    k: __mmask32,
7561    a: __m512i,
7562    count: __m512i,
7563) -> __m512i {
7564    unsafe {
7565        let shf = _mm512_sllv_epi16(a, count).as_i16x32();
7566        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
7567    }
7568}
7569
7570/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7571///
7572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi16&expand=5332)
7573#[inline]
7574#[target_feature(enable = "avx512bw")]
7575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7576#[cfg_attr(test, assert_instr(vpsllvw))]
7577#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7578pub const fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
7579    unsafe {
7580        let shf = _mm512_sllv_epi16(a, count).as_i16x32();
7581        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
7582    }
7583}
7584
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi16&expand=5330)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u16x16();
        // Lanes whose shift count is >= 16 must end up zero, but `simd_shl`
        // is UB for out-of-range counts: clamp those counts to 0 before
        // shifting, then zero the affected result lanes afterwards.
        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x16::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u16x16(), count), u16x16::ZERO).as_m256i()
    }
}
7601
7602/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7603///
7604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi16&expand=5328)
7605#[inline]
7606#[target_feature(enable = "avx512bw,avx512vl")]
7607#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7608#[cfg_attr(test, assert_instr(vpsllvw))]
7609#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7610pub const fn _mm256_mask_sllv_epi16(
7611    src: __m256i,
7612    k: __mmask16,
7613    a: __m256i,
7614    count: __m256i,
7615) -> __m256i {
7616    unsafe {
7617        let shf = _mm256_sllv_epi16(a, count).as_i16x16();
7618        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
7619    }
7620}
7621
7622/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7623///
7624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi16&expand=5329)
7625#[inline]
7626#[target_feature(enable = "avx512bw,avx512vl")]
7627#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7628#[cfg_attr(test, assert_instr(vpsllvw))]
7629#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7630pub const fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
7631    unsafe {
7632        let shf = _mm256_sllv_epi16(a, count).as_i16x16();
7633        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
7634    }
7635}
7636
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi16&expand=5327)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u16x8();
        // Lanes whose shift count is >= 16 must end up zero, but `simd_shl`
        // is UB for out-of-range counts: clamp those counts to 0 before
        // shifting, then zero the affected result lanes afterwards.
        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x8::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u16x8(), count), u16x8::ZERO).as_m128i()
    }
}
7653
7654/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7655///
7656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi16&expand=5325)
7657#[inline]
7658#[target_feature(enable = "avx512bw,avx512vl")]
7659#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7660#[cfg_attr(test, assert_instr(vpsllvw))]
7661#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7662pub const fn _mm_mask_sllv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7663    unsafe {
7664        let shf = _mm_sllv_epi16(a, count).as_i16x8();
7665        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
7666    }
7667}
7668
7669/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7670///
7671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi16&expand=5326)
7672#[inline]
7673#[target_feature(enable = "avx512bw,avx512vl")]
7674#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7675#[cfg_attr(test, assert_instr(vpsllvw))]
7676#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7677pub const fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7678    unsafe {
7679        let shf = _mm_sllv_epi16(a, count).as_i16x8();
7680        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
7681    }
7682}
7683
/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi16&expand=5483)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i {
    // Delegates directly to the `vpsrlw` LLVM intrinsic; `transmute` only
    // reinterprets between same-size vector types.
    unsafe { transmute(vpsrlw(a.as_i16x32(), count.as_i16x8())) }
}
7694
7695/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7696///
7697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi16&expand=5481)
7698#[inline]
7699#[target_feature(enable = "avx512bw")]
7700#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7701#[cfg_attr(test, assert_instr(vpsrlw))]
7702pub fn _mm512_mask_srl_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7703    unsafe {
7704        let shf = _mm512_srl_epi16(a, count).as_i16x32();
7705        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
7706    }
7707}
7708
7709/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7710///
7711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi16&expand=5482)
7712#[inline]
7713#[target_feature(enable = "avx512bw")]
7714#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7715#[cfg_attr(test, assert_instr(vpsrlw))]
7716pub fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7717    unsafe {
7718        let shf = _mm512_srl_epi16(a, count).as_i16x32();
7719        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
7720    }
7721}
7722
7723/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7724///
7725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi16&expand=5478)
7726#[inline]
7727#[target_feature(enable = "avx512bw,avx512vl")]
7728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7729#[cfg_attr(test, assert_instr(vpsrlw))]
7730pub fn _mm256_mask_srl_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7731    unsafe {
7732        let shf = _mm256_srl_epi16(a, count).as_i16x16();
7733        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
7734    }
7735}
7736
7737/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7738///
7739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi16&expand=5479)
7740#[inline]
7741#[target_feature(enable = "avx512bw,avx512vl")]
7742#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7743#[cfg_attr(test, assert_instr(vpsrlw))]
7744pub fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7745    unsafe {
7746        let shf = _mm256_srl_epi16(a, count).as_i16x16();
7747        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
7748    }
7749}
7750
7751/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7752///
7753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi16&expand=5475)
7754#[inline]
7755#[target_feature(enable = "avx512bw,avx512vl")]
7756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7757#[cfg_attr(test, assert_instr(vpsrlw))]
7758pub fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7759    unsafe {
7760        let shf = _mm_srl_epi16(a, count).as_i16x8();
7761        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
7762    }
7763}
7764
7765/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7766///
7767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi16&expand=5476)
7768#[inline]
7769#[target_feature(enable = "avx512bw,avx512vl")]
7770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7771#[cfg_attr(test, assert_instr(vpsrlw))]
7772pub fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7773    unsafe {
7774        let shf = _mm_srl_epi16(a, count).as_i16x8();
7775        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
7776    }
7777}
7778
7779/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
7780///
7781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi16&expand=5513)
7782#[inline]
7783#[target_feature(enable = "avx512bw")]
7784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7785#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7786#[rustc_legacy_const_generics(1)]
7787#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7788pub const fn _mm512_srli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
7789    unsafe {
7790        static_assert_uimm_bits!(IMM8, 8);
7791        if IMM8 >= 16 {
7792            _mm512_setzero_si512()
7793        } else {
7794            transmute(simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
7795        }
7796    }
7797}
7798
7799/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7800///
7801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi16&expand=5511)
7802#[inline]
7803#[target_feature(enable = "avx512bw")]
7804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7805#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7806#[rustc_legacy_const_generics(3)]
7807#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7808pub const fn _mm512_mask_srli_epi16<const IMM8: u32>(
7809    src: __m512i,
7810    k: __mmask32,
7811    a: __m512i,
7812) -> __m512i {
7813    unsafe {
7814        static_assert_uimm_bits!(IMM8, 8);
7815        let shf = if IMM8 >= 16 {
7816            u16x32::ZERO
7817        } else {
7818            simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16))
7819        };
7820        transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
7821    }
7822}
7823
/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi16&expand=5512)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_srli_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // NOTE(review): IMM8 is declared `i32` here while the sibling
        // srli/slli intrinsics use `u32`; Intel's documentation appears
        // inconsistent on this point. Changing it now would break callers
        // that spell the type explicitly, and the static assert above
        // already restricts the value to 0..=255.
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            let shf = simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16));
            transmute(simd_select_bitmask(k, shf, u16x32::ZERO))
        }
    }
}
7845
7846/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7847///
7848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi16&expand=5508)
7849#[inline]
7850#[target_feature(enable = "avx512bw,avx512vl")]
7851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7852#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7853#[rustc_legacy_const_generics(3)]
7854#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7855pub const fn _mm256_mask_srli_epi16<const IMM8: i32>(
7856    src: __m256i,
7857    k: __mmask16,
7858    a: __m256i,
7859) -> __m256i {
7860    unsafe {
7861        static_assert_uimm_bits!(IMM8, 8);
7862        let shf = _mm256_srli_epi16::<IMM8>(a);
7863        transmute(simd_select_bitmask(k, shf.as_i16x16(), src.as_i16x16()))
7864    }
7865}
7866
7867/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7868///
7869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi16&expand=5509)
7870#[inline]
7871#[target_feature(enable = "avx512bw,avx512vl")]
7872#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7873#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7874#[rustc_legacy_const_generics(2)]
7875#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7876pub const fn _mm256_maskz_srli_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
7877    unsafe {
7878        static_assert_uimm_bits!(IMM8, 8);
7879        let shf = _mm256_srli_epi16::<IMM8>(a);
7880        transmute(simd_select_bitmask(k, shf.as_i16x16(), i16x16::ZERO))
7881    }
7882}
7883
7884/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7885///
7886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi16&expand=5505)
7887#[inline]
7888#[target_feature(enable = "avx512bw,avx512vl")]
7889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7890#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7891#[rustc_legacy_const_generics(3)]
7892#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7893pub const fn _mm_mask_srli_epi16<const IMM8: i32>(
7894    src: __m128i,
7895    k: __mmask8,
7896    a: __m128i,
7897) -> __m128i {
7898    unsafe {
7899        static_assert_uimm_bits!(IMM8, 8);
7900        let shf = _mm_srli_epi16::<IMM8>(a);
7901        transmute(simd_select_bitmask(k, shf.as_i16x8(), src.as_i16x8()))
7902    }
7903}
7904
7905/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7906///
7907/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi16&expand=5506)
7908#[inline]
7909#[target_feature(enable = "avx512bw,avx512vl")]
7910#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7911#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7912#[rustc_legacy_const_generics(2)]
7913#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7914pub const fn _mm_maskz_srli_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
7915    unsafe {
7916        static_assert_uimm_bits!(IMM8, 8);
7917        let shf = _mm_srli_epi16::<IMM8>(a);
7918        transmute(simd_select_bitmask(k, shf.as_i16x8(), i16x8::ZERO))
7919    }
7920}
7921
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi16&expand=5545)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u16x32();
        // Lanes whose shift count is >= 16 must end up zero, but `simd_shr`
        // is UB for out-of-range counts: clamp those counts to 0 before
        // shifting, then zero the affected result lanes afterwards.
        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x32::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u16x32(), count), u16x32::ZERO).as_m512i()
    }
}
7938
7939/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7940///
7941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi16&expand=5543)
7942#[inline]
7943#[target_feature(enable = "avx512bw")]
7944#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7945#[cfg_attr(test, assert_instr(vpsrlvw))]
7946#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7947pub const fn _mm512_mask_srlv_epi16(
7948    src: __m512i,
7949    k: __mmask32,
7950    a: __m512i,
7951    count: __m512i,
7952) -> __m512i {
7953    unsafe {
7954        let shf = _mm512_srlv_epi16(a, count).as_i16x32();
7955        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
7956    }
7957}
7958
7959/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7960///
7961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi16&expand=5544)
7962#[inline]
7963#[target_feature(enable = "avx512bw")]
7964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7965#[cfg_attr(test, assert_instr(vpsrlvw))]
7966#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7967pub const fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
7968    unsafe {
7969        let shf = _mm512_srlv_epi16(a, count).as_i16x32();
7970        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
7971    }
7972}
7973
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi16&expand=5542)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u16x16();
        // Lanes whose shift count is >= 16 must end up zero, but `simd_shr`
        // is UB for out-of-range counts: clamp those counts to 0 before
        // shifting, then zero the affected result lanes afterwards.
        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x16::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u16x16(), count), u16x16::ZERO).as_m256i()
    }
}
7990
7991/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7992///
7993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi16&expand=5540)
7994#[inline]
7995#[target_feature(enable = "avx512bw,avx512vl")]
7996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7997#[cfg_attr(test, assert_instr(vpsrlvw))]
7998#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7999pub const fn _mm256_mask_srlv_epi16(
8000    src: __m256i,
8001    k: __mmask16,
8002    a: __m256i,
8003    count: __m256i,
8004) -> __m256i {
8005    unsafe {
8006        let shf = _mm256_srlv_epi16(a, count).as_i16x16();
8007        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
8008    }
8009}
8010
8011/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8012///
8013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi16&expand=5541)
8014#[inline]
8015#[target_feature(enable = "avx512bw,avx512vl")]
8016#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8017#[cfg_attr(test, assert_instr(vpsrlvw))]
8018#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8019pub const fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
8020    unsafe {
8021        let shf = _mm256_srlv_epi16(a, count).as_i16x16();
8022        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
8023    }
8024}
8025
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi16&expand=5539)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u16x8();
        // Lanes whose shift count is >= 16 must end up zero, but `simd_shr`
        // is UB for out-of-range counts: clamp those counts to 0 before
        // shifting, then zero the affected result lanes afterwards.
        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x8::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u16x8(), count), u16x8::ZERO).as_m128i()
    }
}
8042
8043/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8044///
8045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi16&expand=5537)
8046#[inline]
8047#[target_feature(enable = "avx512bw,avx512vl")]
8048#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8049#[cfg_attr(test, assert_instr(vpsrlvw))]
8050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8051pub const fn _mm_mask_srlv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
8052    unsafe {
8053        let shf = _mm_srlv_epi16(a, count).as_i16x8();
8054        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
8055    }
8056}
8057
8058/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8059///
8060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi16&expand=5538)
8061#[inline]
8062#[target_feature(enable = "avx512bw,avx512vl")]
8063#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8064#[cfg_attr(test, assert_instr(vpsrlvw))]
8065#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8066pub const fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
8067    unsafe {
8068        let shf = _mm_srlv_epi16(a, count).as_i16x8();
8069        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
8070    }
8071}
8072
/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi16&expand=5398)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i {
    // Delegates directly to the `vpsraw` LLVM intrinsic; `transmute` only
    // reinterprets between same-size vector types.
    unsafe { transmute(vpsraw(a.as_i16x32(), count.as_i16x8())) }
}
8083
8084/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8085///
8086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi16&expand=5396)
8087#[inline]
8088#[target_feature(enable = "avx512bw")]
8089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8090#[cfg_attr(test, assert_instr(vpsraw))]
8091pub fn _mm512_mask_sra_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
8092    unsafe {
8093        let shf = _mm512_sra_epi16(a, count).as_i16x32();
8094        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
8095    }
8096}
8097
8098/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8099///
8100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi16&expand=5397)
8101#[inline]
8102#[target_feature(enable = "avx512bw")]
8103#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8104#[cfg_attr(test, assert_instr(vpsraw))]
8105pub fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
8106    unsafe {
8107        let shf = _mm512_sra_epi16(a, count).as_i16x32();
8108        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
8109    }
8110}
8111
8112/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8113///
8114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi16&expand=5393)
8115#[inline]
8116#[target_feature(enable = "avx512bw,avx512vl")]
8117#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8118#[cfg_attr(test, assert_instr(vpsraw))]
8119pub fn _mm256_mask_sra_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
8120    unsafe {
8121        let shf = _mm256_sra_epi16(a, count).as_i16x16();
8122        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
8123    }
8124}
8125
8126/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8127///
8128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi16&expand=5394)
8129#[inline]
8130#[target_feature(enable = "avx512bw,avx512vl")]
8131#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8132#[cfg_attr(test, assert_instr(vpsraw))]
8133pub fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
8134    unsafe {
8135        let shf = _mm256_sra_epi16(a, count).as_i16x16();
8136        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
8137    }
8138}
8139
8140/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8141///
8142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi16&expand=5390)
8143#[inline]
8144#[target_feature(enable = "avx512bw,avx512vl")]
8145#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8146#[cfg_attr(test, assert_instr(vpsraw))]
8147pub fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
8148    unsafe {
8149        let shf = _mm_sra_epi16(a, count).as_i16x8();
8150        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
8151    }
8152}
8153
8154/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8155///
8156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi16&expand=5391)
8157#[inline]
8158#[target_feature(enable = "avx512bw,avx512vl")]
8159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8160#[cfg_attr(test, assert_instr(vpsraw))]
8161pub fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
8162    unsafe {
8163        let shf = _mm_sra_epi16(a, count).as_i16x8();
8164        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
8165    }
8166}
8167
/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi16&expand=5427)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srai_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Shift counts of 16..=255 produce the same result as a shift by 15
        // (every bit becomes the sign bit), so clamp to 15 to keep the count
        // within the range where `simd_shr` is defined.
        transmute(simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16)))
    }
}
8183
/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi16&expand=5425)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_srai_epi16<const IMM8: u32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Clamp to 15: counts >= 16 are equivalent to a shift by 15 for an
        // arithmetic right shift, and keep `simd_shr` within its defined range.
        let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
        // Writemask blend: shifted lanes where k is set, `src` lanes elsewhere.
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}
8204
/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi16&expand=5426)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_srai_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Clamp to 15: counts >= 16 are equivalent to a shift by 15 for an
        // arithmetic right shift, and keep `simd_shr` within its defined range.
        let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
        // Zeromask blend: shifted lanes where k is set, zero elsewhere.
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}
8221
/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi16&expand=5422)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_srai_epi16<const IMM8: u32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Clamp to 15: counts >= 16 are equivalent to a shift by 15 for an
        // arithmetic right shift, and keep `simd_shr` within its defined range.
        let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
        // Writemask blend: shifted lanes where k is set, `src` lanes elsewhere.
        transmute(simd_select_bitmask(k, r, src.as_i16x16()))
    }
}
8242
/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi16&expand=5423)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_srai_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Clamp to 15: counts >= 16 are equivalent to a shift by 15 for an
        // arithmetic right shift, and keep `simd_shr` within its defined range.
        let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
        // Zeromask blend: shifted lanes where k is set, zero elsewhere.
        transmute(simd_select_bitmask(k, r, i16x16::ZERO))
    }
}
8259
/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi16&expand=5419)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_srai_epi16<const IMM8: u32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Clamp to 15: counts >= 16 are equivalent to a shift by 15 for an
        // arithmetic right shift, and keep `simd_shr` within its defined range.
        let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
        // Writemask blend: shifted lanes where k is set, `src` lanes elsewhere.
        transmute(simd_select_bitmask(k, r, src.as_i16x8()))
    }
}
8280
/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi16&expand=5420)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_srai_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Clamp to 15: counts >= 16 are equivalent to a shift by 15 for an
        // arithmetic right shift, and keep `simd_shr` within its defined range.
        let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
        // Zeromask blend: shifted lanes where k is set, zero elsewhere.
        transmute(simd_select_bitmask(k, r, i16x8::ZERO))
    }
}
8297
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi16&expand=5456)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        // Treat counts as unsigned so every out-of-range (including "negative")
        // count is detected by a single < 16 comparison.
        let count = count.as_u16x32();
        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
        // A per-lane count >= 16 yields all sign bits, which is the same as a
        // shift by 15; substitute 15 so `simd_shr` never sees an out-of-range
        // count (which would be UB for the generic SIMD intrinsic).
        let count = simd_select(no_overflow, transmute(count), i16x32::splat(15));
        simd_shr(a.as_i16x32(), count).as_m512i()
    }
}
8314
8315/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8316///
8317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi16&expand=5454)
8318#[inline]
8319#[target_feature(enable = "avx512bw")]
8320#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8321#[cfg_attr(test, assert_instr(vpsravw))]
8322#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8323pub const fn _mm512_mask_srav_epi16(
8324    src: __m512i,
8325    k: __mmask32,
8326    a: __m512i,
8327    count: __m512i,
8328) -> __m512i {
8329    unsafe {
8330        let shf = _mm512_srav_epi16(a, count).as_i16x32();
8331        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
8332    }
8333}
8334
8335/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8336///
8337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi16&expand=5455)
8338#[inline]
8339#[target_feature(enable = "avx512bw")]
8340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8341#[cfg_attr(test, assert_instr(vpsravw))]
8342#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8343pub const fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
8344    unsafe {
8345        let shf = _mm512_srav_epi16(a, count).as_i16x32();
8346        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
8347    }
8348}
8349
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi16&expand=5453)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        // Treat counts as unsigned so every out-of-range (including "negative")
        // count is detected by a single < 16 comparison.
        let count = count.as_u16x16();
        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
        // A per-lane count >= 16 yields all sign bits, which is the same as a
        // shift by 15; substitute 15 so `simd_shr` never sees an out-of-range
        // count (which would be UB for the generic SIMD intrinsic).
        let count = simd_select(no_overflow, transmute(count), i16x16::splat(15));
        simd_shr(a.as_i16x16(), count).as_m256i()
    }
}
8366
8367/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8368///
8369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi16&expand=5451)
8370#[inline]
8371#[target_feature(enable = "avx512bw,avx512vl")]
8372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8373#[cfg_attr(test, assert_instr(vpsravw))]
8374#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8375pub const fn _mm256_mask_srav_epi16(
8376    src: __m256i,
8377    k: __mmask16,
8378    a: __m256i,
8379    count: __m256i,
8380) -> __m256i {
8381    unsafe {
8382        let shf = _mm256_srav_epi16(a, count).as_i16x16();
8383        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
8384    }
8385}
8386
8387/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8388///
8389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi16&expand=5452)
8390#[inline]
8391#[target_feature(enable = "avx512bw,avx512vl")]
8392#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8393#[cfg_attr(test, assert_instr(vpsravw))]
8394#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8395pub const fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
8396    unsafe {
8397        let shf = _mm256_srav_epi16(a, count).as_i16x16();
8398        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
8399    }
8400}
8401
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi16&expand=5450)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        // Treat counts as unsigned so every out-of-range (including "negative")
        // count is detected by a single < 16 comparison.
        let count = count.as_u16x8();
        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
        // A per-lane count >= 16 yields all sign bits, which is the same as a
        // shift by 15; substitute 15 so `simd_shr` never sees an out-of-range
        // count (which would be UB for the generic SIMD intrinsic).
        let count = simd_select(no_overflow, transmute(count), i16x8::splat(15));
        simd_shr(a.as_i16x8(), count).as_m128i()
    }
}
8418
8419/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8420///
8421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi16&expand=5448)
8422#[inline]
8423#[target_feature(enable = "avx512bw,avx512vl")]
8424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8425#[cfg_attr(test, assert_instr(vpsravw))]
8426#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8427pub const fn _mm_mask_srav_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
8428    unsafe {
8429        let shf = _mm_srav_epi16(a, count).as_i16x8();
8430        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
8431    }
8432}
8433
8434/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8435///
8436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi16&expand=5449)
8437#[inline]
8438#[target_feature(enable = "avx512bw,avx512vl")]
8439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8440#[cfg_attr(test, assert_instr(vpsravw))]
8441#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8442pub const fn _mm_maskz_srav_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
8443    unsafe {
8444        let shf = _mm_srav_epi16(a, count).as_i16x8();
8445        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
8446    }
8447}
8448
/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi16&expand=4226)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm512_permutex2var_epi16(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    // Delegates to the LLVM `vpermi2w` intrinsic, which implements the
    // two-source cross-lane word shuffle described in the Intel docs above.
    unsafe { transmute(vpermi2w(a.as_i16x32(), idx.as_i16x32(), b.as_i16x32())) }
}
8459
8460/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
8461///
8462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi16&expand=4223)
8463#[inline]
8464#[target_feature(enable = "avx512bw")]
8465#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8466#[cfg_attr(test, assert_instr(vpermt2w))]
8467pub fn _mm512_mask_permutex2var_epi16(
8468    a: __m512i,
8469    k: __mmask32,
8470    idx: __m512i,
8471    b: __m512i,
8472) -> __m512i {
8473    unsafe {
8474        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
8475        transmute(simd_select_bitmask(k, permute, a.as_i16x32()))
8476    }
8477}
8478
8479/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8480///
8481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi16&expand=4225)
8482#[inline]
8483#[target_feature(enable = "avx512bw")]
8484#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8485#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
8486pub fn _mm512_maskz_permutex2var_epi16(
8487    k: __mmask32,
8488    a: __m512i,
8489    idx: __m512i,
8490    b: __m512i,
8491) -> __m512i {
8492    unsafe {
8493        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
8494        transmute(simd_select_bitmask(k, permute, i16x32::ZERO))
8495    }
8496}
8497
8498/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
8499///
8500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi16&expand=4224)
8501#[inline]
8502#[target_feature(enable = "avx512bw")]
8503#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8504#[cfg_attr(test, assert_instr(vpermi2w))]
8505pub fn _mm512_mask2_permutex2var_epi16(
8506    a: __m512i,
8507    idx: __m512i,
8508    k: __mmask32,
8509    b: __m512i,
8510) -> __m512i {
8511    unsafe {
8512        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
8513        transmute(simd_select_bitmask(k, permute, idx.as_i16x32()))
8514    }
8515}
8516
/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi16&expand=4222)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm256_permutex2var_epi16(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    // Delegates to the LLVM `vpermi2w` 256-bit intrinsic, which implements the
    // two-source cross-lane word shuffle described in the Intel docs above.
    unsafe { transmute(vpermi2w256(a.as_i16x16(), idx.as_i16x16(), b.as_i16x16())) }
}
8527
8528/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
8529///
8530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi16&expand=4219)
8531#[inline]
8532#[target_feature(enable = "avx512bw,avx512vl")]
8533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8534#[cfg_attr(test, assert_instr(vpermt2w))]
8535pub fn _mm256_mask_permutex2var_epi16(
8536    a: __m256i,
8537    k: __mmask16,
8538    idx: __m256i,
8539    b: __m256i,
8540) -> __m256i {
8541    unsafe {
8542        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
8543        transmute(simd_select_bitmask(k, permute, a.as_i16x16()))
8544    }
8545}
8546
8547/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8548///
8549/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi16&expand=4221)
8550#[inline]
8551#[target_feature(enable = "avx512bw,avx512vl")]
8552#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8553#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
8554pub fn _mm256_maskz_permutex2var_epi16(
8555    k: __mmask16,
8556    a: __m256i,
8557    idx: __m256i,
8558    b: __m256i,
8559) -> __m256i {
8560    unsafe {
8561        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
8562        transmute(simd_select_bitmask(k, permute, i16x16::ZERO))
8563    }
8564}
8565
8566/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
8567///
8568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi16&expand=4220)
8569#[inline]
8570#[target_feature(enable = "avx512bw,avx512vl")]
8571#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8572#[cfg_attr(test, assert_instr(vpermi2w))]
8573pub fn _mm256_mask2_permutex2var_epi16(
8574    a: __m256i,
8575    idx: __m256i,
8576    k: __mmask16,
8577    b: __m256i,
8578) -> __m256i {
8579    unsafe {
8580        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
8581        transmute(simd_select_bitmask(k, permute, idx.as_i16x16()))
8582    }
8583}
8584
/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi16&expand=4218)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm_permutex2var_epi16(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    // Delegates to the LLVM `vpermi2w` 128-bit intrinsic, which implements the
    // two-source cross-lane word shuffle described in the Intel docs above.
    unsafe { transmute(vpermi2w128(a.as_i16x8(), idx.as_i16x8(), b.as_i16x8())) }
}
8595
8596/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
8597///
8598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi16&expand=4215)
8599#[inline]
8600#[target_feature(enable = "avx512bw,avx512vl")]
8601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8602#[cfg_attr(test, assert_instr(vpermt2w))]
8603pub fn _mm_mask_permutex2var_epi16(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
8604    unsafe {
8605        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
8606        transmute(simd_select_bitmask(k, permute, a.as_i16x8()))
8607    }
8608}
8609
8610/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8611///
8612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi16&expand=4217)
8613#[inline]
8614#[target_feature(enable = "avx512bw,avx512vl")]
8615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8616#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
8617pub fn _mm_maskz_permutex2var_epi16(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
8618    unsafe {
8619        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
8620        transmute(simd_select_bitmask(k, permute, i16x8::ZERO))
8621    }
8622}
8623
8624/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
8625///
8626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi16&expand=4216)
8627#[inline]
8628#[target_feature(enable = "avx512bw,avx512vl")]
8629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8630#[cfg_attr(test, assert_instr(vpermi2w))]
8631pub fn _mm_mask2_permutex2var_epi16(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
8632    unsafe {
8633        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
8634        transmute(simd_select_bitmask(k, permute, idx.as_i16x8()))
8635    }
8636}
8637
/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi16&expand=4295)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm512_permutexvar_epi16(idx: __m512i, a: __m512i) -> __m512i {
    // Delegates to the LLVM `vpermw` intrinsic; note the argument order is
    // (data, indices) while the public API takes (idx, a).
    unsafe { transmute(vpermw(a.as_i16x32(), idx.as_i16x32())) }
}
8648
8649/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8650///
8651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi16&expand=4293)
8652#[inline]
8653#[target_feature(enable = "avx512bw")]
8654#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8655#[cfg_attr(test, assert_instr(vpermw))]
8656pub fn _mm512_mask_permutexvar_epi16(
8657    src: __m512i,
8658    k: __mmask32,
8659    idx: __m512i,
8660    a: __m512i,
8661) -> __m512i {
8662    unsafe {
8663        let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
8664        transmute(simd_select_bitmask(k, permute, src.as_i16x32()))
8665    }
8666}
8667
8668/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8669///
8670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi16&expand=4294)
8671#[inline]
8672#[target_feature(enable = "avx512bw")]
8673#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8674#[cfg_attr(test, assert_instr(vpermw))]
8675pub fn _mm512_maskz_permutexvar_epi16(k: __mmask32, idx: __m512i, a: __m512i) -> __m512i {
8676    unsafe {
8677        let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
8678        transmute(simd_select_bitmask(k, permute, i16x32::ZERO))
8679    }
8680}
8681
/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi16&expand=4292)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm256_permutexvar_epi16(idx: __m256i, a: __m256i) -> __m256i {
    // Delegates to the LLVM `vpermw` 256-bit intrinsic; note the argument
    // order is (data, indices) while the public API takes (idx, a).
    unsafe { transmute(vpermw256(a.as_i16x16(), idx.as_i16x16())) }
}
8692
8693/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8694///
8695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi16&expand=4290)
8696#[inline]
8697#[target_feature(enable = "avx512bw,avx512vl")]
8698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8699#[cfg_attr(test, assert_instr(vpermw))]
8700pub fn _mm256_mask_permutexvar_epi16(
8701    src: __m256i,
8702    k: __mmask16,
8703    idx: __m256i,
8704    a: __m256i,
8705) -> __m256i {
8706    unsafe {
8707        let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
8708        transmute(simd_select_bitmask(k, permute, src.as_i16x16()))
8709    }
8710}
8711
8712/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8713///
8714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi16&expand=4291)
8715#[inline]
8716#[target_feature(enable = "avx512bw,avx512vl")]
8717#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8718#[cfg_attr(test, assert_instr(vpermw))]
8719pub fn _mm256_maskz_permutexvar_epi16(k: __mmask16, idx: __m256i, a: __m256i) -> __m256i {
8720    unsafe {
8721        let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
8722        transmute(simd_select_bitmask(k, permute, i16x16::ZERO))
8723    }
8724}
8725
8726/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
8727///
8728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutexvar_epi16&expand=4289)
8729#[inline]
8730#[target_feature(enable = "avx512bw,avx512vl")]
8731#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8732#[cfg_attr(test, assert_instr(vpermw))]
8733pub fn _mm_permutexvar_epi16(idx: __m128i, a: __m128i) -> __m128i {
8734    unsafe { transmute(vpermw128(a.as_i16x8(), idx.as_i16x8())) }
8735}
8736
8737/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8738///
8739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutexvar_epi16&expand=4287)
8740#[inline]
8741#[target_feature(enable = "avx512bw,avx512vl")]
8742#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8743#[cfg_attr(test, assert_instr(vpermw))]
8744pub fn _mm_mask_permutexvar_epi16(src: __m128i, k: __mmask8, idx: __m128i, a: __m128i) -> __m128i {
8745    unsafe {
8746        let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
8747        transmute(simd_select_bitmask(k, permute, src.as_i16x8()))
8748    }
8749}
8750
8751/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8752///
8753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutexvar_epi16&expand=4288)
8754#[inline]
8755#[target_feature(enable = "avx512bw,avx512vl")]
8756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8757#[cfg_attr(test, assert_instr(vpermw))]
8758pub fn _mm_maskz_permutexvar_epi16(k: __mmask8, idx: __m128i, a: __m128i) -> __m128i {
8759    unsafe {
8760        let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
8761        transmute(simd_select_bitmask(k, permute, i16x8::ZERO))
8762    }
8763}
8764
8765/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
8766///
8767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi16&expand=430)
8768#[inline]
8769#[target_feature(enable = "avx512bw")]
8770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8771#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
8772#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8773pub const fn _mm512_mask_blend_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
8774    unsafe { transmute(simd_select_bitmask(k, b.as_i16x32(), a.as_i16x32())) }
8775}
8776
8777/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
8778///
8779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi16&expand=429)
8780#[inline]
8781#[target_feature(enable = "avx512bw,avx512vl")]
8782#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8783#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
8784#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8785pub const fn _mm256_mask_blend_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
8786    unsafe { transmute(simd_select_bitmask(k, b.as_i16x16(), a.as_i16x16())) }
8787}
8788
8789/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
8790///
8791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi16&expand=427)
8792#[inline]
8793#[target_feature(enable = "avx512bw,avx512vl")]
8794#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8795#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
8796#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8797pub const fn _mm_mask_blend_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
8798    unsafe { transmute(simd_select_bitmask(k, b.as_i16x8(), a.as_i16x8())) }
8799}
8800
8801/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
8802///
8803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi8&expand=441)
8804#[inline]
8805#[target_feature(enable = "avx512bw")]
8806#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8807#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
8808#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8809pub const fn _mm512_mask_blend_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
8810    unsafe { transmute(simd_select_bitmask(k, b.as_i8x64(), a.as_i8x64())) }
8811}
8812
8813/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
8814///
8815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi8&expand=440)
8816#[inline]
8817#[target_feature(enable = "avx512bw,avx512vl")]
8818#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8819#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
8820#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8821pub const fn _mm256_mask_blend_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
8822    unsafe { transmute(simd_select_bitmask(k, b.as_i8x32(), a.as_i8x32())) }
8823}
8824
8825/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
8826///
8827/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi8&expand=439)
8828#[inline]
8829#[target_feature(enable = "avx512bw,avx512vl")]
8830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8831#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
8832#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8833pub const fn _mm_mask_blend_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
8834    unsafe { transmute(simd_select_bitmask(k, b.as_i8x16(), a.as_i8x16())) }
8835}
8836
8837/// Broadcast the low packed 16-bit integer from a to all elements of dst.
8838///
8839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastw_epi16&expand=587)
8840#[inline]
8841#[target_feature(enable = "avx512bw")]
8842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8843#[cfg_attr(test, assert_instr(vpbroadcastw))]
8844#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8845pub const fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i {
8846    unsafe {
8847        let a = _mm512_castsi128_si512(a).as_i16x32();
8848        let ret: i16x32 = simd_shuffle!(
8849            a,
8850            a,
8851            [
8852                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8853                0, 0, 0, 0,
8854            ],
8855        );
8856        transmute(ret)
8857    }
8858}
8859
8860/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8861///
8862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastw_epi16&expand=588)
8863#[inline]
8864#[target_feature(enable = "avx512bw")]
8865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8866#[cfg_attr(test, assert_instr(vpbroadcastw))]
8867#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8868pub const fn _mm512_mask_broadcastw_epi16(src: __m512i, k: __mmask32, a: __m128i) -> __m512i {
8869    unsafe {
8870        let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
8871        transmute(simd_select_bitmask(k, broadcast, src.as_i16x32()))
8872    }
8873}
8874
8875/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8876///
8877/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastw_epi16&expand=589)
8878#[inline]
8879#[target_feature(enable = "avx512bw")]
8880#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8881#[cfg_attr(test, assert_instr(vpbroadcastw))]
8882#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8883pub const fn _mm512_maskz_broadcastw_epi16(k: __mmask32, a: __m128i) -> __m512i {
8884    unsafe {
8885        let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
8886        transmute(simd_select_bitmask(k, broadcast, i16x32::ZERO))
8887    }
8888}
8889
8890/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8891///
8892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastw_epi16&expand=585)
8893#[inline]
8894#[target_feature(enable = "avx512bw,avx512vl")]
8895#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8896#[cfg_attr(test, assert_instr(vpbroadcastw))]
8897#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8898pub const fn _mm256_mask_broadcastw_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
8899    unsafe {
8900        let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
8901        transmute(simd_select_bitmask(k, broadcast, src.as_i16x16()))
8902    }
8903}
8904
8905/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8906///
8907/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastw_epi16&expand=586)
8908#[inline]
8909#[target_feature(enable = "avx512bw,avx512vl")]
8910#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8911#[cfg_attr(test, assert_instr(vpbroadcastw))]
8912#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8913pub const fn _mm256_maskz_broadcastw_epi16(k: __mmask16, a: __m128i) -> __m256i {
8914    unsafe {
8915        let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
8916        transmute(simd_select_bitmask(k, broadcast, i16x16::ZERO))
8917    }
8918}
8919
8920/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8921///
8922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastw_epi16&expand=582)
8923#[inline]
8924#[target_feature(enable = "avx512bw,avx512vl")]
8925#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8926#[cfg_attr(test, assert_instr(vpbroadcastw))]
8927#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8928pub const fn _mm_mask_broadcastw_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
8929    unsafe {
8930        let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
8931        transmute(simd_select_bitmask(k, broadcast, src.as_i16x8()))
8932    }
8933}
8934
8935/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8936///
8937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastw_epi16&expand=583)
8938#[inline]
8939#[target_feature(enable = "avx512bw,avx512vl")]
8940#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8941#[cfg_attr(test, assert_instr(vpbroadcastw))]
8942#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8943pub const fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i {
8944    unsafe {
8945        let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
8946        transmute(simd_select_bitmask(k, broadcast, i16x8::ZERO))
8947    }
8948}
8949
8950/// Broadcast the low packed 8-bit integer from a to all elements of dst.
8951///
8952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastb_epi8&expand=536)
8953#[inline]
8954#[target_feature(enable = "avx512bw")]
8955#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8956#[cfg_attr(test, assert_instr(vpbroadcastb))]
8957#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8958pub const fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i {
8959    unsafe {
8960        let a = _mm512_castsi128_si512(a).as_i8x64();
8961        let ret: i8x64 = simd_shuffle!(
8962            a,
8963            a,
8964            [
8965                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8966                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8967                0, 0, 0, 0, 0, 0, 0, 0,
8968            ],
8969        );
8970        transmute(ret)
8971    }
8972}
8973
8974/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8975///
8976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastb_epi8&expand=537)
8977#[inline]
8978#[target_feature(enable = "avx512bw")]
8979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8980#[cfg_attr(test, assert_instr(vpbroadcastb))]
8981#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8982pub const fn _mm512_mask_broadcastb_epi8(src: __m512i, k: __mmask64, a: __m128i) -> __m512i {
8983    unsafe {
8984        let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
8985        transmute(simd_select_bitmask(k, broadcast, src.as_i8x64()))
8986    }
8987}
8988
8989/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8990///
8991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastb_epi8&expand=538)
8992#[inline]
8993#[target_feature(enable = "avx512bw")]
8994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8995#[cfg_attr(test, assert_instr(vpbroadcastb))]
8996#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8997pub const fn _mm512_maskz_broadcastb_epi8(k: __mmask64, a: __m128i) -> __m512i {
8998    unsafe {
8999        let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
9000        transmute(simd_select_bitmask(k, broadcast, i8x64::ZERO))
9001    }
9002}
9003
9004/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9005///
9006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastb_epi8&expand=534)
9007#[inline]
9008#[target_feature(enable = "avx512bw,avx512vl")]
9009#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9010#[cfg_attr(test, assert_instr(vpbroadcastb))]
9011#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9012pub const fn _mm256_mask_broadcastb_epi8(src: __m256i, k: __mmask32, a: __m128i) -> __m256i {
9013    unsafe {
9014        let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
9015        transmute(simd_select_bitmask(k, broadcast, src.as_i8x32()))
9016    }
9017}
9018
9019/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9020///
9021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastb_epi8&expand=535)
9022#[inline]
9023#[target_feature(enable = "avx512bw,avx512vl")]
9024#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9025#[cfg_attr(test, assert_instr(vpbroadcastb))]
9026#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9027pub const fn _mm256_maskz_broadcastb_epi8(k: __mmask32, a: __m128i) -> __m256i {
9028    unsafe {
9029        let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
9030        transmute(simd_select_bitmask(k, broadcast, i8x32::ZERO))
9031    }
9032}
9033
9034/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9035///
9036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastb_epi8&expand=531)
9037#[inline]
9038#[target_feature(enable = "avx512bw,avx512vl")]
9039#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9040#[cfg_attr(test, assert_instr(vpbroadcastb))]
9041#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9042pub const fn _mm_mask_broadcastb_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
9043    unsafe {
9044        let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
9045        transmute(simd_select_bitmask(k, broadcast, src.as_i8x16()))
9046    }
9047}
9048
9049/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9050///
9051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastb_epi8&expand=532)
9052#[inline]
9053#[target_feature(enable = "avx512bw,avx512vl")]
9054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9055#[cfg_attr(test, assert_instr(vpbroadcastb))]
9056#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9057pub const fn _mm_maskz_broadcastb_epi8(k: __mmask16, a: __m128i) -> __m128i {
9058    unsafe {
9059        let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
9060        transmute(simd_select_bitmask(k, broadcast, i8x16::ZERO))
9061    }
9062}
9063
9064/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
9065///
9066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi16&expand=6012)
9067#[inline]
9068#[target_feature(enable = "avx512bw")]
9069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9070#[cfg_attr(test, assert_instr(vpunpckhwd))]
9071#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9072pub const fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i {
9073    unsafe {
9074        let a = a.as_i16x32();
9075        let b = b.as_i16x32();
9076        #[rustfmt::skip]
9077        let r: i16x32 = simd_shuffle!(
9078            a,
9079            b,
9080            [
9081                4, 32 + 4, 5, 32 + 5,
9082                6, 32 + 6, 7, 32 + 7,
9083                12, 32 + 12, 13, 32 + 13,
9084                14, 32 + 14, 15, 32 + 15,
9085                20, 32 + 20, 21, 32 + 21,
9086                22, 32 + 22, 23, 32 + 23,
9087                28, 32 + 28, 29, 32 + 29,
9088                30, 32 + 30, 31, 32 + 31,
9089            ],
9090        );
9091        transmute(r)
9092    }
9093}
9094
9095/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9096///
9097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi16&expand=6010)
9098#[inline]
9099#[target_feature(enable = "avx512bw")]
9100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9101#[cfg_attr(test, assert_instr(vpunpckhwd))]
9102#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9103pub const fn _mm512_mask_unpackhi_epi16(
9104    src: __m512i,
9105    k: __mmask32,
9106    a: __m512i,
9107    b: __m512i,
9108) -> __m512i {
9109    unsafe {
9110        let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
9111        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x32()))
9112    }
9113}
9114
9115/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9116///
9117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi16&expand=6011)
9118#[inline]
9119#[target_feature(enable = "avx512bw")]
9120#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9121#[cfg_attr(test, assert_instr(vpunpckhwd))]
9122#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9123pub const fn _mm512_maskz_unpackhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
9124    unsafe {
9125        let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
9126        transmute(simd_select_bitmask(k, unpackhi, i16x32::ZERO))
9127    }
9128}
9129
9130/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9131///
9132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi16&expand=6007)
9133#[inline]
9134#[target_feature(enable = "avx512bw,avx512vl")]
9135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9136#[cfg_attr(test, assert_instr(vpunpckhwd))]
9137#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9138pub const fn _mm256_mask_unpackhi_epi16(
9139    src: __m256i,
9140    k: __mmask16,
9141    a: __m256i,
9142    b: __m256i,
9143) -> __m256i {
9144    unsafe {
9145        let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
9146        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x16()))
9147    }
9148}
9149
9150/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9151///
9152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi16&expand=6008)
9153#[inline]
9154#[target_feature(enable = "avx512bw,avx512vl")]
9155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9156#[cfg_attr(test, assert_instr(vpunpckhwd))]
9157#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9158pub const fn _mm256_maskz_unpackhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
9159    unsafe {
9160        let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
9161        transmute(simd_select_bitmask(k, unpackhi, i16x16::ZERO))
9162    }
9163}
9164
9165/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9166///
9167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi16&expand=6004)
9168#[inline]
9169#[target_feature(enable = "avx512bw,avx512vl")]
9170#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9171#[cfg_attr(test, assert_instr(vpunpckhwd))]
9172#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9173pub const fn _mm_mask_unpackhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
9174    unsafe {
9175        let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
9176        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x8()))
9177    }
9178}
9179
9180/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9181///
9182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi16&expand=6005)
9183#[inline]
9184#[target_feature(enable = "avx512bw,avx512vl")]
9185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9186#[cfg_attr(test, assert_instr(vpunpckhwd))]
9187#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9188pub const fn _mm_maskz_unpackhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
9189    unsafe {
9190        let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
9191        transmute(simd_select_bitmask(k, unpackhi, i16x8::ZERO))
9192    }
9193}
9194
9195/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
9196///
9197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi8&expand=6039)
9198#[inline]
9199#[target_feature(enable = "avx512bw")]
9200#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9201#[cfg_attr(test, assert_instr(vpunpckhbw))]
9202#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9203pub const fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i {
9204    unsafe {
9205        let a = a.as_i8x64();
9206        let b = b.as_i8x64();
9207        #[rustfmt::skip]
9208        let r: i8x64 = simd_shuffle!(
9209            a,
9210            b,
9211            [
9212                8, 64 + 8, 9, 64 + 9,
9213                10, 64 + 10, 11, 64 + 11,
9214                12, 64 + 12, 13, 64 + 13,
9215                14, 64 + 14, 15, 64 + 15,
9216                24, 64 + 24, 25, 64 + 25,
9217                26, 64 + 26, 27, 64 + 27,
9218                28, 64 + 28, 29, 64 + 29,
9219                30, 64 + 30, 31, 64 + 31,
9220                40, 64 + 40, 41, 64 + 41,
9221                42, 64 + 42, 43, 64 + 43,
9222                44, 64 + 44, 45, 64 + 45,
9223                46, 64 + 46, 47, 64 + 47,
9224                56, 64 + 56, 57, 64 + 57,
9225                58, 64 + 58, 59, 64 + 59,
9226                60, 64 + 60, 61, 64 + 61,
9227                62, 64 + 62, 63, 64 + 63,
9228            ],
9229        );
9230        transmute(r)
9231    }
9232}
9233
9234/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9235///
9236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi8&expand=6037)
9237#[inline]
9238#[target_feature(enable = "avx512bw")]
9239#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9240#[cfg_attr(test, assert_instr(vpunpckhbw))]
9241#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9242pub const fn _mm512_mask_unpackhi_epi8(
9243    src: __m512i,
9244    k: __mmask64,
9245    a: __m512i,
9246    b: __m512i,
9247) -> __m512i {
9248    unsafe {
9249        let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
9250        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x64()))
9251    }
9252}
9253
9254/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9255///
9256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi8&expand=6038)
9257#[inline]
9258#[target_feature(enable = "avx512bw")]
9259#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9260#[cfg_attr(test, assert_instr(vpunpckhbw))]
9261#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9262pub const fn _mm512_maskz_unpackhi_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
9263    unsafe {
9264        let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
9265        transmute(simd_select_bitmask(k, unpackhi, i8x64::ZERO))
9266    }
9267}
9268
9269/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9270///
9271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi8&expand=6034)
9272#[inline]
9273#[target_feature(enable = "avx512bw,avx512vl")]
9274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9275#[cfg_attr(test, assert_instr(vpunpckhbw))]
9276#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9277pub const fn _mm256_mask_unpackhi_epi8(
9278    src: __m256i,
9279    k: __mmask32,
9280    a: __m256i,
9281    b: __m256i,
9282) -> __m256i {
9283    unsafe {
9284        let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
9285        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x32()))
9286    }
9287}
9288
9289/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9290///
9291/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi8&expand=6035)
9292#[inline]
9293#[target_feature(enable = "avx512bw,avx512vl")]
9294#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9295#[cfg_attr(test, assert_instr(vpunpckhbw))]
9296#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9297pub const fn _mm256_maskz_unpackhi_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
9298    unsafe {
9299        let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
9300        transmute(simd_select_bitmask(k, unpackhi, i8x32::ZERO))
9301    }
9302}
9303
9304/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9305///
9306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi8&expand=6031)
9307#[inline]
9308#[target_feature(enable = "avx512bw,avx512vl")]
9309#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9310#[cfg_attr(test, assert_instr(vpunpckhbw))]
9311#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9312pub const fn _mm_mask_unpackhi_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
9313    unsafe {
9314        let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
9315        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x16()))
9316    }
9317}
9318
9319/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9320///
9321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi8&expand=6032)
9322#[inline]
9323#[target_feature(enable = "avx512bw,avx512vl")]
9324#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9325#[cfg_attr(test, assert_instr(vpunpckhbw))]
9326#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9327pub const fn _mm_maskz_unpackhi_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
9328    unsafe {
9329        let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
9330        transmute(simd_select_bitmask(k, unpackhi, i8x16::ZERO))
9331    }
9332}
9333
/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi16&expand=6069)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        // Shuffle indices 0..=31 pick elements of `a`; 32..=63 (written `32+n`) pick
        // elements of `b`. Each row group of 8 indices below produces one 128-bit lane
        // (8 x i16), alternating the low 4 elements of that lane of `a` with the low 4
        // elements of the same lane of `b` — the VPUNPCKLWD lane-local interleave.
        #[rustfmt::skip]
        let r: i16x32 = simd_shuffle!(
            a,
            b,
            [
               0,  32+0,   1, 32+1,
               2,  32+2,   3, 32+3,
               8,  32+8,   9, 32+9,
               10, 32+10, 11, 32+11,
               16, 32+16, 17, 32+17,
               18, 32+18, 19, 32+19,
               24, 32+24, 25, 32+25,
               26, 32+26, 27, 32+27
            ],
        );
        transmute(r)
    }
}
9364
9365/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9366///
9367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi16&expand=6067)
9368#[inline]
9369#[target_feature(enable = "avx512bw")]
9370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9371#[cfg_attr(test, assert_instr(vpunpcklwd))]
9372#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9373pub const fn _mm512_mask_unpacklo_epi16(
9374    src: __m512i,
9375    k: __mmask32,
9376    a: __m512i,
9377    b: __m512i,
9378) -> __m512i {
9379    unsafe {
9380        let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
9381        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x32()))
9382    }
9383}
9384
9385/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9386///
9387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi16&expand=6068)
9388#[inline]
9389#[target_feature(enable = "avx512bw")]
9390#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9391#[cfg_attr(test, assert_instr(vpunpcklwd))]
9392#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9393pub const fn _mm512_maskz_unpacklo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
9394    unsafe {
9395        let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
9396        transmute(simd_select_bitmask(k, unpacklo, i16x32::ZERO))
9397    }
9398}
9399
9400/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9401///
9402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi16&expand=6064)
9403#[inline]
9404#[target_feature(enable = "avx512bw,avx512vl")]
9405#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9406#[cfg_attr(test, assert_instr(vpunpcklwd))]
9407#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9408pub const fn _mm256_mask_unpacklo_epi16(
9409    src: __m256i,
9410    k: __mmask16,
9411    a: __m256i,
9412    b: __m256i,
9413) -> __m256i {
9414    unsafe {
9415        let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
9416        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x16()))
9417    }
9418}
9419
9420/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9421///
9422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi16&expand=6065)
9423#[inline]
9424#[target_feature(enable = "avx512bw,avx512vl")]
9425#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9426#[cfg_attr(test, assert_instr(vpunpcklwd))]
9427#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9428pub const fn _mm256_maskz_unpacklo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
9429    unsafe {
9430        let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
9431        transmute(simd_select_bitmask(k, unpacklo, i16x16::ZERO))
9432    }
9433}
9434
9435/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9436///
9437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi16&expand=6061)
9438#[inline]
9439#[target_feature(enable = "avx512bw,avx512vl")]
9440#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9441#[cfg_attr(test, assert_instr(vpunpcklwd))]
9442#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9443pub const fn _mm_mask_unpacklo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
9444    unsafe {
9445        let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
9446        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x8()))
9447    }
9448}
9449
9450/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9451///
9452/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi16&expand=6062)
9453#[inline]
9454#[target_feature(enable = "avx512bw,avx512vl")]
9455#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9456#[cfg_attr(test, assert_instr(vpunpcklwd))]
9457#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9458pub const fn _mm_maskz_unpacklo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
9459    unsafe {
9460        let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
9461        transmute(simd_select_bitmask(k, unpacklo, i16x8::ZERO))
9462    }
9463}
9464
/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi8&expand=6096)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        // Shuffle indices 0..=63 pick elements of `a`; 64..=127 (written `64+n`) pick
        // elements of `b`. Each group of four rows below produces one 128-bit lane
        // (16 x i8), alternating the low 8 bytes of that lane of `a` with the low 8
        // bytes of the same lane of `b` — the VPUNPCKLBW lane-local interleave.
        #[rustfmt::skip]
        let r: i8x64 = simd_shuffle!(
            a,
            b,
            [
                0,  64+0,   1, 64+1,
                2,  64+2,   3, 64+3,
                4,  64+4,   5, 64+5,
                6,  64+6,   7, 64+7,
                16, 64+16, 17, 64+17,
                18, 64+18, 19, 64+19,
                20, 64+20, 21, 64+21,
                22, 64+22, 23, 64+23,
                32, 64+32, 33, 64+33,
                34, 64+34, 35, 64+35,
                36, 64+36, 37, 64+37,
                38, 64+38, 39, 64+39,
                48, 64+48, 49, 64+49,
                50, 64+50, 51, 64+51,
                52, 64+52, 53, 64+53,
                54, 64+54, 55, 64+55,
            ],
        );
        transmute(r)
    }
}
9503
9504/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9505///
9506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi8&expand=6094)
9507#[inline]
9508#[target_feature(enable = "avx512bw")]
9509#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9510#[cfg_attr(test, assert_instr(vpunpcklbw))]
9511#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9512pub const fn _mm512_mask_unpacklo_epi8(
9513    src: __m512i,
9514    k: __mmask64,
9515    a: __m512i,
9516    b: __m512i,
9517) -> __m512i {
9518    unsafe {
9519        let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
9520        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x64()))
9521    }
9522}
9523
9524/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9525///
9526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi8&expand=6095)
9527#[inline]
9528#[target_feature(enable = "avx512bw")]
9529#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9530#[cfg_attr(test, assert_instr(vpunpcklbw))]
9531#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9532pub const fn _mm512_maskz_unpacklo_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
9533    unsafe {
9534        let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
9535        transmute(simd_select_bitmask(k, unpacklo, i8x64::ZERO))
9536    }
9537}
9538
9539/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9540///
9541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi8&expand=6091)
9542#[inline]
9543#[target_feature(enable = "avx512bw,avx512vl")]
9544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9545#[cfg_attr(test, assert_instr(vpunpcklbw))]
9546#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9547pub const fn _mm256_mask_unpacklo_epi8(
9548    src: __m256i,
9549    k: __mmask32,
9550    a: __m256i,
9551    b: __m256i,
9552) -> __m256i {
9553    unsafe {
9554        let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
9555        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x32()))
9556    }
9557}
9558
9559/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9560///
9561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi8&expand=6092)
9562#[inline]
9563#[target_feature(enable = "avx512bw,avx512vl")]
9564#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9565#[cfg_attr(test, assert_instr(vpunpcklbw))]
9566#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9567pub const fn _mm256_maskz_unpacklo_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
9568    unsafe {
9569        let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
9570        transmute(simd_select_bitmask(k, unpacklo, i8x32::ZERO))
9571    }
9572}
9573
9574/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9575///
9576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi8&expand=6088)
9577#[inline]
9578#[target_feature(enable = "avx512bw,avx512vl")]
9579#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9580#[cfg_attr(test, assert_instr(vpunpcklbw))]
9581#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9582pub const fn _mm_mask_unpacklo_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
9583    unsafe {
9584        let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
9585        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x16()))
9586    }
9587}
9588
9589/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9590///
9591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi8&expand=6089)
9592#[inline]
9593#[target_feature(enable = "avx512bw,avx512vl")]
9594#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9595#[cfg_attr(test, assert_instr(vpunpcklbw))]
9596#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9597pub const fn _mm_maskz_unpacklo_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
9598    unsafe {
9599        let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
9600        transmute(simd_select_bitmask(k, unpacklo, i8x16::ZERO))
9601    }
9602}
9603
9604/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9605///
9606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi16&expand=3795)
9607#[inline]
9608#[target_feature(enable = "avx512bw")]
9609#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9610#[cfg_attr(test, assert_instr(vmovdqu16))]
9611#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9612pub const fn _mm512_mask_mov_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
9613    unsafe {
9614        let mov = a.as_i16x32();
9615        transmute(simd_select_bitmask(k, mov, src.as_i16x32()))
9616    }
9617}
9618
9619/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9620///
9621/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi16&expand=3796)
9622#[inline]
9623#[target_feature(enable = "avx512bw")]
9624#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9625#[cfg_attr(test, assert_instr(vmovdqu16))]
9626#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9627pub const fn _mm512_maskz_mov_epi16(k: __mmask32, a: __m512i) -> __m512i {
9628    unsafe {
9629        let mov = a.as_i16x32();
9630        transmute(simd_select_bitmask(k, mov, i16x32::ZERO))
9631    }
9632}
9633
9634/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9635///
9636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi16&expand=3793)
9637#[inline]
9638#[target_feature(enable = "avx512bw,avx512vl")]
9639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9640#[cfg_attr(test, assert_instr(vmovdqu16))]
9641#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9642pub const fn _mm256_mask_mov_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
9643    unsafe {
9644        let mov = a.as_i16x16();
9645        transmute(simd_select_bitmask(k, mov, src.as_i16x16()))
9646    }
9647}
9648
9649/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9650///
9651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi16&expand=3794)
9652#[inline]
9653#[target_feature(enable = "avx512bw,avx512vl")]
9654#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9655#[cfg_attr(test, assert_instr(vmovdqu16))]
9656#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9657pub const fn _mm256_maskz_mov_epi16(k: __mmask16, a: __m256i) -> __m256i {
9658    unsafe {
9659        let mov = a.as_i16x16();
9660        transmute(simd_select_bitmask(k, mov, i16x16::ZERO))
9661    }
9662}
9663
9664/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9665///
9666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi16&expand=3791)
9667#[inline]
9668#[target_feature(enable = "avx512bw,avx512vl")]
9669#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9670#[cfg_attr(test, assert_instr(vmovdqu16))]
9671#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9672pub const fn _mm_mask_mov_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
9673    unsafe {
9674        let mov = a.as_i16x8();
9675        transmute(simd_select_bitmask(k, mov, src.as_i16x8()))
9676    }
9677}
9678
9679/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9680///
9681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi16&expand=3792)
9682#[inline]
9683#[target_feature(enable = "avx512bw,avx512vl")]
9684#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9685#[cfg_attr(test, assert_instr(vmovdqu16))]
9686#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9687pub const fn _mm_maskz_mov_epi16(k: __mmask8, a: __m128i) -> __m128i {
9688    unsafe {
9689        let mov = a.as_i16x8();
9690        transmute(simd_select_bitmask(k, mov, i16x8::ZERO))
9691    }
9692}
9693
9694/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9695///
9696/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi8&expand=3813)
9697#[inline]
9698#[target_feature(enable = "avx512bw")]
9699#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9700#[cfg_attr(test, assert_instr(vmovdqu8))]
9701#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9702pub const fn _mm512_mask_mov_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
9703    unsafe {
9704        let mov = a.as_i8x64();
9705        transmute(simd_select_bitmask(k, mov, src.as_i8x64()))
9706    }
9707}
9708
9709/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9710///
9711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi8&expand=3814)
9712#[inline]
9713#[target_feature(enable = "avx512bw")]
9714#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9715#[cfg_attr(test, assert_instr(vmovdqu8))]
9716#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9717pub const fn _mm512_maskz_mov_epi8(k: __mmask64, a: __m512i) -> __m512i {
9718    unsafe {
9719        let mov = a.as_i8x64();
9720        transmute(simd_select_bitmask(k, mov, i8x64::ZERO))
9721    }
9722}
9723
9724/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9725///
9726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi8&expand=3811)
9727#[inline]
9728#[target_feature(enable = "avx512bw,avx512vl")]
9729#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9730#[cfg_attr(test, assert_instr(vmovdqu8))]
9731#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9732pub const fn _mm256_mask_mov_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
9733    unsafe {
9734        let mov = a.as_i8x32();
9735        transmute(simd_select_bitmask(k, mov, src.as_i8x32()))
9736    }
9737}
9738
9739/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9740///
9741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi8&expand=3812)
9742#[inline]
9743#[target_feature(enable = "avx512bw,avx512vl")]
9744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9745#[cfg_attr(test, assert_instr(vmovdqu8))]
9746#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9747pub const fn _mm256_maskz_mov_epi8(k: __mmask32, a: __m256i) -> __m256i {
9748    unsafe {
9749        let mov = a.as_i8x32();
9750        transmute(simd_select_bitmask(k, mov, i8x32::ZERO))
9751    }
9752}
9753
9754/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9755///
9756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi8&expand=3809)
9757#[inline]
9758#[target_feature(enable = "avx512bw,avx512vl")]
9759#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9760#[cfg_attr(test, assert_instr(vmovdqu8))]
9761#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9762pub const fn _mm_mask_mov_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
9763    unsafe {
9764        let mov = a.as_i8x16();
9765        transmute(simd_select_bitmask(k, mov, src.as_i8x16()))
9766    }
9767}
9768
9769/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9770///
9771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi8&expand=3810)
9772#[inline]
9773#[target_feature(enable = "avx512bw,avx512vl")]
9774#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9775#[cfg_attr(test, assert_instr(vmovdqu8))]
9776#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9777pub const fn _mm_maskz_mov_epi8(k: __mmask16, a: __m128i) -> __m128i {
9778    unsafe {
9779        let mov = a.as_i8x16();
9780        transmute(simd_select_bitmask(k, mov, i8x16::ZERO))
9781    }
9782}
9783
9784/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9785///
9786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi16&expand=4942)
9787#[inline]
9788#[target_feature(enable = "avx512bw")]
9789#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9790#[cfg_attr(test, assert_instr(vpbroadcastw))]
9791#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9792pub const fn _mm512_mask_set1_epi16(src: __m512i, k: __mmask32, a: i16) -> __m512i {
9793    unsafe {
9794        let r = _mm512_set1_epi16(a).as_i16x32();
9795        transmute(simd_select_bitmask(k, r, src.as_i16x32()))
9796    }
9797}
9798
9799/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9800///
9801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi16&expand=4943)
9802#[inline]
9803#[target_feature(enable = "avx512bw")]
9804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9805#[cfg_attr(test, assert_instr(vpbroadcastw))]
9806#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9807pub const fn _mm512_maskz_set1_epi16(k: __mmask32, a: i16) -> __m512i {
9808    unsafe {
9809        let r = _mm512_set1_epi16(a).as_i16x32();
9810        transmute(simd_select_bitmask(k, r, i16x32::ZERO))
9811    }
9812}
9813
9814/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9815///
9816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi16&expand=4939)
9817#[inline]
9818#[target_feature(enable = "avx512bw,avx512vl")]
9819#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9820#[cfg_attr(test, assert_instr(vpbroadcastw))]
9821#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9822pub const fn _mm256_mask_set1_epi16(src: __m256i, k: __mmask16, a: i16) -> __m256i {
9823    unsafe {
9824        let r = _mm256_set1_epi16(a).as_i16x16();
9825        transmute(simd_select_bitmask(k, r, src.as_i16x16()))
9826    }
9827}
9828
9829/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9830///
9831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi16&expand=4940)
9832#[inline]
9833#[target_feature(enable = "avx512bw,avx512vl")]
9834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9835#[cfg_attr(test, assert_instr(vpbroadcastw))]
9836#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9837pub const fn _mm256_maskz_set1_epi16(k: __mmask16, a: i16) -> __m256i {
9838    unsafe {
9839        let r = _mm256_set1_epi16(a).as_i16x16();
9840        transmute(simd_select_bitmask(k, r, i16x16::ZERO))
9841    }
9842}
9843
9844/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9845///
9846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi16&expand=4936)
9847#[inline]
9848#[target_feature(enable = "avx512bw,avx512vl")]
9849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9850#[cfg_attr(test, assert_instr(vpbroadcastw))]
9851#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9852pub const fn _mm_mask_set1_epi16(src: __m128i, k: __mmask8, a: i16) -> __m128i {
9853    unsafe {
9854        let r = _mm_set1_epi16(a).as_i16x8();
9855        transmute(simd_select_bitmask(k, r, src.as_i16x8()))
9856    }
9857}
9858
9859/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9860///
9861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi16&expand=4937)
9862#[inline]
9863#[target_feature(enable = "avx512bw,avx512vl")]
9864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9865#[cfg_attr(test, assert_instr(vpbroadcastw))]
9866#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9867pub const fn _mm_maskz_set1_epi16(k: __mmask8, a: i16) -> __m128i {
9868    unsafe {
9869        let r = _mm_set1_epi16(a).as_i16x8();
9870        transmute(simd_select_bitmask(k, r, i16x8::ZERO))
9871    }
9872}
9873
9874/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9875///
9876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi8&expand=4970)
9877#[inline]
9878#[target_feature(enable = "avx512bw")]
9879#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9880#[cfg_attr(test, assert_instr(vpbroadcast))]
9881#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9882pub const fn _mm512_mask_set1_epi8(src: __m512i, k: __mmask64, a: i8) -> __m512i {
9883    unsafe {
9884        let r = _mm512_set1_epi8(a).as_i8x64();
9885        transmute(simd_select_bitmask(k, r, src.as_i8x64()))
9886    }
9887}
9888
9889/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9890///
9891/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi8&expand=4971)
9892#[inline]
9893#[target_feature(enable = "avx512bw")]
9894#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9895#[cfg_attr(test, assert_instr(vpbroadcast))]
9896#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9897pub const fn _mm512_maskz_set1_epi8(k: __mmask64, a: i8) -> __m512i {
9898    unsafe {
9899        let r = _mm512_set1_epi8(a).as_i8x64();
9900        transmute(simd_select_bitmask(k, r, i8x64::ZERO))
9901    }
9902}
9903
9904/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9905///
9906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi8&expand=4967)
9907#[inline]
9908#[target_feature(enable = "avx512bw,avx512vl")]
9909#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9910#[cfg_attr(test, assert_instr(vpbroadcast))]
9911#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9912pub const fn _mm256_mask_set1_epi8(src: __m256i, k: __mmask32, a: i8) -> __m256i {
9913    unsafe {
9914        let r = _mm256_set1_epi8(a).as_i8x32();
9915        transmute(simd_select_bitmask(k, r, src.as_i8x32()))
9916    }
9917}
9918
9919/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9920///
9921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi8&expand=4968)
9922#[inline]
9923#[target_feature(enable = "avx512bw,avx512vl")]
9924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9925#[cfg_attr(test, assert_instr(vpbroadcast))]
9926#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9927pub const fn _mm256_maskz_set1_epi8(k: __mmask32, a: i8) -> __m256i {
9928    unsafe {
9929        let r = _mm256_set1_epi8(a).as_i8x32();
9930        transmute(simd_select_bitmask(k, r, i8x32::ZERO))
9931    }
9932}
9933
9934/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9935///
9936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi8&expand=4964)
9937#[inline]
9938#[target_feature(enable = "avx512bw,avx512vl")]
9939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9940#[cfg_attr(test, assert_instr(vpbroadcast))]
9941#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9942pub const fn _mm_mask_set1_epi8(src: __m128i, k: __mmask16, a: i8) -> __m128i {
9943    unsafe {
9944        let r = _mm_set1_epi8(a).as_i8x16();
9945        transmute(simd_select_bitmask(k, r, src.as_i8x16()))
9946    }
9947}
9948
9949/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9950///
9951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi8&expand=4965)
9952#[inline]
9953#[target_feature(enable = "avx512bw,avx512vl")]
9954#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9955#[cfg_attr(test, assert_instr(vpbroadcast))]
9956#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9957pub const fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i {
9958    unsafe {
9959        let r = _mm_set1_epi8(a).as_i8x16();
9960        transmute(simd_select_bitmask(k, r, i8x16::ZERO))
9961    }
9962}
9963
/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflelo_epi16&expand=5221)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shufflelo_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i16x32();
        // IMM8 packs four 2-bit source selectors, one per destination word in
        // the low 64 bits of a 128-bit lane; the high four words of each lane
        // pass through unchanged. The same selector pattern repeats at word
        // offsets 0, 8, 16 and 24 because a 512-bit vector holds four lanes.
        let r: i16x32 = simd_shuffle!(
            a,
            a,
            [
                // lane 0: shuffled low words 0..=3 ...
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
                // ... followed by the untouched high words 4..=7
                4,
                5,
                6,
                7,
                // lane 1 (word offset 8)
                (IMM8 as u32 & 0b11) + 8,
                ((IMM8 as u32 >> 2) & 0b11) + 8,
                ((IMM8 as u32 >> 4) & 0b11) + 8,
                ((IMM8 as u32 >> 6) & 0b11) + 8,
                12,
                13,
                14,
                15,
                // lane 2 (word offset 16)
                (IMM8 as u32 & 0b11) + 16,
                ((IMM8 as u32 >> 2) & 0b11) + 16,
                ((IMM8 as u32 >> 4) & 0b11) + 16,
                ((IMM8 as u32 >> 6) & 0b11) + 16,
                20,
                21,
                22,
                23,
                // lane 3 (word offset 24)
                (IMM8 as u32 & 0b11) + 24,
                ((IMM8 as u32 >> 2) & 0b11) + 24,
                ((IMM8 as u32 >> 4) & 0b11) + 24,
                ((IMM8 as u32 >> 6) & 0b11) + 24,
                28,
                29,
                30,
                31,
            ],
        );
        transmute(r)
    }
}
10018
10019/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
10020///
10021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflelo_epi16&expand=5219)
10022#[inline]
10023#[target_feature(enable = "avx512bw")]
10024#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10025#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
10026#[rustc_legacy_const_generics(3)]
10027#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10028pub const fn _mm512_mask_shufflelo_epi16<const IMM8: i32>(
10029    src: __m512i,
10030    k: __mmask32,
10031    a: __m512i,
10032) -> __m512i {
10033    unsafe {
10034        static_assert_uimm_bits!(IMM8, 8);
10035        let r = _mm512_shufflelo_epi16::<IMM8>(a);
10036        transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
10037    }
10038}
10039
10040/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10041///
10042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflelo_epi16&expand=5220)
10043#[inline]
10044#[target_feature(enable = "avx512bw")]
10045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10046#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
10047#[rustc_legacy_const_generics(2)]
10048#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10049pub const fn _mm512_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
10050    unsafe {
10051        static_assert_uimm_bits!(IMM8, 8);
10052        let r = _mm512_shufflelo_epi16::<IMM8>(a);
10053        transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO))
10054    }
10055}
10056
10057/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
10058///
10059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflelo_epi16&expand=5216)
10060#[inline]
10061#[target_feature(enable = "avx512bw,avx512vl")]
10062#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10063#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
10064#[rustc_legacy_const_generics(3)]
10065#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10066pub const fn _mm256_mask_shufflelo_epi16<const IMM8: i32>(
10067    src: __m256i,
10068    k: __mmask16,
10069    a: __m256i,
10070) -> __m256i {
10071    unsafe {
10072        static_assert_uimm_bits!(IMM8, 8);
10073        let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
10074        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
10075    }
10076}
10077
10078/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
10079///
10080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflelo_epi16&expand=5217)
10081#[inline]
10082#[target_feature(enable = "avx512bw,avx512vl")]
10083#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10084#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
10085#[rustc_legacy_const_generics(2)]
10086#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10087pub const fn _mm256_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
10088    unsafe {
10089        static_assert_uimm_bits!(IMM8, 8);
10090        let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
10091        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO))
10092    }
10093}
10094
10095/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
10096///
10097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflelo_epi16&expand=5213)
10098#[inline]
10099#[target_feature(enable = "avx512bw,avx512vl")]
10100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10101#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
10102#[rustc_legacy_const_generics(3)]
10103#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10104pub const fn _mm_mask_shufflelo_epi16<const IMM8: i32>(
10105    src: __m128i,
10106    k: __mmask8,
10107    a: __m128i,
10108) -> __m128i {
10109    unsafe {
10110        static_assert_uimm_bits!(IMM8, 8);
10111        let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
10112        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
10113    }
10114}
10115
10116/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
10117///
10118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflelo_epi16&expand=5214)
10119#[inline]
10120#[target_feature(enable = "avx512bw,avx512vl")]
10121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10122#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
10123#[rustc_legacy_const_generics(2)]
10124#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10125pub const fn _mm_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
10126    unsafe {
10127        static_assert_uimm_bits!(IMM8, 8);
10128        let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
10129        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO))
10130    }
10131}
10132
/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflehi_epi16&expand=5212)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shufflehi_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i16x32();
        // IMM8 packs four 2-bit source selectors, one per destination word in
        // the high 64 bits of a 128-bit lane (hence the `+ 4` style offsets);
        // the low four words of each lane pass through unchanged. The pattern
        // repeats per 128-bit lane at word offsets 0, 8, 16 and 24.
        let r: i16x32 = simd_shuffle!(
            a,
            a,
            [
                // lane 0: untouched low words 0..=3 ...
                0,
                1,
                2,
                3,
                // ... followed by the shuffled high words 4..=7
                (IMM8 as u32 & 0b11) + 4,
                ((IMM8 as u32 >> 2) & 0b11) + 4,
                ((IMM8 as u32 >> 4) & 0b11) + 4,
                ((IMM8 as u32 >> 6) & 0b11) + 4,
                // lane 1 (word offset 8)
                8,
                9,
                10,
                11,
                (IMM8 as u32 & 0b11) + 12,
                ((IMM8 as u32 >> 2) & 0b11) + 12,
                ((IMM8 as u32 >> 4) & 0b11) + 12,
                ((IMM8 as u32 >> 6) & 0b11) + 12,
                // lane 2 (word offset 16)
                16,
                17,
                18,
                19,
                (IMM8 as u32 & 0b11) + 20,
                ((IMM8 as u32 >> 2) & 0b11) + 20,
                ((IMM8 as u32 >> 4) & 0b11) + 20,
                ((IMM8 as u32 >> 6) & 0b11) + 20,
                // lane 3 (word offset 24)
                24,
                25,
                26,
                27,
                (IMM8 as u32 & 0b11) + 28,
                ((IMM8 as u32 >> 2) & 0b11) + 28,
                ((IMM8 as u32 >> 4) & 0b11) + 28,
                ((IMM8 as u32 >> 6) & 0b11) + 28,
            ],
        );
        transmute(r)
    }
}
10187
10188/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
10189///
10190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflehi_epi16&expand=5210)
10191#[inline]
10192#[target_feature(enable = "avx512bw")]
10193#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10194#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
10195#[rustc_legacy_const_generics(3)]
10196#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10197pub const fn _mm512_mask_shufflehi_epi16<const IMM8: i32>(
10198    src: __m512i,
10199    k: __mmask32,
10200    a: __m512i,
10201) -> __m512i {
10202    unsafe {
10203        static_assert_uimm_bits!(IMM8, 8);
10204        let r = _mm512_shufflehi_epi16::<IMM8>(a);
10205        transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
10206    }
10207}
10208
10209/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10210///
10211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflehi_epi16&expand=5211)
10212#[inline]
10213#[target_feature(enable = "avx512bw")]
10214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10215#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
10216#[rustc_legacy_const_generics(2)]
10217#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10218pub const fn _mm512_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
10219    unsafe {
10220        static_assert_uimm_bits!(IMM8, 8);
10221        let r = _mm512_shufflehi_epi16::<IMM8>(a);
10222        transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO))
10223    }
10224}
10225
10226/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
10227///
10228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflehi_epi16&expand=5207)
10229#[inline]
10230#[target_feature(enable = "avx512bw,avx512vl")]
10231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10232#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
10233#[rustc_legacy_const_generics(3)]
10234#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10235pub const fn _mm256_mask_shufflehi_epi16<const IMM8: i32>(
10236    src: __m256i,
10237    k: __mmask16,
10238    a: __m256i,
10239) -> __m256i {
10240    unsafe {
10241        static_assert_uimm_bits!(IMM8, 8);
10242        let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
10243        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
10244    }
10245}
10246
10247/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10248///
10249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflehi_epi16&expand=5208)
10250#[inline]
10251#[target_feature(enable = "avx512bw,avx512vl")]
10252#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10253#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
10254#[rustc_legacy_const_generics(2)]
10255#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10256pub const fn _mm256_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
10257    unsafe {
10258        static_assert_uimm_bits!(IMM8, 8);
10259        let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
10260        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO))
10261    }
10262}
10263
10264/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
10265///
10266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflehi_epi16&expand=5204)
10267#[inline]
10268#[target_feature(enable = "avx512bw,avx512vl")]
10269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10270#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
10271#[rustc_legacy_const_generics(3)]
10272#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10273pub const fn _mm_mask_shufflehi_epi16<const IMM8: i32>(
10274    src: __m128i,
10275    k: __mmask8,
10276    a: __m128i,
10277) -> __m128i {
10278    unsafe {
10279        static_assert_uimm_bits!(IMM8, 8);
10280        let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
10281        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
10282    }
10283}
10284
10285/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10286///
10287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflehi_epi16&expand=5205)
10288#[inline]
10289#[target_feature(enable = "avx512bw,avx512vl")]
10290#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10291#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
10292#[rustc_legacy_const_generics(2)]
10293#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10294pub const fn _mm_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
10295    unsafe {
10296        static_assert_uimm_bits!(IMM8, 8);
10297        let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
10298        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO))
10299    }
10300}
10301
10302/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst.
10303///
10304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi8&expand=5159)
10305#[inline]
10306#[target_feature(enable = "avx512bw")]
10307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10308#[cfg_attr(test, assert_instr(vpshufb))]
10309pub fn _mm512_shuffle_epi8(a: __m512i, b: __m512i) -> __m512i {
10310    unsafe { transmute(vpshufb(a.as_i8x64(), b.as_i8x64())) }
10311}
10312
10313/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10314///
10315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi8&expand=5157)
10316#[inline]
10317#[target_feature(enable = "avx512bw")]
10318#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10319#[cfg_attr(test, assert_instr(vpshufb))]
10320pub fn _mm512_mask_shuffle_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
10321    unsafe {
10322        let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
10323        transmute(simd_select_bitmask(k, shuffle, src.as_i8x64()))
10324    }
10325}
10326
10327/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10328///
10329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi8&expand=5158)
10330#[inline]
10331#[target_feature(enable = "avx512bw")]
10332#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10333#[cfg_attr(test, assert_instr(vpshufb))]
10334pub fn _mm512_maskz_shuffle_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
10335    unsafe {
10336        let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
10337        transmute(simd_select_bitmask(k, shuffle, i8x64::ZERO))
10338    }
10339}
10340
10341/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10342///
10343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi8&expand=5154)
10344#[inline]
10345#[target_feature(enable = "avx512bw,avx512vl")]
10346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10347#[cfg_attr(test, assert_instr(vpshufb))]
10348pub fn _mm256_mask_shuffle_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
10349    unsafe {
10350        let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
10351        transmute(simd_select_bitmask(k, shuffle, src.as_i8x32()))
10352    }
10353}
10354
10355/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10356///
10357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi8&expand=5155)
10358#[inline]
10359#[target_feature(enable = "avx512bw,avx512vl")]
10360#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10361#[cfg_attr(test, assert_instr(vpshufb))]
10362pub fn _mm256_maskz_shuffle_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
10363    unsafe {
10364        let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
10365        transmute(simd_select_bitmask(k, shuffle, i8x32::ZERO))
10366    }
10367}
10368
10369/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10370///
10371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi8&expand=5151)
10372#[inline]
10373#[target_feature(enable = "avx512bw,avx512vl")]
10374#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10375#[cfg_attr(test, assert_instr(vpshufb))]
10376pub fn _mm_mask_shuffle_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
10377    unsafe {
10378        let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
10379        transmute(simd_select_bitmask(k, shuffle, src.as_i8x16()))
10380    }
10381}
10382
10383/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10384///
10385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi8&expand=5152)
10386#[inline]
10387#[target_feature(enable = "avx512bw,avx512vl")]
10388#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10389#[cfg_attr(test, assert_instr(vpshufb))]
10390pub fn _mm_maskz_shuffle_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
10391    unsafe {
10392        let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
10393        transmute(simd_select_bitmask(k, shuffle, i8x16::ZERO))
10394    }
10395}
10396
10397/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
10398///
10399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi16_mask&expand=5884)
10400#[inline]
10401#[target_feature(enable = "avx512bw")]
10402#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10403#[cfg_attr(test, assert_instr(vptestmw))]
10404#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10405pub const fn _mm512_test_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
10406    let and = _mm512_and_si512(a, b);
10407    let zero = _mm512_setzero_si512();
10408    _mm512_cmpneq_epi16_mask(and, zero)
10409}
10410
10411/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
10412///
10413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi16_mask&expand=5883)
10414#[inline]
10415#[target_feature(enable = "avx512bw")]
10416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10417#[cfg_attr(test, assert_instr(vptestmw))]
10418#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10419pub const fn _mm512_mask_test_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
10420    let and = _mm512_and_si512(a, b);
10421    let zero = _mm512_setzero_si512();
10422    _mm512_mask_cmpneq_epi16_mask(k, and, zero)
10423}
10424
10425/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
10426///
10427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi16_mask&expand=5882)
10428#[inline]
10429#[target_feature(enable = "avx512bw,avx512vl")]
10430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10431#[cfg_attr(test, assert_instr(vptestmw))]
10432#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10433pub const fn _mm256_test_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
10434    let and = _mm256_and_si256(a, b);
10435    let zero = _mm256_setzero_si256();
10436    _mm256_cmpneq_epi16_mask(and, zero)
10437}
10438
10439/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
10440///
10441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi16_mask&expand=5881)
10442#[inline]
10443#[target_feature(enable = "avx512bw,avx512vl")]
10444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10445#[cfg_attr(test, assert_instr(vptestmw))]
10446#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10447pub const fn _mm256_mask_test_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
10448    let and = _mm256_and_si256(a, b);
10449    let zero = _mm256_setzero_si256();
10450    _mm256_mask_cmpneq_epi16_mask(k, and, zero)
10451}
10452
10453/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
10454///
10455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi16_mask&expand=5880)
10456#[inline]
10457#[target_feature(enable = "avx512bw,avx512vl")]
10458#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10459#[cfg_attr(test, assert_instr(vptestmw))]
10460#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10461pub const fn _mm_test_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
10462    let and = _mm_and_si128(a, b);
10463    let zero = _mm_setzero_si128();
10464    _mm_cmpneq_epi16_mask(and, zero)
10465}
10466
10467/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
10468///
10469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi16_mask&expand=5879)
10470#[inline]
10471#[target_feature(enable = "avx512bw,avx512vl")]
10472#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10473#[cfg_attr(test, assert_instr(vptestmw))]
10474#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10475pub const fn _mm_mask_test_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
10476    let and = _mm_and_si128(a, b);
10477    let zero = _mm_setzero_si128();
10478    _mm_mask_cmpneq_epi16_mask(k, and, zero)
10479}
10480
10481/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
10482///
10483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi8_mask&expand=5902)
10484#[inline]
10485#[target_feature(enable = "avx512bw")]
10486#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10487#[cfg_attr(test, assert_instr(vptestmb))]
10488#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10489pub const fn _mm512_test_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
10490    let and = _mm512_and_si512(a, b);
10491    let zero = _mm512_setzero_si512();
10492    _mm512_cmpneq_epi8_mask(and, zero)
10493}
10494
10495/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
10496///
10497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi8_mask&expand=5901)
10498#[inline]
10499#[target_feature(enable = "avx512bw")]
10500#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10501#[cfg_attr(test, assert_instr(vptestmb))]
10502#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10503pub const fn _mm512_mask_test_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
10504    let and = _mm512_and_si512(a, b);
10505    let zero = _mm512_setzero_si512();
10506    _mm512_mask_cmpneq_epi8_mask(k, and, zero)
10507}
10508
10509/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
10510///
10511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi8_mask&expand=5900)
10512#[inline]
10513#[target_feature(enable = "avx512bw,avx512vl")]
10514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10515#[cfg_attr(test, assert_instr(vptestmb))]
10516#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10517pub const fn _mm256_test_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
10518    let and = _mm256_and_si256(a, b);
10519    let zero = _mm256_setzero_si256();
10520    _mm256_cmpneq_epi8_mask(and, zero)
10521}
10522
10523/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
10524///
10525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi8_mask&expand=5899)
10526#[inline]
10527#[target_feature(enable = "avx512bw,avx512vl")]
10528#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10529#[cfg_attr(test, assert_instr(vptestmb))]
10530#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10531pub const fn _mm256_mask_test_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
10532    let and = _mm256_and_si256(a, b);
10533    let zero = _mm256_setzero_si256();
10534    _mm256_mask_cmpneq_epi8_mask(k, and, zero)
10535}
10536
10537/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
10538///
10539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi8_mask&expand=5898)
10540#[inline]
10541#[target_feature(enable = "avx512bw,avx512vl")]
10542#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10543#[cfg_attr(test, assert_instr(vptestmb))]
10544#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10545pub const fn _mm_test_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
10546    let and = _mm_and_si128(a, b);
10547    let zero = _mm_setzero_si128();
10548    _mm_cmpneq_epi8_mask(and, zero)
10549}
10550
10551/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
10552///
10553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi8_mask&expand=5897)
10554#[inline]
10555#[target_feature(enable = "avx512bw,avx512vl")]
10556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10557#[cfg_attr(test, assert_instr(vptestmb))]
10558#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10559pub const fn _mm_mask_test_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
10560    let and = _mm_and_si128(a, b);
10561    let zero = _mm_setzero_si128();
10562    _mm_mask_cmpneq_epi8_mask(k, and, zero)
10563}
10564
10565/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
10566///
10567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi16_mask&expand=5915)
10568#[inline]
10569#[target_feature(enable = "avx512bw")]
10570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10571#[cfg_attr(test, assert_instr(vptestnmw))]
10572#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10573pub const fn _mm512_testn_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
10574    let and = _mm512_and_si512(a, b);
10575    let zero = _mm512_setzero_si512();
10576    _mm512_cmpeq_epi16_mask(and, zero)
10577}
10578
10579/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
10580///
10581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi16_mask&expand=5914)
10582#[inline]
10583#[target_feature(enable = "avx512bw")]
10584#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10585#[cfg_attr(test, assert_instr(vptestnmw))]
10586#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10587pub const fn _mm512_mask_testn_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
10588    let and = _mm512_and_si512(a, b);
10589    let zero = _mm512_setzero_si512();
10590    _mm512_mask_cmpeq_epi16_mask(k, and, zero)
10591}
10592
10593/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
10594///
10595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi16_mask&expand=5913)
10596#[inline]
10597#[target_feature(enable = "avx512bw,avx512vl")]
10598#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10599#[cfg_attr(test, assert_instr(vptestnmw))]
10600#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10601pub const fn _mm256_testn_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
10602    let and = _mm256_and_si256(a, b);
10603    let zero = _mm256_setzero_si256();
10604    _mm256_cmpeq_epi16_mask(and, zero)
10605}
10606
10607/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
10608///
10609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi16_mask&expand=5912)
10610#[inline]
10611#[target_feature(enable = "avx512bw,avx512vl")]
10612#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10613#[cfg_attr(test, assert_instr(vptestnmw))]
10614#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10615pub const fn _mm256_mask_testn_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
10616    let and = _mm256_and_si256(a, b);
10617    let zero = _mm256_setzero_si256();
10618    _mm256_mask_cmpeq_epi16_mask(k, and, zero)
10619}
10620
10621/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
10622///
10623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi16_mask&expand=5911)
10624#[inline]
10625#[target_feature(enable = "avx512bw,avx512vl")]
10626#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10627#[cfg_attr(test, assert_instr(vptestnmw))]
10628#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10629pub const fn _mm_testn_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
10630    let and = _mm_and_si128(a, b);
10631    let zero = _mm_setzero_si128();
10632    _mm_cmpeq_epi16_mask(and, zero)
10633}
10634
10635/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
10636///
10637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi16_mask&expand=5910)
10638#[inline]
10639#[target_feature(enable = "avx512bw,avx512vl")]
10640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10641#[cfg_attr(test, assert_instr(vptestnmw))]
10642#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10643pub const fn _mm_mask_testn_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
10644    let and = _mm_and_si128(a, b);
10645    let zero = _mm_setzero_si128();
10646    _mm_mask_cmpeq_epi16_mask(k, and, zero)
10647}
10648
10649/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
10650///
10651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi8_mask&expand=5933)
10652#[inline]
10653#[target_feature(enable = "avx512bw")]
10654#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10655#[cfg_attr(test, assert_instr(vptestnmb))]
10656#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10657pub const fn _mm512_testn_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
10658    let and = _mm512_and_si512(a, b);
10659    let zero = _mm512_setzero_si512();
10660    _mm512_cmpeq_epi8_mask(and, zero)
10661}
10662
10663/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
10664///
10665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi8_mask&expand=5932)
10666#[inline]
10667#[target_feature(enable = "avx512bw")]
10668#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10669#[cfg_attr(test, assert_instr(vptestnmb))]
10670#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10671pub const fn _mm512_mask_testn_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
10672    let and = _mm512_and_si512(a, b);
10673    let zero = _mm512_setzero_si512();
10674    _mm512_mask_cmpeq_epi8_mask(k, and, zero)
10675}
10676
10677/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
10678///
10679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi8_mask&expand=5931)
10680#[inline]
10681#[target_feature(enable = "avx512bw,avx512vl")]
10682#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10683#[cfg_attr(test, assert_instr(vptestnmb))]
10684#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10685pub const fn _mm256_testn_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
10686    let and = _mm256_and_si256(a, b);
10687    let zero = _mm256_setzero_si256();
10688    _mm256_cmpeq_epi8_mask(and, zero)
10689}
10690
10691/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
10692///
10693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi8_mask&expand=5930)
10694#[inline]
10695#[target_feature(enable = "avx512bw,avx512vl")]
10696#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10697#[cfg_attr(test, assert_instr(vptestnmb))]
10698#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10699pub const fn _mm256_mask_testn_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
10700    let and = _mm256_and_si256(a, b);
10701    let zero = _mm256_setzero_si256();
10702    _mm256_mask_cmpeq_epi8_mask(k, and, zero)
10703}
10704
10705/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
10706///
10707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi8_mask&expand=5929)
10708#[inline]
10709#[target_feature(enable = "avx512bw,avx512vl")]
10710#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10711#[cfg_attr(test, assert_instr(vptestnmb))]
10712#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10713pub const fn _mm_testn_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
10714    let and = _mm_and_si128(a, b);
10715    let zero = _mm_setzero_si128();
10716    _mm_cmpeq_epi8_mask(and, zero)
10717}
10718
10719/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
10720///
10721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi8_mask&expand=5928)
10722#[inline]
10723#[target_feature(enable = "avx512bw,avx512vl")]
10724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10725#[cfg_attr(test, assert_instr(vptestnmb))]
10726#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10727pub const fn _mm_mask_testn_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
10728    let and = _mm_and_si128(a, b);
10729    let zero = _mm_setzero_si128();
10730    _mm_mask_cmpeq_epi8_mask(k, and, zero)
10731}
10732
10733/// Store 64-bit mask from a into memory.
10734///
10735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask64&expand=5578)
10736#[inline]
10737#[target_feature(enable = "avx512bw")]
10738#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10739#[cfg_attr(test, assert_instr(mov))] //should be kmovq
10740#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10741pub const unsafe fn _store_mask64(mem_addr: *mut __mmask64, a: __mmask64) {
10742    ptr::write(mem_addr as *mut __mmask64, a);
10743}
10744
10745/// Store 32-bit mask from a into memory.
10746///
10747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask32&expand=5577)
10748#[inline]
10749#[target_feature(enable = "avx512bw")]
10750#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10751#[cfg_attr(test, assert_instr(mov))] //should be kmovd
10752#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10753pub const unsafe fn _store_mask32(mem_addr: *mut __mmask32, a: __mmask32) {
10754    ptr::write(mem_addr as *mut __mmask32, a);
10755}
10756
10757/// Load 64-bit mask from memory into k.
10758///
10759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask64&expand=3318)
10760#[inline]
10761#[target_feature(enable = "avx512bw")]
10762#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10763#[cfg_attr(test, assert_instr(mov))] //should be kmovq
10764#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10765pub const unsafe fn _load_mask64(mem_addr: *const __mmask64) -> __mmask64 {
10766    ptr::read(mem_addr as *const __mmask64)
10767}
10768
10769/// Load 32-bit mask from memory into k.
10770///
10771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask32&expand=3317)
10772#[inline]
10773#[target_feature(enable = "avx512bw")]
10774#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10775#[cfg_attr(test, assert_instr(mov))] //should be kmovd
10776#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10777pub const unsafe fn _load_mask32(mem_addr: *const __mmask32) -> __mmask32 {
10778    ptr::read(mem_addr as *const __mmask32)
10779}
10780
10781/// Compute the absolute differences of packed unsigned 8-bit integers in a and b, then horizontally sum each consecutive 8 differences to produce eight unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in dst.
10782///
10783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sad_epu8&expand=4855)
10784#[inline]
10785#[target_feature(enable = "avx512bw")]
10786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10787#[cfg_attr(test, assert_instr(vpsadbw))]
10788pub fn _mm512_sad_epu8(a: __m512i, b: __m512i) -> __m512i {
10789    unsafe { transmute(vpsadbw(a.as_u8x64(), b.as_u8x64())) }
10790}
10791
10792/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10793///
10794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dbsad_epu8&expand=2114)
10795#[inline]
10796#[target_feature(enable = "avx512bw")]
10797#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10798#[rustc_legacy_const_generics(2)]
10799#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10800pub fn _mm512_dbsad_epu8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
10801    unsafe {
10802        static_assert_uimm_bits!(IMM8, 8);
10803        let a = a.as_u8x64();
10804        let b = b.as_u8x64();
10805        let r = vdbpsadbw(a, b, IMM8);
10806        transmute(r)
10807    }
10808}
10809
10810/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10811///
10812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dbsad_epu8&expand=2115)
10813#[inline]
10814#[target_feature(enable = "avx512bw")]
10815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10816#[rustc_legacy_const_generics(4)]
10817#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10818pub fn _mm512_mask_dbsad_epu8<const IMM8: i32>(
10819    src: __m512i,
10820    k: __mmask32,
10821    a: __m512i,
10822    b: __m512i,
10823) -> __m512i {
10824    unsafe {
10825        static_assert_uimm_bits!(IMM8, 8);
10826        let a = a.as_u8x64();
10827        let b = b.as_u8x64();
10828        let r = vdbpsadbw(a, b, IMM8);
10829        transmute(simd_select_bitmask(k, r, src.as_u16x32()))
10830    }
10831}
10832
10833/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10834///
10835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dbsad_epu8&expand=2116)
10836#[inline]
10837#[target_feature(enable = "avx512bw")]
10838#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10839#[rustc_legacy_const_generics(3)]
10840#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10841pub fn _mm512_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
10842    unsafe {
10843        static_assert_uimm_bits!(IMM8, 8);
10844        let a = a.as_u8x64();
10845        let b = b.as_u8x64();
10846        let r = vdbpsadbw(a, b, IMM8);
10847        transmute(simd_select_bitmask(k, r, u16x32::ZERO))
10848    }
10849}
10850
10851/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10852///
10853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dbsad_epu8&expand=2111)
10854#[inline]
10855#[target_feature(enable = "avx512bw,avx512vl")]
10856#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10857#[rustc_legacy_const_generics(2)]
10858#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10859pub fn _mm256_dbsad_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
10860    unsafe {
10861        static_assert_uimm_bits!(IMM8, 8);
10862        let a = a.as_u8x32();
10863        let b = b.as_u8x32();
10864        let r = vdbpsadbw256(a, b, IMM8);
10865        transmute(r)
10866    }
10867}
10868
10869/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10870///
10871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dbsad_epu8&expand=2112)
10872#[inline]
10873#[target_feature(enable = "avx512bw,avx512vl")]
10874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10875#[rustc_legacy_const_generics(4)]
10876#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10877pub fn _mm256_mask_dbsad_epu8<const IMM8: i32>(
10878    src: __m256i,
10879    k: __mmask16,
10880    a: __m256i,
10881    b: __m256i,
10882) -> __m256i {
10883    unsafe {
10884        static_assert_uimm_bits!(IMM8, 8);
10885        let a = a.as_u8x32();
10886        let b = b.as_u8x32();
10887        let r = vdbpsadbw256(a, b, IMM8);
10888        transmute(simd_select_bitmask(k, r, src.as_u16x16()))
10889    }
10890}
10891
10892/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10893///
10894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dbsad_epu8&expand=2113)
10895#[inline]
10896#[target_feature(enable = "avx512bw,avx512vl")]
10897#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10898#[rustc_legacy_const_generics(3)]
10899#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10900pub fn _mm256_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
10901    unsafe {
10902        static_assert_uimm_bits!(IMM8, 8);
10903        let a = a.as_u8x32();
10904        let b = b.as_u8x32();
10905        let r = vdbpsadbw256(a, b, IMM8);
10906        transmute(simd_select_bitmask(k, r, u16x16::ZERO))
10907    }
10908}
10909
10910/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10911///
10912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dbsad_epu8&expand=2108)
10913#[inline]
10914#[target_feature(enable = "avx512bw,avx512vl")]
10915#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10916#[rustc_legacy_const_generics(2)]
10917#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10918pub fn _mm_dbsad_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
10919    unsafe {
10920        static_assert_uimm_bits!(IMM8, 8);
10921        let a = a.as_u8x16();
10922        let b = b.as_u8x16();
10923        let r = vdbpsadbw128(a, b, IMM8);
10924        transmute(r)
10925    }
10926}
10927
10928/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10929///
10930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dbsad_epu8&expand=2109)
10931#[inline]
10932#[target_feature(enable = "avx512bw,avx512vl")]
10933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10934#[rustc_legacy_const_generics(4)]
10935#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10936pub fn _mm_mask_dbsad_epu8<const IMM8: i32>(
10937    src: __m128i,
10938    k: __mmask8,
10939    a: __m128i,
10940    b: __m128i,
10941) -> __m128i {
10942    unsafe {
10943        static_assert_uimm_bits!(IMM8, 8);
10944        let a = a.as_u8x16();
10945        let b = b.as_u8x16();
10946        let r = vdbpsadbw128(a, b, IMM8);
10947        transmute(simd_select_bitmask(k, r, src.as_u16x8()))
10948    }
10949}
10950
10951/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10952///
10953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dbsad_epu8&expand=2110)
10954#[inline]
10955#[target_feature(enable = "avx512bw,avx512vl")]
10956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10957#[rustc_legacy_const_generics(3)]
10958#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10959pub fn _mm_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
10960    unsafe {
10961        static_assert_uimm_bits!(IMM8, 8);
10962        let a = a.as_u8x16();
10963        let b = b.as_u8x16();
10964        let r = vdbpsadbw128(a, b, IMM8);
10965        transmute(simd_select_bitmask(k, r, u16x8::ZERO))
10966    }
10967}
10968
10969/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
10970///
10971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi16_mask&expand=3873)
10972#[inline]
10973#[target_feature(enable = "avx512bw")]
10974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10975#[cfg_attr(test, assert_instr(vpmovw2m))]
10976#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10977pub const fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 {
10978    let filter = _mm512_set1_epi16(1 << 15);
10979    let a = _mm512_and_si512(a, filter);
10980    _mm512_cmpeq_epi16_mask(a, filter)
10981}
10982
10983/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
10984///
10985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi16_mask&expand=3872)
10986#[inline]
10987#[target_feature(enable = "avx512bw,avx512vl")]
10988#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10989#[cfg_attr(test, assert_instr(vpmovw2m))]
10990#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10991pub const fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 {
10992    let filter = _mm256_set1_epi16(1 << 15);
10993    let a = _mm256_and_si256(a, filter);
10994    _mm256_cmpeq_epi16_mask(a, filter)
10995}
10996
10997/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
10998///
10999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi16_mask&expand=3871)
11000#[inline]
11001#[target_feature(enable = "avx512bw,avx512vl")]
11002#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11003#[cfg_attr(test, assert_instr(vpmovw2m))]
11004#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11005pub const fn _mm_movepi16_mask(a: __m128i) -> __mmask8 {
11006    let filter = _mm_set1_epi16(1 << 15);
11007    let a = _mm_and_si128(a, filter);
11008    _mm_cmpeq_epi16_mask(a, filter)
11009}
11010
11011/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
11012///
11013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi8_mask&expand=3883)
11014#[inline]
11015#[target_feature(enable = "avx512bw")]
11016#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11017#[cfg_attr(test, assert_instr(vpmovb2m))]
11018#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11019pub const fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 {
11020    let filter = _mm512_set1_epi8(1 << 7);
11021    let a = _mm512_and_si512(a, filter);
11022    _mm512_cmpeq_epi8_mask(a, filter)
11023}
11024
/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi8_mask&expand=3882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovmskb))]
// should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes less cycles than
// using vpmovb2m plus converting the mask register to a standard register.
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 {
    // Isolate the sign bit of each 8-bit lane, then compare against the
    // filter: a lane matches exactly when its MSB was set.
    let filter = _mm256_set1_epi8(1 << 7);
    let a = _mm256_and_si256(a, filter);
    _mm256_cmpeq_epi8_mask(a, filter)
}
11040
/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi8_mask&expand=3881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovmskb))]
// should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes less cycles than
// using vpmovb2m plus converting the mask register to a standard register.
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_movepi8_mask(a: __m128i) -> __mmask16 {
    // Isolate the sign bit of each 8-bit lane, then compare against the
    // filter: a lane matches exactly when its MSB was set.
    let filter = _mm_set1_epi8(1 << 7);
    let a = _mm_and_si128(a, filter);
    _mm_cmpeq_epi8_mask(a, filter)
}
11056
11057/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
11058///
11059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi16&expand=3886)
11060#[inline]
11061#[target_feature(enable = "avx512bw")]
11062#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11063#[cfg_attr(test, assert_instr(vpmovm2w))]
11064#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11065pub const fn _mm512_movm_epi16(k: __mmask32) -> __m512i {
11066    unsafe {
11067        let one = _mm512_set1_epi16(
11068            1 << 15
11069                | 1 << 14
11070                | 1 << 13
11071                | 1 << 12
11072                | 1 << 11
11073                | 1 << 10
11074                | 1 << 9
11075                | 1 << 8
11076                | 1 << 7
11077                | 1 << 6
11078                | 1 << 5
11079                | 1 << 4
11080                | 1 << 3
11081                | 1 << 2
11082                | 1 << 1
11083                | 1 << 0,
11084        )
11085        .as_i16x32();
11086        transmute(simd_select_bitmask(k, one, i16x32::ZERO))
11087    }
11088}
11089
11090/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
11091///
11092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi16&expand=3885)
11093#[inline]
11094#[target_feature(enable = "avx512bw,avx512vl")]
11095#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11096#[cfg_attr(test, assert_instr(vpmovm2w))]
11097#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11098pub const fn _mm256_movm_epi16(k: __mmask16) -> __m256i {
11099    unsafe {
11100        let one = _mm256_set1_epi16(
11101            1 << 15
11102                | 1 << 14
11103                | 1 << 13
11104                | 1 << 12
11105                | 1 << 11
11106                | 1 << 10
11107                | 1 << 9
11108                | 1 << 8
11109                | 1 << 7
11110                | 1 << 6
11111                | 1 << 5
11112                | 1 << 4
11113                | 1 << 3
11114                | 1 << 2
11115                | 1 << 1
11116                | 1 << 0,
11117        )
11118        .as_i16x16();
11119        transmute(simd_select_bitmask(k, one, i16x16::ZERO))
11120    }
11121}
11122
11123/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
11124///
11125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi16&expand=3884)
11126#[inline]
11127#[target_feature(enable = "avx512bw,avx512vl")]
11128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11129#[cfg_attr(test, assert_instr(vpmovm2w))]
11130#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11131pub const fn _mm_movm_epi16(k: __mmask8) -> __m128i {
11132    unsafe {
11133        let one = _mm_set1_epi16(
11134            1 << 15
11135                | 1 << 14
11136                | 1 << 13
11137                | 1 << 12
11138                | 1 << 11
11139                | 1 << 10
11140                | 1 << 9
11141                | 1 << 8
11142                | 1 << 7
11143                | 1 << 6
11144                | 1 << 5
11145                | 1 << 4
11146                | 1 << 3
11147                | 1 << 2
11148                | 1 << 1
11149                | 1 << 0,
11150        )
11151        .as_i16x8();
11152        transmute(simd_select_bitmask(k, one, i16x8::ZERO))
11153    }
11154}
11155
11156/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
11157///
11158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi8&expand=3895)
11159#[inline]
11160#[target_feature(enable = "avx512bw")]
11161#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11162#[cfg_attr(test, assert_instr(vpmovm2b))]
11163#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11164pub const fn _mm512_movm_epi8(k: __mmask64) -> __m512i {
11165    unsafe {
11166        let one =
11167            _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
11168                .as_i8x64();
11169        transmute(simd_select_bitmask(k, one, i8x64::ZERO))
11170    }
11171}
11172
11173/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
11174///
11175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi8&expand=3894)
11176#[inline]
11177#[target_feature(enable = "avx512bw,avx512vl")]
11178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11179#[cfg_attr(test, assert_instr(vpmovm2b))]
11180#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11181pub const fn _mm256_movm_epi8(k: __mmask32) -> __m256i {
11182    unsafe {
11183        let one =
11184            _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
11185                .as_i8x32();
11186        transmute(simd_select_bitmask(k, one, i8x32::ZERO))
11187    }
11188}
11189
11190/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
11191///
11192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi8&expand=3893)
11193#[inline]
11194#[target_feature(enable = "avx512bw,avx512vl")]
11195#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11196#[cfg_attr(test, assert_instr(vpmovm2b))]
11197#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11198pub const fn _mm_movm_epi8(k: __mmask16) -> __m128i {
11199    unsafe {
11200        let one =
11201            _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
11202                .as_i8x16();
11203        transmute(simd_select_bitmask(k, one, i8x16::ZERO))
11204    }
11205}
11206
/// Convert 32-bit mask a into an integer value, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#_cvtmask32_u32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _cvtmask32_u32(a: __mmask32) -> u32 {
    // `__mmask32` is modeled as a plain `u32`, so this is an identity conversion.
    a
}

/// Convert integer value a into an 32-bit mask, and store the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _cvtu32_mask32(a: u32) -> __mmask32 {
    // Identity conversion in the other direction.
    a
}
11228
/// Add 32-bit masks in a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask32&expand=3207)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    // KADD discards carry out of the top bit, hence wrapping addition.
    a.wrapping_add(b)
}

/// Add 64-bit masks in a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask64&expand=3208)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    // KADD discards carry out of the top bit, hence wrapping addition.
    a.wrapping_add(b)
}
11250
/// Compute the bitwise AND of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask32&expand=3213)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    // Masks are plain integers here, so ordinary bitwise AND suffices.
    a & b
}

/// Compute the bitwise AND of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask64&expand=3214)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    // Masks are plain integers here, so ordinary bitwise AND suffices.
    a & b
}
11272
/// Compute the bitwise NOT of 32-bit mask a, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask32&expand=3234)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _knot_mask32(a: __mmask32) -> __mmask32 {
    // Masks are plain integers here, so ordinary bitwise NOT suffices.
    !a
}

/// Compute the bitwise NOT of 64-bit mask a, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask64&expand=3235)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _knot_mask64(a: __mmask64) -> __mmask64 {
    // Masks are plain integers here, so ordinary bitwise NOT suffices.
    !a
}
11294
/// Compute the bitwise NOT of 32-bit masks a and then AND with b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask32&expand=3219)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    // KANDN semantics: (!a) & b — note only `a` is complemented.
    _knot_mask32(a) & b
}

/// Compute the bitwise NOT of 64-bit masks a and then AND with b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask64&expand=3220)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    // KANDN semantics: (!a) & b — note only `a` is complemented.
    _knot_mask64(a) & b
}
11316
/// Compute the bitwise OR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask32&expand=3240)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    // Masks are plain integers here, so ordinary bitwise OR suffices.
    a | b
}

/// Compute the bitwise OR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask64&expand=3241)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    // Masks are plain integers here, so ordinary bitwise OR suffices.
    a | b
}
11338
/// Compute the bitwise XOR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask32&expand=3292)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    // Masks are plain integers here, so ordinary bitwise XOR suffices.
    a ^ b
}

/// Compute the bitwise XOR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask64&expand=3293)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    // Masks are plain integers here, so ordinary bitwise XOR suffices.
    a ^ b
}
11360
/// Compute the bitwise XNOR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask32&expand=3286)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    // XNOR == NOT(XOR): a result bit is set where a and b agree.
    _knot_mask32(a ^ b)
}

/// Compute the bitwise XNOR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask64&expand=3287)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    // XNOR == NOT(XOR): a result bit is set where a and b agree.
    _knot_mask64(a ^ b)
}
11382
/// Compute the bitwise OR of 32-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask32_u8)
///
/// # Safety
///
/// `all_ones` must be valid for a one-byte write.
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _kortest_mask32_u8(a: __mmask32, b: __mmask32, all_ones: *mut u8) -> u8 {
    let tmp = _kor_mask32(a, b);
    // "All ones" (carry-flag analogue) goes through the out-pointer ...
    *all_ones = (tmp == 0xffffffff) as u8;
    // ... while "all zeros" (zero-flag analogue) is the return value.
    (tmp == 0) as u8
}

/// Compute the bitwise OR of 64-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask64_u8)
///
/// # Safety
///
/// `all_ones` must be valid for a one-byte write.
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _kortest_mask64_u8(a: __mmask64, b: __mmask64, all_ones: *mut u8) -> u8 {
    let tmp = _kor_mask64(a, b);
    // "All ones" (carry-flag analogue) goes through the out-pointer ...
    *all_ones = (tmp == 0xffffffff_ffffffff) as u8;
    // ... while "all zeros" (zero-flag analogue) is the return value.
    (tmp == 0) as u8
}
11410
/// Compute the bitwise OR of 32-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kortestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
    // Only the "all ones" (carry) half of KORTEST.
    (_kor_mask32(a, b) == 0xffffffff) as u8
}

/// Compute the bitwise OR of 64-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kortestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
    // Only the "all ones" (carry) half of KORTEST.
    (_kor_mask64(a, b) == 0xffffffff_ffffffff) as u8
}
11434
/// Compute the bitwise OR of 32-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kortestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
    // Only the "all zeros" (zero) half of KORTEST.
    (_kor_mask32(a, b) == 0) as u8
}

/// Compute the bitwise OR of 64-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kortestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
    // Only the "all zeros" (zero) half of KORTEST.
    (_kor_mask64(a, b) == 0) as u8
}
11458
/// Shift the bits of 32-bit mask a left by count while shifting in zeros, and store the least significant 32 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kshiftli_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
    // `unbounded_shl` yields 0 for COUNT >= 32, matching KSHIFTL's behavior
    // of producing an empty mask on over-shift instead of Rust's overflow panic.
    a.unbounded_shl(COUNT)
}

/// Shift the bits of 64-bit mask a left by count while shifting in zeros, and store the least significant 64 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask64)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kshiftli_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
    // `unbounded_shl` yields 0 for COUNT >= 64, matching KSHIFTL's behavior
    // of producing an empty mask on over-shift instead of Rust's overflow panic.
    a.unbounded_shl(COUNT)
}
11482
/// Shift the bits of 32-bit mask a right by count while shifting in zeros, and store the least significant 32 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kshiftri_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
    // `unbounded_shr` yields 0 for COUNT >= 32, matching KSHIFTR's behavior
    // of producing an empty mask on over-shift instead of Rust's overflow panic.
    a.unbounded_shr(COUNT)
}

/// Shift the bits of 64-bit mask a right by count while shifting in zeros, and store the least significant 64 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask64)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kshiftri_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
    // `unbounded_shr` yields 0 for COUNT >= 64, matching KSHIFTR's behavior
    // of producing an empty mask on over-shift instead of Rust's overflow panic.
    a.unbounded_shr(COUNT)
}
11506
/// Compute the bitwise AND of 32-bit masks a and b, and if the result is all zeros, store 1 in dst,
/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
/// zeros, store 1 in and_not, otherwise store 0 in and_not.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask32_u8)
///
/// # Safety
///
/// `and_not` must be valid for a one-byte write.
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _ktest_mask32_u8(a: __mmask32, b: __mmask32, and_not: *mut u8) -> u8 {
    // Carry-flag analogue ((!a & b) == 0) goes through the out-pointer;
    // zero-flag analogue ((a & b) == 0) is the return value.
    *and_not = (_kandn_mask32(a, b) == 0) as u8;
    (_kand_mask32(a, b) == 0) as u8
}

/// Compute the bitwise AND of 64-bit masks a and b, and if the result is all zeros, store 1 in dst,
/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
/// zeros, store 1 in and_not, otherwise store 0 in and_not.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask64_u8)
///
/// # Safety
///
/// `and_not` must be valid for a one-byte write.
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _ktest_mask64_u8(a: __mmask64, b: __mmask64, and_not: *mut u8) -> u8 {
    // Carry-flag analogue ((!a & b) == 0) goes through the out-pointer;
    // zero-flag analogue ((a & b) == 0) is the return value.
    *and_not = (_kandn_mask64(a, b) == 0) as u8;
    (_kand_mask64(a, b) == 0) as u8
}
11534
/// Compute the bitwise NOT of 32-bit mask a and then AND with 32-bit mask b, if the result is all
/// zeros, store 1 in dst, otherwise store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _ktestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
    // Only the carry-flag half of KTEST: set iff b's set bits are a subset of a's.
    (_kandn_mask32(a, b) == 0) as u8
}

/// Compute the bitwise NOT of 64-bit mask a and then AND with 64-bit mask b, if the result is all
/// zeros, store 1 in dst, otherwise store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _ktestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
    // Only the carry-flag half of KTEST: set iff b's set bits are a subset of a's.
    (_kandn_mask64(a, b) == 0) as u8
}
11558
/// Compute the bitwise AND of 32-bit masks a and b, if the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _ktestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
    // Only the zero-flag half of KTEST: set iff a and b share no set bits.
    (_kand_mask32(a, b) == 0) as u8
}

/// Compute the bitwise AND of 64-bit masks a and b, if the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _ktestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
    // Only the zero-flag half of KTEST: set iff a and b share no set bits.
    (_kand_mask64(a, b) == 0) as u8
}
11582
/// Unpack and interleave 16 bits from masks a and b, and store the 32-bit result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackw)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kunpckwd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kunpackw(a: __mmask32, b: __mmask32) -> __mmask32 {
    // High half comes from a's low 16 bits, low half from b's low 16 bits.
    ((a & 0xffff) << 16) | (b & 0xffff)
}

/// Unpack and interleave 32 bits from masks a and b, and store the 64-bit result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackd)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kunpckdq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kunpackd(a: __mmask64, b: __mmask64) -> __mmask64 {
    // High half comes from a's low 32 bits, low half from b's low 32 bits.
    ((a & 0xffffffff) << 32) | (b & 0xffffffff)
}
11606
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi8&expand=1407)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepi16_epi8(a: __m512i) -> __m256i {
    unsafe {
        let a = a.as_i16x32();
        // Lane-wise narrowing cast i16 -> i8: keeps the low 8 bits of each lane.
        transmute::<i8x32, _>(simd_cast(a))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi8&expand=1408)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    unsafe {
        // Convert unconditionally, then merge with `src` under the writemask.
        let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, convert, src.as_i8x32()))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi8&expand=1409)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    unsafe {
        // Convert unconditionally, then zero the lanes whose mask bit is clear.
        let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, convert, i8x32::ZERO))
    }
}
11651
11652/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
11653///
11654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi8&expand=1404)
11655#[inline]
11656#[target_feature(enable = "avx512bw,avx512vl")]
11657#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11658#[cfg_attr(test, assert_instr(vpmovwb))]
11659#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11660pub const fn _mm256_cvtepi16_epi8(a: __m256i) -> __m128i {
11661    unsafe {
11662        let a = a.as_i16x16();
11663        transmute::<i8x16, _>(simd_cast(a))
11664    }
11665}
11666
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi8&expand=1405)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
    unsafe {
        // Truncate, then per lane take the converted value where the bit in
        // `k` is set and the corresponding `src` lane otherwise.
        let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
    }
}
11681
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi8&expand=1406)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
    unsafe {
        // Truncate, then zero each lane whose bit in `k` is clear.
        let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
    }
}
11696
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi8&expand=1401)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i16x8();
        // `simd_cast` needs equal lane counts on both sides, so widen the
        // 8 input lanes to 16 by appending zeros (index 8 selects lane 0 of
        // the `i16x8::ZERO` operand). After the cast this yields the 8
        // truncated bytes in the low half and zeros in the upper 64 bits,
        // matching VPMOVWB's behavior for a 128-bit destination.
        let v256: i16x16 = simd_shuffle!(
            a,
            i16x8::ZERO,
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]
        );
        transmute::<i8x16, _>(simd_cast(v256))
    }
}
11716
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi8&expand=1402)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let a = _mm_cvtepi16_epi8(a).as_i8x16();
        // Keep only the low 8 bytes of `src` and force the upper 8 bytes to
        // zero (index 16 selects lane 0 of the `i8x16::ZERO` operand), since
        // VPMOVWB always zeroes the upper half of a 128-bit destination.
        let src = simd_shuffle!(
            src.as_i8x16(),
            i8x16::ZERO,
            [0, 1, 2, 3, 4, 5, 6, 7, 16, 16, 16, 16, 16, 16, 16, 16]
        );
        // Widen the 8-bit mask to 16 bits; the zeroed upper mask bits select
        // the zeroed upper `src` lanes, so the upper half stays zero.
        simd_select_bitmask(k as u16, a, src).as_m128i()
    }
}
11736
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi8&expand=1403)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
    // Zero-masking is the write-masking variant with an all-zero source.
    _mm_mask_cvtepi16_epi8(_mm_setzero_si128(), k, a)
}
11748
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi16_epi8&expand=1807)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i {
    unsafe {
        // Signed saturation: clamp each 16-bit lane to [i8::MIN, i8::MAX]
        // before the narrowing cast, so the truncation can never wrap.
        simd_cast::<_, i8x32>(simd_imax(
            simd_imin(a.as_i16x32(), i16x32::splat(i8::MAX as _)),
            i16x32::splat(i8::MIN as _),
        ))
        .as_m256i()
    }
}
11766
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_epi8&expand=1808)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    unsafe {
        // Saturate-convert, then take `src` lanes wherever `k` bits are clear.
        simd_select_bitmask(k, _mm512_cvtsepi16_epi8(a).as_i8x32(), src.as_i8x32()).as_m256i()
    }
}
11780
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi16_epi8&expand=1809)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    // Saturate-convert, then zero the lanes whose `k` bits are clear.
    unsafe { simd_select_bitmask(k, _mm512_cvtsepi16_epi8(a).as_i8x32(), i8x32::ZERO).as_m256i() }
}
11792
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi16_epi8&expand=1804)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i {
    unsafe {
        // Signed saturation: clamp each 16-bit lane to [i8::MIN, i8::MAX]
        // before the narrowing cast.
        simd_cast::<_, i8x16>(simd_imax(
            simd_imin(a.as_i16x16(), i16x16::splat(i8::MAX as _)),
            i16x16::splat(i8::MIN as _),
        ))
        .as_m128i()
    }
}
11810
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_epi8&expand=1805)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
    unsafe {
        // Saturate-convert, then take `src` lanes wherever `k` bits are clear.
        simd_select_bitmask(k, _mm256_cvtsepi16_epi8(a).as_i8x16(), src.as_i8x16()).as_m128i()
    }
}
11824
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi16_epi8&expand=1806)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
    // Saturate-convert, then zero the lanes whose `k` bits are clear.
    unsafe { simd_select_bitmask(k, _mm256_cvtsepi16_epi8(a).as_i8x16(), i8x16::ZERO).as_m128i() }
}
11836
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi16_epi8&expand=1801)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm_cvtsepi16_epi8(a: __m128i) -> __m128i {
    // Delegates to the LLVM vpmovswb intrinsic; the all-ones 8-bit mask
    // selects every converted lane, with `i8x16::ZERO` as the pass-through.
    unsafe { transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, 0b11111111)) }
}
11847
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_epi8&expand=1802)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm_mask_cvtsepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    // The LLVM intrinsic performs saturation and write-masking in one step:
    // lanes with a clear `k` bit are taken from `src`.
    unsafe { transmute(vpmovswb128(a.as_i16x8(), src.as_i8x16(), k)) }
}
11858
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi16_epi8&expand=1803)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
    // Zero-masking: a zero pass-through vector makes clear `k` bits yield 0.
    unsafe { transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, k)) }
}
11869
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi16_epi8&expand=2042)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i {
    unsafe {
        // Unsigned saturation only needs an upper clamp to u8::MAX; the
        // lower bound is already 0 for unsigned lanes.
        simd_cast::<_, u8x32>(simd_imin(a.as_u16x32(), u16x32::splat(u8::MAX as _))).as_m256i()
    }
}
11883
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_epi8&expand=2043)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    unsafe {
        // Saturate-convert, then take `src` lanes wherever `k` bits are clear.
        simd_select_bitmask(k, _mm512_cvtusepi16_epi8(a).as_u8x32(), src.as_u8x32()).as_m256i()
    }
}
11897
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi16_epi8&expand=2044)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    // Saturate-convert, then zero the lanes whose `k` bits are clear.
    unsafe { simd_select_bitmask(k, _mm512_cvtusepi16_epi8(a).as_u8x32(), u8x32::ZERO).as_m256i() }
}
11909
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi16_epi8&expand=2039)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i {
    unsafe {
        // Unsigned saturation: clamp each 16-bit lane to u8::MAX, then narrow.
        simd_cast::<_, u8x16>(simd_imin(a.as_u16x16(), u16x16::splat(u8::MAX as _))).as_m128i()
    }
}
11923
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_epi8&expand=2040)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
    unsafe {
        // Saturate-convert, then take `src` lanes wherever `k` bits are clear.
        simd_select_bitmask(k, _mm256_cvtusepi16_epi8(a).as_u8x16(), src.as_u8x16()).as_m128i()
    }
}
11937
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi16_epi8&expand=2041)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
    // Saturate-convert, then zero the lanes whose `k` bits are clear.
    unsafe { simd_select_bitmask(k, _mm256_cvtusepi16_epi8(a).as_u8x16(), u8x16::ZERO).as_m128i() }
}
11949
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi16_epi8&expand=2036)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm_cvtusepi16_epi8(a: __m128i) -> __m128i {
    // Delegates to the LLVM vpmovuswb intrinsic; the all-ones 8-bit mask
    // selects every converted lane, with `u8x16::ZERO` as the pass-through.
    unsafe { transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, 0b11111111)) }
}
11960
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_epi8&expand=2037)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm_mask_cvtusepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    // The LLVM intrinsic performs saturation and write-masking in one step:
    // lanes with a clear `k` bit are taken from `src`.
    unsafe { transmute(vpmovuswb128(a.as_u16x8(), src.as_u8x16(), k)) }
}
11971
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi16_epi8&expand=2038)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm_maskz_cvtusepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
    // Zero-masking: a zero pass-through vector makes clear `k` bits yield 0.
    unsafe { transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, k)) }
}
11982
/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi16&expand=1526)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepi8_epi16(a: __m256i) -> __m512i {
    unsafe {
        let a = a.as_i8x32();
        // Widening cast from a signed element type sign-extends each lane.
        transmute::<i16x32, _>(simd_cast(a))
    }
}
11997
/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi16&expand=1527)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepi8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
    unsafe {
        // Sign-extend, then take `src` lanes wherever `k` bits are clear.
        let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
    }
}
12012
/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi16&expand=1528)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepi8_epi16(k: __mmask32, a: __m256i) -> __m512i {
    unsafe {
        // Sign-extend, then zero the lanes whose `k` bits are clear.
        let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, convert, i16x32::ZERO))
    }
}
12027
/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi16&expand=1524)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepi8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        // Reuse the AVX2 sign-extension, then blend with `src` under `k`.
        let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
    }
}
12042
/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi16&expand=1525)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepi8_epi16(k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        // Reuse the AVX2 sign-extension, then zero lanes with clear `k` bits.
        let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
    }
}
12057
/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi16&expand=1521)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cvtepi8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        // Reuse the SSE4.1 sign-extension, then blend with `src` under `k`.
        let convert = _mm_cvtepi8_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
    }
}
12072
/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi16&expand=1522)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_cvtepi8_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        // Reuse the SSE4.1 sign-extension, then zero lanes with clear `k` bits.
        let convert = _mm_cvtepi8_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
    }
}
12087
/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi16&expand=1612)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepu8_epi16(a: __m256i) -> __m512i {
    unsafe {
        // Viewing the input as unsigned (`u8x32`) makes the widening cast
        // zero-extend rather than sign-extend.
        let a = a.as_u8x32();
        transmute::<i16x32, _>(simd_cast(a))
    }
}
12102
/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi16&expand=1613)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepu8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
    unsafe {
        // Zero-extend, then take `src` lanes wherever `k` bits are clear.
        let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
    }
}
12117
/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi16&expand=1614)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepu8_epi16(k: __mmask32, a: __m256i) -> __m512i {
    unsafe {
        // Zero-extend, then zero the lanes whose `k` bits are clear.
        let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, convert, i16x32::ZERO))
    }
}
12132
/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi16&expand=1610)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepu8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        // Reuse the AVX2 zero-extension, then blend with `src` under `k`.
        let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
    }
}
12147
/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi16&expand=1611)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepu8_epi16(k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        // Reuse the AVX2 zero-extension, then zero lanes with clear `k` bits.
        let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
    }
}
12162
/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi16&expand=1607)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cvtepu8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        // Reuse the SSE4.1 zero-extension, then blend with `src` under `k`.
        let convert = _mm_cvtepu8_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
    }
}
12177
/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi16&expand=1608)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        // Reuse the SSE4.1 zero-extension, then zero lanes with clear `k` bits.
        let convert = _mm_cvtepu8_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
    }
}
12192
/// Shift 128-bit lanes in a left by imm8 bytes while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bslli_epi128&expand=591)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_bslli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Maps destination byte index `i` to a `simd_shuffle!` source index.
        // The shuffle's first operand is `zero` (indices 0..64) and the
        // second is `a` (indices 64..128). Bytes shifted in from below a
        // 128-bit lane boundary select index 0 (a zero byte); all others
        // select byte `i - shift` of `a` within the same lane.
        const fn mask(shift: i32, i: u32) -> u32 {
            // Match VPSLLDQ: a count of 16 or more clears the whole lane;
            // `i % 16` keeps the shift from crossing 128-bit lane boundaries.
            let shift = shift as u32 & 0xff;
            if shift > 15 || i % 16 < shift {
                0
            } else {
                64 + (i - shift)
            }
        }
        let a = a.as_i8x64();
        let zero = i8x64::ZERO;
        let r: i8x64 = simd_shuffle!(
            zero,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
                mask(IMM8, 32),
                mask(IMM8, 33),
                mask(IMM8, 34),
                mask(IMM8, 35),
                mask(IMM8, 36),
                mask(IMM8, 37),
                mask(IMM8, 38),
                mask(IMM8, 39),
                mask(IMM8, 40),
                mask(IMM8, 41),
                mask(IMM8, 42),
                mask(IMM8, 43),
                mask(IMM8, 44),
                mask(IMM8, 45),
                mask(IMM8, 46),
                mask(IMM8, 47),
                mask(IMM8, 48),
                mask(IMM8, 49),
                mask(IMM8, 50),
                mask(IMM8, 51),
                mask(IMM8, 52),
                mask(IMM8, 53),
                mask(IMM8, 54),
                mask(IMM8, 55),
                mask(IMM8, 56),
                mask(IMM8, 57),
                mask(IMM8, 58),
                mask(IMM8, 59),
                mask(IMM8, 60),
                mask(IMM8, 61),
                mask(IMM8, 62),
                mask(IMM8, 63),
            ],
        );
        transmute(r)
    }
}
12288
/// Shift 128-bit lanes in a right by imm8 bytes while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bsrli_epi128&expand=594)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Shuffle index for result byte `i` (0..64). The shuffle reads from the
        // 128-element concatenation [zero(0..63), a(64..127)]:
        //  - index 0 picks a byte of `zero`, i.e. a shifted-in zero byte (used
        //    for the high bytes of each lane that run off the end);
        //  - `64 + (i + shift)` picks byte `i + shift` of `a`, which stays in
        //    the same 16-byte lane because `(i % 16) + shift <= 15` here.
        // A shift of 16 or more clears the entire lane.
        const fn mask(shift: i32, i: u32) -> u32 {
            let shift = shift as u32 & 0xff;
            if shift > 15 || (15 - (i % 16)) < shift {
                0
            } else {
                64 + (i + shift)
            }
        }
        let a = a.as_i8x64();
        let zero = i8x64::ZERO;
        // `simd_shuffle!` requires a const index array, so all 64 entries are
        // spelled out rather than generated by a loop.
        let r: i8x64 = simd_shuffle!(
            zero,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
                mask(IMM8, 32),
                mask(IMM8, 33),
                mask(IMM8, 34),
                mask(IMM8, 35),
                mask(IMM8, 36),
                mask(IMM8, 37),
                mask(IMM8, 38),
                mask(IMM8, 39),
                mask(IMM8, 40),
                mask(IMM8, 41),
                mask(IMM8, 42),
                mask(IMM8, 43),
                mask(IMM8, 44),
                mask(IMM8, 45),
                mask(IMM8, 46),
                mask(IMM8, 47),
                mask(IMM8, 48),
                mask(IMM8, 49),
                mask(IMM8, 50),
                mask(IMM8, 51),
                mask(IMM8, 52),
                mask(IMM8, 53),
                mask(IMM8, 54),
                mask(IMM8, 55),
                mask(IMM8, 56),
                mask(IMM8, 57),
                mask(IMM8, 58),
                mask(IMM8, 59),
                mask(IMM8, 60),
                mask(IMM8, 61),
                mask(IMM8, 62),
                mask(IMM8, 63),
            ],
        );
        transmute(r)
    }
}
12384
12385/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst.
12386/// Unlike [`_mm_alignr_epi8`], [`_mm256_alignr_epi8`] functions, where the entire input vectors are concatenated to the temporary result,
12387/// this concatenation happens in 4 steps, where each step builds 32-byte temporary result.
12388///
12389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi8&expand=263)
12390#[inline]
12391#[target_feature(enable = "avx512bw")]
12392#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12393#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
12394#[rustc_legacy_const_generics(2)]
12395#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12396pub const fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
12397    const fn mask(shift: u32, i: u32) -> u32 {
12398        let shift = shift % 16;
12399        let mod_i = i % 16;
12400        if mod_i < (16 - shift) {
12401            i + shift
12402        } else {
12403            i + 48 + shift
12404        }
12405    }
12406
12407    // If palignr is shifting the pair of vectors more than the size of two
12408    // lanes, emit zero.
12409    if IMM8 >= 32 {
12410        return _mm512_setzero_si512();
12411    }
12412    // If palignr is shifting the pair of input vectors more than one lane,
12413    // but less than two lanes, convert to shifting in zeroes.
12414    let (a, b) = if IMM8 > 16 {
12415        (_mm512_setzero_si512(), a)
12416    } else {
12417        (a, b)
12418    };
12419    unsafe {
12420        if IMM8 == 16 {
12421            return transmute(a);
12422        }
12423
12424        let r: i8x64 = simd_shuffle!(
12425            b.as_i8x64(),
12426            a.as_i8x64(),
12427            [
12428                mask(IMM8 as u32, 0),
12429                mask(IMM8 as u32, 1),
12430                mask(IMM8 as u32, 2),
12431                mask(IMM8 as u32, 3),
12432                mask(IMM8 as u32, 4),
12433                mask(IMM8 as u32, 5),
12434                mask(IMM8 as u32, 6),
12435                mask(IMM8 as u32, 7),
12436                mask(IMM8 as u32, 8),
12437                mask(IMM8 as u32, 9),
12438                mask(IMM8 as u32, 10),
12439                mask(IMM8 as u32, 11),
12440                mask(IMM8 as u32, 12),
12441                mask(IMM8 as u32, 13),
12442                mask(IMM8 as u32, 14),
12443                mask(IMM8 as u32, 15),
12444                mask(IMM8 as u32, 16),
12445                mask(IMM8 as u32, 17),
12446                mask(IMM8 as u32, 18),
12447                mask(IMM8 as u32, 19),
12448                mask(IMM8 as u32, 20),
12449                mask(IMM8 as u32, 21),
12450                mask(IMM8 as u32, 22),
12451                mask(IMM8 as u32, 23),
12452                mask(IMM8 as u32, 24),
12453                mask(IMM8 as u32, 25),
12454                mask(IMM8 as u32, 26),
12455                mask(IMM8 as u32, 27),
12456                mask(IMM8 as u32, 28),
12457                mask(IMM8 as u32, 29),
12458                mask(IMM8 as u32, 30),
12459                mask(IMM8 as u32, 31),
12460                mask(IMM8 as u32, 32),
12461                mask(IMM8 as u32, 33),
12462                mask(IMM8 as u32, 34),
12463                mask(IMM8 as u32, 35),
12464                mask(IMM8 as u32, 36),
12465                mask(IMM8 as u32, 37),
12466                mask(IMM8 as u32, 38),
12467                mask(IMM8 as u32, 39),
12468                mask(IMM8 as u32, 40),
12469                mask(IMM8 as u32, 41),
12470                mask(IMM8 as u32, 42),
12471                mask(IMM8 as u32, 43),
12472                mask(IMM8 as u32, 44),
12473                mask(IMM8 as u32, 45),
12474                mask(IMM8 as u32, 46),
12475                mask(IMM8 as u32, 47),
12476                mask(IMM8 as u32, 48),
12477                mask(IMM8 as u32, 49),
12478                mask(IMM8 as u32, 50),
12479                mask(IMM8 as u32, 51),
12480                mask(IMM8 as u32, 52),
12481                mask(IMM8 as u32, 53),
12482                mask(IMM8 as u32, 54),
12483                mask(IMM8 as u32, 55),
12484                mask(IMM8 as u32, 56),
12485                mask(IMM8 as u32, 57),
12486                mask(IMM8 as u32, 58),
12487                mask(IMM8 as u32, 59),
12488                mask(IMM8 as u32, 60),
12489                mask(IMM8 as u32, 61),
12490                mask(IMM8 as u32, 62),
12491                mask(IMM8 as u32, 63),
12492            ],
12493        );
12494        transmute(r)
12495    }
12496}
12497
12498/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12499///
12500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi8&expand=264)
12501#[inline]
12502#[target_feature(enable = "avx512bw")]
12503#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12504#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
12505#[rustc_legacy_const_generics(4)]
12506#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12507pub const fn _mm512_mask_alignr_epi8<const IMM8: i32>(
12508    src: __m512i,
12509    k: __mmask64,
12510    a: __m512i,
12511    b: __m512i,
12512) -> __m512i {
12513    unsafe {
12514        static_assert_uimm_bits!(IMM8, 8);
12515        let r = _mm512_alignr_epi8::<IMM8>(a, b);
12516        transmute(simd_select_bitmask(k, r.as_i8x64(), src.as_i8x64()))
12517    }
12518}
12519
12520/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12521///
12522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi8&expand=265)
12523#[inline]
12524#[target_feature(enable = "avx512bw")]
12525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12526#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
12527#[rustc_legacy_const_generics(3)]
12528#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12529pub const fn _mm512_maskz_alignr_epi8<const IMM8: i32>(
12530    k: __mmask64,
12531    a: __m512i,
12532    b: __m512i,
12533) -> __m512i {
12534    unsafe {
12535        static_assert_uimm_bits!(IMM8, 8);
12536        let r = _mm512_alignr_epi8::<IMM8>(a, b);
12537        transmute(simd_select_bitmask(k, r.as_i8x64(), i8x64::ZERO))
12538    }
12539}
12540
12541/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12542///
12543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi8&expand=261)
12544#[inline]
12545#[target_feature(enable = "avx512bw,avx512vl")]
12546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12547#[rustc_legacy_const_generics(4)]
12548#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
12549#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12550pub const fn _mm256_mask_alignr_epi8<const IMM8: i32>(
12551    src: __m256i,
12552    k: __mmask32,
12553    a: __m256i,
12554    b: __m256i,
12555) -> __m256i {
12556    unsafe {
12557        static_assert_uimm_bits!(IMM8, 8);
12558        let r = _mm256_alignr_epi8::<IMM8>(a, b);
12559        transmute(simd_select_bitmask(k, r.as_i8x32(), src.as_i8x32()))
12560    }
12561}
12562
12563/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12564///
12565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi8&expand=262)
12566#[inline]
12567#[target_feature(enable = "avx512bw,avx512vl")]
12568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12569#[rustc_legacy_const_generics(3)]
12570#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
12571#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12572pub const fn _mm256_maskz_alignr_epi8<const IMM8: i32>(
12573    k: __mmask32,
12574    a: __m256i,
12575    b: __m256i,
12576) -> __m256i {
12577    unsafe {
12578        static_assert_uimm_bits!(IMM8, 8);
12579        let r = _mm256_alignr_epi8::<IMM8>(a, b);
12580        transmute(simd_select_bitmask(k, r.as_i8x32(), i8x32::ZERO))
12581    }
12582}
12583
12584/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12585///
12586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi8&expand=258)
12587#[inline]
12588#[target_feature(enable = "avx512bw,avx512vl")]
12589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12590#[rustc_legacy_const_generics(4)]
12591#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
12592#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12593pub const fn _mm_mask_alignr_epi8<const IMM8: i32>(
12594    src: __m128i,
12595    k: __mmask16,
12596    a: __m128i,
12597    b: __m128i,
12598) -> __m128i {
12599    unsafe {
12600        static_assert_uimm_bits!(IMM8, 8);
12601        let r = _mm_alignr_epi8::<IMM8>(a, b);
12602        transmute(simd_select_bitmask(k, r.as_i8x16(), src.as_i8x16()))
12603    }
12604}
12605
12606/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12607///
12608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi8&expand=259)
12609#[inline]
12610#[target_feature(enable = "avx512bw,avx512vl")]
12611#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12612#[rustc_legacy_const_generics(3)]
12613#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
12614#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12615pub const fn _mm_maskz_alignr_epi8<const IMM8: i32>(
12616    k: __mmask16,
12617    a: __m128i,
12618    b: __m128i,
12619) -> __m128i {
12620    unsafe {
12621        static_assert_uimm_bits!(IMM8, 8);
12622        let r = _mm_alignr_epi8::<IMM8>(a, b);
12623        transmute(simd_select_bitmask(k, r.as_i8x16(), i8x16::ZERO))
12624    }
12625}
12626
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_storeu_epi8&expand=1812)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm512_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    // Delegates directly to the masked-store LLVM intrinsic
    // (llvm.x86.avx512.mask.pmovs.wb.mem.512); only bytes whose mask bit is
    // set are written to `mem_addr`.
    vpmovswbmem(mem_addr, a.as_i16x32(), k);
}
12637
12638/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12639///
12640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_storeu_epi8&expand=1811)
12641#[inline]
12642#[target_feature(enable = "avx512bw,avx512vl")]
12643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12644#[cfg_attr(test, assert_instr(vpmovswb))]
12645pub unsafe fn _mm256_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
12646    let mask = simd_select_bitmask(k, i16x16::splat(!0), i16x16::ZERO);
12647
12648    let max = simd_splat(i16::from(i8::MAX));
12649    let min = simd_splat(i16::from(i8::MIN));
12650
12651    let v = simd_imax(simd_imin(a.as_i16x16(), max), min);
12652    let truncated: i8x16 = simd_cast(v);
12653    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, truncated);
12654}
12655
12656/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12657///
12658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_storeu_epi8&expand=1810)
12659#[inline]
12660#[target_feature(enable = "avx512bw,avx512vl")]
12661#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12662#[cfg_attr(test, assert_instr(vpmovswb))]
12663pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
12664    let mask = simd_select_bitmask(k, i16x8::splat(!0), i16x8::ZERO);
12665
12666    let max = simd_splat(i16::from(i8::MAX));
12667    let min = simd_splat(i16::from(i8::MIN));
12668
12669    let v = simd_imax(simd_imin(a.as_i16x8(), max), min);
12670    let truncated: i8x8 = simd_cast(v);
12671    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, truncated);
12672}
12673
12674/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12675///
12676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_storeu_epi8&expand=1412)
12677#[inline]
12678#[target_feature(enable = "avx512bw")]
12679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12680#[cfg_attr(test, assert_instr(vpmovwb))]
12681#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12682pub const unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
12683    let result = _mm512_cvtepi16_epi8(a).as_i8x32();
12684    let mask = simd_select_bitmask(k, i8x32::splat(!0), i8x32::ZERO);
12685    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, result);
12686}
12687
12688/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12689///
12690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_storeu_epi8&expand=1411)
12691#[inline]
12692#[target_feature(enable = "avx512bw,avx512vl")]
12693#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12694#[cfg_attr(test, assert_instr(vpmovwb))]
12695#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12696pub const unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
12697    let result = _mm256_cvtepi16_epi8(a).as_i8x16();
12698    let mask = simd_select_bitmask(k, i8x16::splat(!0), i8x16::ZERO);
12699    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, result);
12700}
12701
12702/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12703///
12704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_storeu_epi8&expand=1410)
12705#[inline]
12706#[target_feature(enable = "avx512bw,avx512vl")]
12707#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12708#[cfg_attr(test, assert_instr(vpmovwb))]
12709#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12710pub const unsafe fn _mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
12711    let result: i8x8 = simd_shuffle!(
12712        _mm_cvtepi16_epi8(a).as_i8x16(),
12713        i8x16::ZERO,
12714        [0, 1, 2, 3, 4, 5, 6, 7]
12715    );
12716    let mask = simd_select_bitmask(k, i8x8::splat(!0), i8x8::ZERO);
12717    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, result);
12718}
12719
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_storeu_epi8&expand=2047)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm512_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    // Delegates directly to the masked-store LLVM intrinsic
    // (llvm.x86.avx512.mask.pmovus.wb.mem.512); only bytes whose mask bit is
    // set are written to `mem_addr`.
    vpmovuswbmem(mem_addr, a.as_i16x32(), k);
}
12730
12731/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12732///
12733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_storeu_epi8&expand=2046)
12734#[inline]
12735#[target_feature(enable = "avx512bw,avx512vl")]
12736#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12737#[cfg_attr(test, assert_instr(vpmovuswb))]
12738pub unsafe fn _mm256_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
12739    let mask = simd_select_bitmask(k, i16x16::splat(!0), i16x16::ZERO);
12740    let mem_addr = mem_addr.cast::<u8>();
12741    let max = simd_splat(u16::from(u8::MAX));
12742
12743    let truncated: u8x16 = simd_cast(simd_imin(a.as_u16x16(), max));
12744    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, truncated);
12745}
12746
12747/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12748///
12749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_storeu_epi8&expand=2045)
12750#[inline]
12751#[target_feature(enable = "avx512bw,avx512vl")]
12752#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12753#[cfg_attr(test, assert_instr(vpmovuswb))]
12754pub unsafe fn _mm_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
12755    let mask = simd_select_bitmask(k, i16x8::splat(!0), i16x8::ZERO);
12756    let mem_addr = mem_addr.cast::<u8>();
12757    let max = simd_splat(u16::from(u8::MAX));
12758
12759    let v = a.as_u16x8();
12760    let v = simd_imin(v, max);
12761
12762    let truncated: u8x8 = simd_cast(v);
12763    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, truncated);
12764}
12765
// Declarations of the LLVM intrinsics that back the AVX-512BW operations in
// this module; each `link_name` is the LLVM intrinsic symbol.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // vpmulhrsw (512-bit).
    #[link_name = "llvm.x86.avx512.pmul.hr.sw.512"]
    fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;

    // vpmaddwd / vpmaddubsw (512-bit multiply-add).
    #[link_name = "llvm.x86.avx512.pmaddw.d.512"]
    fn vpmaddwd(a: i16x32, b: i16x32) -> i32x16;
    #[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
    fn vpmaddubsw(a: u8x64, b: i8x64) -> i16x32;

    // 512-bit word shifts with the count taken from the low 64 bits of an
    // xmm register (vpsllw / vpsrlw / vpsraw).
    #[link_name = "llvm.x86.avx512.psll.w.512"]
    fn vpsllw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.psrl.w.512"]
    fn vpsrlw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.psra.w.512"]
    fn vpsraw(a: i16x32, count: i16x8) -> i16x32;

    // Two-source word permutes (vpermi2w) at 512/256/128-bit widths.
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.512"]
    fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.256"]
    fn vpermi2w256(a: i16x16, idx: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.128"]
    fn vpermi2w128(a: i16x8, idx: i16x8, b: i16x8) -> i16x8;

    // Single-source word permutes (vpermw) at 512/256/128-bit widths.
    #[link_name = "llvm.x86.avx512.permvar.hi.512"]
    fn vpermw(a: i16x32, idx: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.permvar.hi.256"]
    fn vpermw256(a: i16x16, idx: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.permvar.hi.128"]
    fn vpermw128(a: i16x8, idx: i16x8) -> i16x8;

    // vpshufb (512-bit byte shuffle).
    #[link_name = "llvm.x86.avx512.pshuf.b.512"]
    fn vpshufb(a: i8x64, b: i8x64) -> i8x64;

    // vpsadbw / vdbpsadbw sum-of-absolute-differences.
    #[link_name = "llvm.x86.avx512.psad.bw.512"]
    fn vpsadbw(a: u8x64, b: u8x64) -> u64x8;

    #[link_name = "llvm.x86.avx512.dbpsadbw.512"]
    fn vdbpsadbw(a: u8x64, b: u8x64, imm8: i32) -> u16x32;
    #[link_name = "llvm.x86.avx512.dbpsadbw.256"]
    fn vdbpsadbw256(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
    #[link_name = "llvm.x86.avx512.dbpsadbw.128"]
    fn vdbpsadbw128(a: u8x16, b: u8x16, imm8: i32) -> u16x8;

    // Masked saturating word->byte down-converts with register destination
    // (vpmovswb / vpmovuswb, 128-bit).
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.128"]
    fn vpmovswb128(a: i16x8, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.128"]
    fn vpmovuswb128(a: u16x8, src: u8x16, mask: u8) -> u8x16;

    // Masked saturating word->byte down-converts that store directly to
    // memory (512-bit).
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.512"]
    fn vpmovswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);

    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.512"]
    fn vpmovuswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
}
12824
12825#[cfg(test)]
12826mod tests {
12827    use crate::core_arch::assert_eq_const as assert_eq;
12828
12829    use stdarch_test::simd_test;
12830
12831    use crate::core_arch::x86::*;
12832    use crate::hint::black_box;
12833    use crate::mem::{self};
12834
12835    #[simd_test(enable = "avx512bw")]
12836    const fn test_mm512_abs_epi16() {
12837        let a = _mm512_set1_epi16(-1);
12838        let r = _mm512_abs_epi16(a);
12839        let e = _mm512_set1_epi16(1);
12840        assert_eq_m512i(r, e);
12841    }
12842
12843    #[simd_test(enable = "avx512bw")]
12844    const fn test_mm512_mask_abs_epi16() {
12845        let a = _mm512_set1_epi16(-1);
12846        let r = _mm512_mask_abs_epi16(a, 0, a);
12847        assert_eq_m512i(r, a);
12848        let r = _mm512_mask_abs_epi16(a, 0b00000000_11111111_00000000_11111111, a);
12849        #[rustfmt::skip]
12850        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
12851                                 -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
12852        assert_eq_m512i(r, e);
12853    }
12854
12855    #[simd_test(enable = "avx512bw")]
12856    const fn test_mm512_maskz_abs_epi16() {
12857        let a = _mm512_set1_epi16(-1);
12858        let r = _mm512_maskz_abs_epi16(0, a);
12859        assert_eq_m512i(r, _mm512_setzero_si512());
12860        let r = _mm512_maskz_abs_epi16(0b00000000_11111111_00000000_11111111, a);
12861        #[rustfmt::skip]
12862        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
12863                                  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
12864        assert_eq_m512i(r, e);
12865    }
12866
12867    #[simd_test(enable = "avx512bw,avx512vl")]
12868    const fn test_mm256_mask_abs_epi16() {
12869        let a = _mm256_set1_epi16(-1);
12870        let r = _mm256_mask_abs_epi16(a, 0, a);
12871        assert_eq_m256i(r, a);
12872        let r = _mm256_mask_abs_epi16(a, 0b00000000_11111111, a);
12873        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
12874        assert_eq_m256i(r, e);
12875    }
12876
12877    #[simd_test(enable = "avx512bw,avx512vl")]
12878    const fn test_mm256_maskz_abs_epi16() {
12879        let a = _mm256_set1_epi16(-1);
12880        let r = _mm256_maskz_abs_epi16(0, a);
12881        assert_eq_m256i(r, _mm256_setzero_si256());
12882        let r = _mm256_maskz_abs_epi16(0b00000000_11111111, a);
12883        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
12884        assert_eq_m256i(r, e);
12885    }
12886
12887    #[simd_test(enable = "avx512bw,avx512vl")]
12888    const fn test_mm_mask_abs_epi16() {
12889        let a = _mm_set1_epi16(-1);
12890        let r = _mm_mask_abs_epi16(a, 0, a);
12891        assert_eq_m128i(r, a);
12892        let r = _mm_mask_abs_epi16(a, 0b00001111, a);
12893        let e = _mm_set_epi16(-1, -1, -1, -1, 1, 1, 1, 1);
12894        assert_eq_m128i(r, e);
12895    }
12896
12897    #[simd_test(enable = "avx512bw,avx512vl")]
12898    const fn test_mm_maskz_abs_epi16() {
12899        let a = _mm_set1_epi16(-1);
12900        let r = _mm_maskz_abs_epi16(0, a);
12901        assert_eq_m128i(r, _mm_setzero_si128());
12902        let r = _mm_maskz_abs_epi16(0b00001111, a);
12903        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
12904        assert_eq_m128i(r, e);
12905    }
12906
12907    #[simd_test(enable = "avx512bw")]
12908    const fn test_mm512_abs_epi8() {
12909        let a = _mm512_set1_epi8(-1);
12910        let r = _mm512_abs_epi8(a);
12911        let e = _mm512_set1_epi8(1);
12912        assert_eq_m512i(r, e);
12913    }
12914
12915    #[simd_test(enable = "avx512bw")]
12916    const fn test_mm512_mask_abs_epi8() {
12917        let a = _mm512_set1_epi8(-1);
12918        let r = _mm512_mask_abs_epi8(a, 0, a);
12919        assert_eq_m512i(r, a);
12920        let r = _mm512_mask_abs_epi8(
12921            a,
12922            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
12923            a,
12924        );
12925        #[rustfmt::skip]
12926        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
12927                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
12928                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
12929                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
12930        assert_eq_m512i(r, e);
12931    }
12932
12933    #[simd_test(enable = "avx512bw")]
12934    const fn test_mm512_maskz_abs_epi8() {
12935        let a = _mm512_set1_epi8(-1);
12936        let r = _mm512_maskz_abs_epi8(0, a);
12937        assert_eq_m512i(r, _mm512_setzero_si512());
12938        let r = _mm512_maskz_abs_epi8(
12939            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
12940            a,
12941        );
12942        #[rustfmt::skip]
12943        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
12944                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
12945                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
12946                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
12947        assert_eq_m512i(r, e);
12948    }
12949
12950    #[simd_test(enable = "avx512bw,avx512vl")]
12951    const fn test_mm256_mask_abs_epi8() {
12952        let a = _mm256_set1_epi8(-1);
12953        let r = _mm256_mask_abs_epi8(a, 0, a);
12954        assert_eq_m256i(r, a);
12955        let r = _mm256_mask_abs_epi8(a, 0b00000000_11111111_00000000_11111111, a);
12956        #[rustfmt::skip]
12957        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
12958                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
12959        assert_eq_m256i(r, e);
12960    }
12961
12962    #[simd_test(enable = "avx512bw,avx512vl")]
12963    const fn test_mm256_maskz_abs_epi8() {
12964        let a = _mm256_set1_epi8(-1);
12965        let r = _mm256_maskz_abs_epi8(0, a);
12966        assert_eq_m256i(r, _mm256_setzero_si256());
12967        let r = _mm256_maskz_abs_epi8(0b00000000_11111111_00000000_11111111, a);
12968        #[rustfmt::skip]
12969        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
12970                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
12971        assert_eq_m256i(r, e);
12972    }
12973
12974    #[simd_test(enable = "avx512bw,avx512vl")]
12975    const fn test_mm_mask_abs_epi8() {
12976        let a = _mm_set1_epi8(-1);
12977        let r = _mm_mask_abs_epi8(a, 0, a);
12978        assert_eq_m128i(r, a);
12979        let r = _mm_mask_abs_epi8(a, 0b00000000_11111111, a);
12980        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
12981        assert_eq_m128i(r, e);
12982    }
12983
12984    #[simd_test(enable = "avx512bw,avx512vl")]
12985    const fn test_mm_maskz_abs_epi8() {
12986        let a = _mm_set1_epi8(-1);
12987        let r = _mm_maskz_abs_epi8(0, a);
12988        assert_eq_m128i(r, _mm_setzero_si128());
12989        let r = _mm_maskz_abs_epi8(0b00000000_11111111, a);
12990        #[rustfmt::skip]
12991        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
12992        assert_eq_m128i(r, e);
12993    }
12994
12995    #[simd_test(enable = "avx512bw")]
12996    const fn test_mm512_add_epi16() {
12997        let a = _mm512_set1_epi16(1);
12998        let b = _mm512_set1_epi16(2);
12999        let r = _mm512_add_epi16(a, b);
13000        let e = _mm512_set1_epi16(3);
13001        assert_eq_m512i(r, e);
13002    }
13003
13004    #[simd_test(enable = "avx512bw")]
13005    const fn test_mm512_mask_add_epi16() {
13006        let a = _mm512_set1_epi16(1);
13007        let b = _mm512_set1_epi16(2);
13008        let r = _mm512_mask_add_epi16(a, 0, a, b);
13009        assert_eq_m512i(r, a);
13010        let r = _mm512_mask_add_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
13011        #[rustfmt::skip]
13012        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
13013                                 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
13014        assert_eq_m512i(r, e);
13015    }
13016
13017    #[simd_test(enable = "avx512bw")]
13018    const fn test_mm512_maskz_add_epi16() {
13019        let a = _mm512_set1_epi16(1);
13020        let b = _mm512_set1_epi16(2);
13021        let r = _mm512_maskz_add_epi16(0, a, b);
13022        assert_eq_m512i(r, _mm512_setzero_si512());
13023        let r = _mm512_maskz_add_epi16(0b00000000_11111111_00000000_11111111, a, b);
13024        #[rustfmt::skip]
13025        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
13026                                 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
13027        assert_eq_m512i(r, e);
13028    }
13029
13030    #[simd_test(enable = "avx512bw,avx512vl")]
13031    const fn test_mm256_mask_add_epi16() {
13032        let a = _mm256_set1_epi16(1);
13033        let b = _mm256_set1_epi16(2);
13034        let r = _mm256_mask_add_epi16(a, 0, a, b);
13035        assert_eq_m256i(r, a);
13036        let r = _mm256_mask_add_epi16(a, 0b00000000_11111111, a, b);
13037        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
13038        assert_eq_m256i(r, e);
13039    }
13040
13041    #[simd_test(enable = "avx512bw,avx512vl")]
13042    const fn test_mm256_maskz_add_epi16() {
13043        let a = _mm256_set1_epi16(1);
13044        let b = _mm256_set1_epi16(2);
13045        let r = _mm256_maskz_add_epi16(0, a, b);
13046        assert_eq_m256i(r, _mm256_setzero_si256());
13047        let r = _mm256_maskz_add_epi16(0b00000000_11111111, a, b);
13048        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
13049        assert_eq_m256i(r, e);
13050    }
13051
13052    #[simd_test(enable = "avx512bw,avx512vl")]
13053    const fn test_mm_mask_add_epi16() {
13054        let a = _mm_set1_epi16(1);
13055        let b = _mm_set1_epi16(2);
13056        let r = _mm_mask_add_epi16(a, 0, a, b);
13057        assert_eq_m128i(r, a);
13058        let r = _mm_mask_add_epi16(a, 0b00001111, a, b);
13059        let e = _mm_set_epi16(1, 1, 1, 1, 3, 3, 3, 3);
13060        assert_eq_m128i(r, e);
13061    }
13062
13063    #[simd_test(enable = "avx512bw,avx512vl")]
13064    const fn test_mm_maskz_add_epi16() {
13065        let a = _mm_set1_epi16(1);
13066        let b = _mm_set1_epi16(2);
13067        let r = _mm_maskz_add_epi16(0, a, b);
13068        assert_eq_m128i(r, _mm_setzero_si128());
13069        let r = _mm_maskz_add_epi16(0b00001111, a, b);
13070        let e = _mm_set_epi16(0, 0, 0, 0, 3, 3, 3, 3);
13071        assert_eq_m128i(r, e);
13072    }
13073
13074    #[simd_test(enable = "avx512bw")]
13075    const fn test_mm512_add_epi8() {
13076        let a = _mm512_set1_epi8(1);
13077        let b = _mm512_set1_epi8(2);
13078        let r = _mm512_add_epi8(a, b);
13079        let e = _mm512_set1_epi8(3);
13080        assert_eq_m512i(r, e);
13081    }
13082
13083    #[simd_test(enable = "avx512bw")]
13084    const fn test_mm512_mask_add_epi8() {
13085        let a = _mm512_set1_epi8(1);
13086        let b = _mm512_set1_epi8(2);
13087        let r = _mm512_mask_add_epi8(a, 0, a, b);
13088        assert_eq_m512i(r, a);
13089        let r = _mm512_mask_add_epi8(
13090            a,
13091            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
13092            a,
13093            b,
13094        );
13095        #[rustfmt::skip]
13096        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
13097                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
13098                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
13099                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
13100        assert_eq_m512i(r, e);
13101    }
13102
13103    #[simd_test(enable = "avx512bw")]
13104    const fn test_mm512_maskz_add_epi8() {
13105        let a = _mm512_set1_epi8(1);
13106        let b = _mm512_set1_epi8(2);
13107        let r = _mm512_maskz_add_epi8(0, a, b);
13108        assert_eq_m512i(r, _mm512_setzero_si512());
13109        let r = _mm512_maskz_add_epi8(
13110            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
13111            a,
13112            b,
13113        );
13114        #[rustfmt::skip]
13115        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
13116                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
13117                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
13118                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
13119        assert_eq_m512i(r, e);
13120    }
13121
13122    #[simd_test(enable = "avx512bw,avx512vl")]
13123    const fn test_mm256_mask_add_epi8() {
13124        let a = _mm256_set1_epi8(1);
13125        let b = _mm256_set1_epi8(2);
13126        let r = _mm256_mask_add_epi8(a, 0, a, b);
13127        assert_eq_m256i(r, a);
13128        let r = _mm256_mask_add_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
13129        #[rustfmt::skip]
13130        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
13131                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
13132        assert_eq_m256i(r, e);
13133    }
13134
13135    #[simd_test(enable = "avx512bw,avx512vl")]
13136    const fn test_mm256_maskz_add_epi8() {
13137        let a = _mm256_set1_epi8(1);
13138        let b = _mm256_set1_epi8(2);
13139        let r = _mm256_maskz_add_epi8(0, a, b);
13140        assert_eq_m256i(r, _mm256_setzero_si256());
13141        let r = _mm256_maskz_add_epi8(0b00000000_11111111_00000000_11111111, a, b);
13142        #[rustfmt::skip]
13143        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
13144                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
13145        assert_eq_m256i(r, e);
13146    }
13147
13148    #[simd_test(enable = "avx512bw,avx512vl")]
13149    const fn test_mm_mask_add_epi8() {
13150        let a = _mm_set1_epi8(1);
13151        let b = _mm_set1_epi8(2);
13152        let r = _mm_mask_add_epi8(a, 0, a, b);
13153        assert_eq_m128i(r, a);
13154        let r = _mm_mask_add_epi8(a, 0b00000000_11111111, a, b);
13155        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
13156        assert_eq_m128i(r, e);
13157    }
13158
13159    #[simd_test(enable = "avx512bw,avx512vl")]
13160    const fn test_mm_maskz_add_epi8() {
13161        let a = _mm_set1_epi8(1);
13162        let b = _mm_set1_epi8(2);
13163        let r = _mm_maskz_add_epi8(0, a, b);
13164        assert_eq_m128i(r, _mm_setzero_si128());
13165        let r = _mm_maskz_add_epi8(0b00000000_11111111, a, b);
13166        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
13167        assert_eq_m128i(r, e);
13168    }
13169
13170    #[simd_test(enable = "avx512bw")]
13171    const fn test_mm512_adds_epu16() {
13172        let a = _mm512_set1_epi16(1);
13173        let b = _mm512_set1_epi16(u16::MAX as i16);
13174        let r = _mm512_adds_epu16(a, b);
13175        let e = _mm512_set1_epi16(u16::MAX as i16);
13176        assert_eq_m512i(r, e);
13177    }
13178
13179    #[simd_test(enable = "avx512bw")]
13180    const fn test_mm512_mask_adds_epu16() {
13181        let a = _mm512_set1_epi16(1);
13182        let b = _mm512_set1_epi16(u16::MAX as i16);
13183        let r = _mm512_mask_adds_epu16(a, 0, a, b);
13184        assert_eq_m512i(r, a);
13185        let r = _mm512_mask_adds_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
13186        #[rustfmt::skip]
13187        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13188                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
13189        assert_eq_m512i(r, e);
13190    }
13191
13192    #[simd_test(enable = "avx512bw")]
13193    const fn test_mm512_maskz_adds_epu16() {
13194        let a = _mm512_set1_epi16(1);
13195        let b = _mm512_set1_epi16(u16::MAX as i16);
13196        let r = _mm512_maskz_adds_epu16(0, a, b);
13197        assert_eq_m512i(r, _mm512_setzero_si512());
13198        let r = _mm512_maskz_adds_epu16(0b00000000_00000000_00000000_00001111, a, b);
13199        #[rustfmt::skip]
13200        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13201                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
13202        assert_eq_m512i(r, e);
13203    }
13204
13205    #[simd_test(enable = "avx512bw,avx512vl")]
13206    const fn test_mm256_mask_adds_epu16() {
13207        let a = _mm256_set1_epi16(1);
13208        let b = _mm256_set1_epi16(u16::MAX as i16);
13209        let r = _mm256_mask_adds_epu16(a, 0, a, b);
13210        assert_eq_m256i(r, a);
13211        let r = _mm256_mask_adds_epu16(a, 0b00000000_00001111, a, b);
13212        #[rustfmt::skip]
13213        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
13214        assert_eq_m256i(r, e);
13215    }
13216
13217    #[simd_test(enable = "avx512bw,avx512vl")]
13218    const fn test_mm256_maskz_adds_epu16() {
13219        let a = _mm256_set1_epi16(1);
13220        let b = _mm256_set1_epi16(u16::MAX as i16);
13221        let r = _mm256_maskz_adds_epu16(0, a, b);
13222        assert_eq_m256i(r, _mm256_setzero_si256());
13223        let r = _mm256_maskz_adds_epu16(0b00000000_00001111, a, b);
13224        #[rustfmt::skip]
13225        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
13226        assert_eq_m256i(r, e);
13227    }
13228
13229    #[simd_test(enable = "avx512bw,avx512vl")]
13230    const fn test_mm_mask_adds_epu16() {
13231        let a = _mm_set1_epi16(1);
13232        let b = _mm_set1_epi16(u16::MAX as i16);
13233        let r = _mm_mask_adds_epu16(a, 0, a, b);
13234        assert_eq_m128i(r, a);
13235        let r = _mm_mask_adds_epu16(a, 0b00001111, a, b);
13236        #[rustfmt::skip]
13237        let e = _mm_set_epi16(1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
13238        assert_eq_m128i(r, e);
13239    }
13240
13241    #[simd_test(enable = "avx512bw,avx512vl")]
13242    const fn test_mm_maskz_adds_epu16() {
13243        let a = _mm_set1_epi16(1);
13244        let b = _mm_set1_epi16(u16::MAX as i16);
13245        let r = _mm_maskz_adds_epu16(0, a, b);
13246        assert_eq_m128i(r, _mm_setzero_si128());
13247        let r = _mm_maskz_adds_epu16(0b00001111, a, b);
13248        #[rustfmt::skip]
13249        let e = _mm_set_epi16(0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
13250        assert_eq_m128i(r, e);
13251    }
13252
13253    #[simd_test(enable = "avx512bw")]
13254    const fn test_mm512_adds_epu8() {
13255        let a = _mm512_set1_epi8(1);
13256        let b = _mm512_set1_epi8(u8::MAX as i8);
13257        let r = _mm512_adds_epu8(a, b);
13258        let e = _mm512_set1_epi8(u8::MAX as i8);
13259        assert_eq_m512i(r, e);
13260    }
13261
13262    #[simd_test(enable = "avx512bw")]
13263    const fn test_mm512_mask_adds_epu8() {
13264        let a = _mm512_set1_epi8(1);
13265        let b = _mm512_set1_epi8(u8::MAX as i8);
13266        let r = _mm512_mask_adds_epu8(a, 0, a, b);
13267        assert_eq_m512i(r, a);
13268        let r = _mm512_mask_adds_epu8(
13269            a,
13270            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
13271            a,
13272            b,
13273        );
13274        #[rustfmt::skip]
13275        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13276                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13277                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13278                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
13279        assert_eq_m512i(r, e);
13280    }
13281
13282    #[simd_test(enable = "avx512bw")]
13283    const fn test_mm512_maskz_adds_epu8() {
13284        let a = _mm512_set1_epi8(1);
13285        let b = _mm512_set1_epi8(u8::MAX as i8);
13286        let r = _mm512_maskz_adds_epu8(0, a, b);
13287        assert_eq_m512i(r, _mm512_setzero_si512());
13288        let r = _mm512_maskz_adds_epu8(
13289            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
13290            a,
13291            b,
13292        );
13293        #[rustfmt::skip]
13294        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13295                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13296                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13297                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
13298        assert_eq_m512i(r, e);
13299    }
13300
13301    #[simd_test(enable = "avx512bw,avx512vl")]
13302    const fn test_mm256_mask_adds_epu8() {
13303        let a = _mm256_set1_epi8(1);
13304        let b = _mm256_set1_epi8(u8::MAX as i8);
13305        let r = _mm256_mask_adds_epu8(a, 0, a, b);
13306        assert_eq_m256i(r, a);
13307        let r = _mm256_mask_adds_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
13308        #[rustfmt::skip]
13309        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13310                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
13311        assert_eq_m256i(r, e);
13312    }
13313
13314    #[simd_test(enable = "avx512bw,avx512vl")]
13315    const fn test_mm256_maskz_adds_epu8() {
13316        let a = _mm256_set1_epi8(1);
13317        let b = _mm256_set1_epi8(u8::MAX as i8);
13318        let r = _mm256_maskz_adds_epu8(0, a, b);
13319        assert_eq_m256i(r, _mm256_setzero_si256());
13320        let r = _mm256_maskz_adds_epu8(0b00000000_00000000_00000000_00001111, a, b);
13321        #[rustfmt::skip]
13322        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13323                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
13324        assert_eq_m256i(r, e);
13325    }
13326
13327    #[simd_test(enable = "avx512bw,avx512vl")]
13328    const fn test_mm_mask_adds_epu8() {
13329        let a = _mm_set1_epi8(1);
13330        let b = _mm_set1_epi8(u8::MAX as i8);
13331        let r = _mm_mask_adds_epu8(a, 0, a, b);
13332        assert_eq_m128i(r, a);
13333        let r = _mm_mask_adds_epu8(a, 0b00000000_00001111, a, b);
13334        #[rustfmt::skip]
13335        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
13336        assert_eq_m128i(r, e);
13337    }
13338
13339    #[simd_test(enable = "avx512bw,avx512vl")]
13340    const fn test_mm_maskz_adds_epu8() {
13341        let a = _mm_set1_epi8(1);
13342        let b = _mm_set1_epi8(u8::MAX as i8);
13343        let r = _mm_maskz_adds_epu8(0, a, b);
13344        assert_eq_m128i(r, _mm_setzero_si128());
13345        let r = _mm_maskz_adds_epu8(0b00000000_00001111, a, b);
13346        #[rustfmt::skip]
13347        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
13348        assert_eq_m128i(r, e);
13349    }
13350
13351    #[simd_test(enable = "avx512bw")]
13352    const fn test_mm512_adds_epi16() {
13353        let a = _mm512_set1_epi16(1);
13354        let b = _mm512_set1_epi16(i16::MAX);
13355        let r = _mm512_adds_epi16(a, b);
13356        let e = _mm512_set1_epi16(i16::MAX);
13357        assert_eq_m512i(r, e);
13358    }
13359
13360    #[simd_test(enable = "avx512bw")]
13361    const fn test_mm512_mask_adds_epi16() {
13362        let a = _mm512_set1_epi16(1);
13363        let b = _mm512_set1_epi16(i16::MAX);
13364        let r = _mm512_mask_adds_epi16(a, 0, a, b);
13365        assert_eq_m512i(r, a);
13366        let r = _mm512_mask_adds_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
13367        #[rustfmt::skip]
13368        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13369                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
13370        assert_eq_m512i(r, e);
13371    }
13372
13373    #[simd_test(enable = "avx512bw")]
13374    const fn test_mm512_maskz_adds_epi16() {
13375        let a = _mm512_set1_epi16(1);
13376        let b = _mm512_set1_epi16(i16::MAX);
13377        let r = _mm512_maskz_adds_epi16(0, a, b);
13378        assert_eq_m512i(r, _mm512_setzero_si512());
13379        let r = _mm512_maskz_adds_epi16(0b00000000_00000000_00000000_00001111, a, b);
13380        #[rustfmt::skip]
13381        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13382                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
13383        assert_eq_m512i(r, e);
13384    }
13385
13386    #[simd_test(enable = "avx512bw,avx512vl")]
13387    const fn test_mm256_mask_adds_epi16() {
13388        let a = _mm256_set1_epi16(1);
13389        let b = _mm256_set1_epi16(i16::MAX);
13390        let r = _mm256_mask_adds_epi16(a, 0, a, b);
13391        assert_eq_m256i(r, a);
13392        let r = _mm256_mask_adds_epi16(a, 0b00000000_00001111, a, b);
13393        #[rustfmt::skip]
13394        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
13395        assert_eq_m256i(r, e);
13396    }
13397
13398    #[simd_test(enable = "avx512bw,avx512vl")]
13399    const fn test_mm256_maskz_adds_epi16() {
13400        let a = _mm256_set1_epi16(1);
13401        let b = _mm256_set1_epi16(i16::MAX);
13402        let r = _mm256_maskz_adds_epi16(0, a, b);
13403        assert_eq_m256i(r, _mm256_setzero_si256());
13404        let r = _mm256_maskz_adds_epi16(0b00000000_00001111, a, b);
13405        #[rustfmt::skip]
13406        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
13407        assert_eq_m256i(r, e);
13408    }
13409
13410    #[simd_test(enable = "avx512bw,avx512vl")]
13411    const fn test_mm_mask_adds_epi16() {
13412        let a = _mm_set1_epi16(1);
13413        let b = _mm_set1_epi16(i16::MAX);
13414        let r = _mm_mask_adds_epi16(a, 0, a, b);
13415        assert_eq_m128i(r, a);
13416        let r = _mm_mask_adds_epi16(a, 0b00001111, a, b);
13417        let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
13418        assert_eq_m128i(r, e);
13419    }
13420
13421    #[simd_test(enable = "avx512bw,avx512vl")]
13422    const fn test_mm_maskz_adds_epi16() {
13423        let a = _mm_set1_epi16(1);
13424        let b = _mm_set1_epi16(i16::MAX);
13425        let r = _mm_maskz_adds_epi16(0, a, b);
13426        assert_eq_m128i(r, _mm_setzero_si128());
13427        let r = _mm_maskz_adds_epi16(0b00001111, a, b);
13428        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
13429        assert_eq_m128i(r, e);
13430    }
13431
13432    #[simd_test(enable = "avx512bw")]
13433    const fn test_mm512_adds_epi8() {
13434        let a = _mm512_set1_epi8(1);
13435        let b = _mm512_set1_epi8(i8::MAX);
13436        let r = _mm512_adds_epi8(a, b);
13437        let e = _mm512_set1_epi8(i8::MAX);
13438        assert_eq_m512i(r, e);
13439    }
13440
13441    #[simd_test(enable = "avx512bw")]
13442    const fn test_mm512_mask_adds_epi8() {
13443        let a = _mm512_set1_epi8(1);
13444        let b = _mm512_set1_epi8(i8::MAX);
13445        let r = _mm512_mask_adds_epi8(a, 0, a, b);
13446        assert_eq_m512i(r, a);
13447        let r = _mm512_mask_adds_epi8(
13448            a,
13449            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
13450            a,
13451            b,
13452        );
13453        #[rustfmt::skip]
13454        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13455                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13456                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13457                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
13458        assert_eq_m512i(r, e);
13459    }
13460
13461    #[simd_test(enable = "avx512bw")]
13462    const fn test_mm512_maskz_adds_epi8() {
13463        let a = _mm512_set1_epi8(1);
13464        let b = _mm512_set1_epi8(i8::MAX);
13465        let r = _mm512_maskz_adds_epi8(0, a, b);
13466        assert_eq_m512i(r, _mm512_setzero_si512());
13467        let r = _mm512_maskz_adds_epi8(
13468            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
13469            a,
13470            b,
13471        );
13472        #[rustfmt::skip]
13473        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13474                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13475                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13476                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
13477        assert_eq_m512i(r, e);
13478    }
13479
13480    #[simd_test(enable = "avx512bw,avx512vl")]
13481    const fn test_mm256_mask_adds_epi8() {
13482        let a = _mm256_set1_epi8(1);
13483        let b = _mm256_set1_epi8(i8::MAX);
13484        let r = _mm256_mask_adds_epi8(a, 0, a, b);
13485        assert_eq_m256i(r, a);
13486        let r = _mm256_mask_adds_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
13487        #[rustfmt::skip]
13488        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13489                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
13490        assert_eq_m256i(r, e);
13491    }
13492
13493    #[simd_test(enable = "avx512bw,avx512vl")]
13494    const fn test_mm256_maskz_adds_epi8() {
13495        let a = _mm256_set1_epi8(1);
13496        let b = _mm256_set1_epi8(i8::MAX);
13497        let r = _mm256_maskz_adds_epi8(0, a, b);
13498        assert_eq_m256i(r, _mm256_setzero_si256());
13499        let r = _mm256_maskz_adds_epi8(0b00000000_00000000_00000000_00001111, a, b);
13500        #[rustfmt::skip]
13501        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13502                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
13503        assert_eq_m256i(r, e);
13504    }
13505
13506    #[simd_test(enable = "avx512bw,avx512vl")]
13507    const fn test_mm_mask_adds_epi8() {
13508        let a = _mm_set1_epi8(1);
13509        let b = _mm_set1_epi8(i8::MAX);
13510        let r = _mm_mask_adds_epi8(a, 0, a, b);
13511        assert_eq_m128i(r, a);
13512        let r = _mm_mask_adds_epi8(a, 0b00000000_00001111, a, b);
13513        #[rustfmt::skip]
13514        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
13515        assert_eq_m128i(r, e);
13516    }
13517
13518    #[simd_test(enable = "avx512bw,avx512vl")]
13519    const fn test_mm_maskz_adds_epi8() {
13520        let a = _mm_set1_epi8(1);
13521        let b = _mm_set1_epi8(i8::MAX);
13522        let r = _mm_maskz_adds_epi8(0, a, b);
13523        assert_eq_m128i(r, _mm_setzero_si128());
13524        let r = _mm_maskz_adds_epi8(0b00000000_00001111, a, b);
13525        #[rustfmt::skip]
13526        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
13527        assert_eq_m128i(r, e);
13528    }
13529
13530    #[simd_test(enable = "avx512bw")]
13531    const fn test_mm512_sub_epi16() {
13532        let a = _mm512_set1_epi16(1);
13533        let b = _mm512_set1_epi16(2);
13534        let r = _mm512_sub_epi16(a, b);
13535        let e = _mm512_set1_epi16(-1);
13536        assert_eq_m512i(r, e);
13537    }
13538
13539    #[simd_test(enable = "avx512bw")]
13540    const fn test_mm512_mask_sub_epi16() {
13541        let a = _mm512_set1_epi16(1);
13542        let b = _mm512_set1_epi16(2);
13543        let r = _mm512_mask_sub_epi16(a, 0, a, b);
13544        assert_eq_m512i(r, a);
13545        let r = _mm512_mask_sub_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
13546        #[rustfmt::skip]
13547        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
13548                                 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
13549        assert_eq_m512i(r, e);
13550    }
13551
13552    #[simd_test(enable = "avx512bw")]
13553    const fn test_mm512_maskz_sub_epi16() {
13554        let a = _mm512_set1_epi16(1);
13555        let b = _mm512_set1_epi16(2);
13556        let r = _mm512_maskz_sub_epi16(0, a, b);
13557        assert_eq_m512i(r, _mm512_setzero_si512());
13558        let r = _mm512_maskz_sub_epi16(0b00000000_11111111_00000000_11111111, a, b);
13559        #[rustfmt::skip]
13560        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
13561                                 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
13562        assert_eq_m512i(r, e);
13563    }
13564
13565    #[simd_test(enable = "avx512bw,avx512vl")]
13566    const fn test_mm256_mask_sub_epi16() {
13567        let a = _mm256_set1_epi16(1);
13568        let b = _mm256_set1_epi16(2);
13569        let r = _mm256_mask_sub_epi16(a, 0, a, b);
13570        assert_eq_m256i(r, a);
13571        let r = _mm256_mask_sub_epi16(a, 0b00000000_11111111, a, b);
13572        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
13573        assert_eq_m256i(r, e);
13574    }
13575
13576    #[simd_test(enable = "avx512bw,avx512vl")]
13577    const fn test_mm256_maskz_sub_epi16() {
13578        let a = _mm256_set1_epi16(1);
13579        let b = _mm256_set1_epi16(2);
13580        let r = _mm256_maskz_sub_epi16(0, a, b);
13581        assert_eq_m256i(r, _mm256_setzero_si256());
13582        let r = _mm256_maskz_sub_epi16(0b00000000_11111111, a, b);
13583        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
13584        assert_eq_m256i(r, e);
13585    }
13586
13587    #[simd_test(enable = "avx512bw,avx512vl")]
13588    const fn test_mm_mask_sub_epi16() {
13589        let a = _mm_set1_epi16(1);
13590        let b = _mm_set1_epi16(2);
13591        let r = _mm_mask_sub_epi16(a, 0, a, b);
13592        assert_eq_m128i(r, a);
13593        let r = _mm_mask_sub_epi16(a, 0b00001111, a, b);
13594        let e = _mm_set_epi16(1, 1, 1, 1, -1, -1, -1, -1);
13595        assert_eq_m128i(r, e);
13596    }
13597
13598    #[simd_test(enable = "avx512bw,avx512vl")]
13599    const fn test_mm_maskz_sub_epi16() {
13600        let a = _mm_set1_epi16(1);
13601        let b = _mm_set1_epi16(2);
13602        let r = _mm_maskz_sub_epi16(0, a, b);
13603        assert_eq_m128i(r, _mm_setzero_si128());
13604        let r = _mm_maskz_sub_epi16(0b00001111, a, b);
13605        let e = _mm_set_epi16(0, 0, 0, 0, -1, -1, -1, -1);
13606        assert_eq_m128i(r, e);
13607    }
13608
13609    #[simd_test(enable = "avx512bw")]
13610    const fn test_mm512_sub_epi8() {
13611        let a = _mm512_set1_epi8(1);
13612        let b = _mm512_set1_epi8(2);
13613        let r = _mm512_sub_epi8(a, b);
13614        let e = _mm512_set1_epi8(-1);
13615        assert_eq_m512i(r, e);
13616    }
13617
13618    #[simd_test(enable = "avx512bw")]
13619    const fn test_mm512_mask_sub_epi8() {
13620        let a = _mm512_set1_epi8(1);
13621        let b = _mm512_set1_epi8(2);
13622        let r = _mm512_mask_sub_epi8(a, 0, a, b);
13623        assert_eq_m512i(r, a);
13624        let r = _mm512_mask_sub_epi8(
13625            a,
13626            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
13627            a,
13628            b,
13629        );
13630        #[rustfmt::skip]
13631        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
13632                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
13633                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
13634                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
13635        assert_eq_m512i(r, e);
13636    }
13637
13638    #[simd_test(enable = "avx512bw")]
13639    const fn test_mm512_maskz_sub_epi8() {
13640        let a = _mm512_set1_epi8(1);
13641        let b = _mm512_set1_epi8(2);
13642        let r = _mm512_maskz_sub_epi8(0, a, b);
13643        assert_eq_m512i(r, _mm512_setzero_si512());
13644        let r = _mm512_maskz_sub_epi8(
13645            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
13646            a,
13647            b,
13648        );
13649        #[rustfmt::skip]
13650        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
13651                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
13652                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
13653                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
13654        assert_eq_m512i(r, e);
13655    }
13656
13657    #[simd_test(enable = "avx512bw,avx512vl")]
13658    const fn test_mm256_mask_sub_epi8() {
13659        let a = _mm256_set1_epi8(1);
13660        let b = _mm256_set1_epi8(2);
13661        let r = _mm256_mask_sub_epi8(a, 0, a, b);
13662        assert_eq_m256i(r, a);
13663        let r = _mm256_mask_sub_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
13664        #[rustfmt::skip]
13665        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
13666                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
13667        assert_eq_m256i(r, e);
13668    }
13669
13670    #[simd_test(enable = "avx512bw,avx512vl")]
13671    const fn test_mm256_maskz_sub_epi8() {
13672        let a = _mm256_set1_epi8(1);
13673        let b = _mm256_set1_epi8(2);
13674        let r = _mm256_maskz_sub_epi8(0, a, b);
13675        assert_eq_m256i(r, _mm256_setzero_si256());
13676        let r = _mm256_maskz_sub_epi8(0b00000000_11111111_00000000_11111111, a, b);
13677        #[rustfmt::skip]
13678        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
13679                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
13680        assert_eq_m256i(r, e);
13681    }
13682
13683    #[simd_test(enable = "avx512bw,avx512vl")]
13684    const fn test_mm_mask_sub_epi8() {
13685        let a = _mm_set1_epi8(1);
13686        let b = _mm_set1_epi8(2);
13687        let r = _mm_mask_sub_epi8(a, 0, a, b);
13688        assert_eq_m128i(r, a);
13689        let r = _mm_mask_sub_epi8(a, 0b00000000_11111111, a, b);
13690        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
13691        assert_eq_m128i(r, e);
13692    }
13693
13694    #[simd_test(enable = "avx512bw,avx512vl")]
13695    const fn test_mm_maskz_sub_epi8() {
13696        let a = _mm_set1_epi8(1);
13697        let b = _mm_set1_epi8(2);
13698        let r = _mm_maskz_sub_epi8(0, a, b);
13699        assert_eq_m128i(r, _mm_setzero_si128());
13700        let r = _mm_maskz_sub_epi8(0b00000000_11111111, a, b);
13701        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
13702        assert_eq_m128i(r, e);
13703    }
13704
13705    #[simd_test(enable = "avx512bw")]
13706    const fn test_mm512_subs_epu16() {
13707        let a = _mm512_set1_epi16(1);
13708        let b = _mm512_set1_epi16(u16::MAX as i16);
13709        let r = _mm512_subs_epu16(a, b);
13710        let e = _mm512_set1_epi16(0);
13711        assert_eq_m512i(r, e);
13712    }
13713
13714    #[simd_test(enable = "avx512bw")]
13715    const fn test_mm512_mask_subs_epu16() {
13716        let a = _mm512_set1_epi16(1);
13717        let b = _mm512_set1_epi16(u16::MAX as i16);
13718        let r = _mm512_mask_subs_epu16(a, 0, a, b);
13719        assert_eq_m512i(r, a);
13720        let r = _mm512_mask_subs_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
13721        #[rustfmt::skip]
13722        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13723                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13724        assert_eq_m512i(r, e);
13725    }
13726
13727    #[simd_test(enable = "avx512bw")]
13728    const fn test_mm512_maskz_subs_epu16() {
13729        let a = _mm512_set1_epi16(1);
13730        let b = _mm512_set1_epi16(u16::MAX as i16);
13731        let r = _mm512_maskz_subs_epu16(0, a, b);
13732        assert_eq_m512i(r, _mm512_setzero_si512());
13733        let r = _mm512_maskz_subs_epu16(0b00000000_00000000_00000000_00001111, a, b);
13734        #[rustfmt::skip]
13735        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13736                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13737        assert_eq_m512i(r, e);
13738    }
13739
13740    #[simd_test(enable = "avx512bw,avx512vl")]
13741    const fn test_mm256_mask_subs_epu16() {
13742        let a = _mm256_set1_epi16(1);
13743        let b = _mm256_set1_epi16(u16::MAX as i16);
13744        let r = _mm256_mask_subs_epu16(a, 0, a, b);
13745        assert_eq_m256i(r, a);
13746        let r = _mm256_mask_subs_epu16(a, 0b00000000_00001111, a, b);
13747        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13748        assert_eq_m256i(r, e);
13749    }
13750
13751    #[simd_test(enable = "avx512bw,avx512vl")]
13752    const fn test_mm256_maskz_subs_epu16() {
13753        let a = _mm256_set1_epi16(1);
13754        let b = _mm256_set1_epi16(u16::MAX as i16);
13755        let r = _mm256_maskz_subs_epu16(0, a, b);
13756        assert_eq_m256i(r, _mm256_setzero_si256());
13757        let r = _mm256_maskz_subs_epu16(0b00000000_00001111, a, b);
13758        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13759        assert_eq_m256i(r, e);
13760    }
13761
13762    #[simd_test(enable = "avx512bw,avx512vl")]
13763    const fn test_mm_mask_subs_epu16() {
13764        let a = _mm_set1_epi16(1);
13765        let b = _mm_set1_epi16(u16::MAX as i16);
13766        let r = _mm_mask_subs_epu16(a, 0, a, b);
13767        assert_eq_m128i(r, a);
13768        let r = _mm_mask_subs_epu16(a, 0b00001111, a, b);
13769        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
13770        assert_eq_m128i(r, e);
13771    }
13772
13773    #[simd_test(enable = "avx512bw,avx512vl")]
13774    const fn test_mm_maskz_subs_epu16() {
13775        let a = _mm_set1_epi16(1);
13776        let b = _mm_set1_epi16(u16::MAX as i16);
13777        let r = _mm_maskz_subs_epu16(0, a, b);
13778        assert_eq_m128i(r, _mm_setzero_si128());
13779        let r = _mm_maskz_subs_epu16(0b00001111, a, b);
13780        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
13781        assert_eq_m128i(r, e);
13782    }
13783
13784    #[simd_test(enable = "avx512bw")]
13785    const fn test_mm512_subs_epu8() {
13786        let a = _mm512_set1_epi8(1);
13787        let b = _mm512_set1_epi8(u8::MAX as i8);
13788        let r = _mm512_subs_epu8(a, b);
13789        let e = _mm512_set1_epi8(0);
13790        assert_eq_m512i(r, e);
13791    }
13792
13793    #[simd_test(enable = "avx512bw")]
13794    const fn test_mm512_mask_subs_epu8() {
13795        let a = _mm512_set1_epi8(1);
13796        let b = _mm512_set1_epi8(u8::MAX as i8);
13797        let r = _mm512_mask_subs_epu8(a, 0, a, b);
13798        assert_eq_m512i(r, a);
13799        let r = _mm512_mask_subs_epu8(
13800            a,
13801            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
13802            a,
13803            b,
13804        );
13805        #[rustfmt::skip]
13806        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13807                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13808                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13809                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13810        assert_eq_m512i(r, e);
13811    }
13812
13813    #[simd_test(enable = "avx512bw")]
13814    const fn test_mm512_maskz_subs_epu8() {
13815        let a = _mm512_set1_epi8(1);
13816        let b = _mm512_set1_epi8(u8::MAX as i8);
13817        let r = _mm512_maskz_subs_epu8(0, a, b);
13818        assert_eq_m512i(r, _mm512_setzero_si512());
13819        let r = _mm512_maskz_subs_epu8(
13820            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
13821            a,
13822            b,
13823        );
13824        #[rustfmt::skip]
13825        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13826                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13827                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13828                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13829        assert_eq_m512i(r, e);
13830    }
13831
13832    #[simd_test(enable = "avx512bw,avx512vl")]
13833    const fn test_mm256_mask_subs_epu8() {
13834        let a = _mm256_set1_epi8(1);
13835        let b = _mm256_set1_epi8(u8::MAX as i8);
13836        let r = _mm256_mask_subs_epu8(a, 0, a, b);
13837        assert_eq_m256i(r, a);
13838        let r = _mm256_mask_subs_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
13839        #[rustfmt::skip]
13840        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13841                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13842        assert_eq_m256i(r, e);
13843    }
13844
13845    #[simd_test(enable = "avx512bw,avx512vl")]
13846    const fn test_mm256_maskz_subs_epu8() {
13847        let a = _mm256_set1_epi8(1);
13848        let b = _mm256_set1_epi8(u8::MAX as i8);
13849        let r = _mm256_maskz_subs_epu8(0, a, b);
13850        assert_eq_m256i(r, _mm256_setzero_si256());
13851        let r = _mm256_maskz_subs_epu8(0b00000000_00000000_00000000_00001111, a, b);
13852        #[rustfmt::skip]
13853        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13854                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13855        assert_eq_m256i(r, e);
13856    }
13857
13858    #[simd_test(enable = "avx512bw,avx512vl")]
13859    const fn test_mm_mask_subs_epu8() {
13860        let a = _mm_set1_epi8(1);
13861        let b = _mm_set1_epi8(u8::MAX as i8);
13862        let r = _mm_mask_subs_epu8(a, 0, a, b);
13863        assert_eq_m128i(r, a);
13864        let r = _mm_mask_subs_epu8(a, 0b00000000_00001111, a, b);
13865        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13866        assert_eq_m128i(r, e);
13867    }
13868
13869    #[simd_test(enable = "avx512bw,avx512vl")]
13870    const fn test_mm_maskz_subs_epu8() {
13871        let a = _mm_set1_epi8(1);
13872        let b = _mm_set1_epi8(u8::MAX as i8);
13873        let r = _mm_maskz_subs_epu8(0, a, b);
13874        assert_eq_m128i(r, _mm_setzero_si128());
13875        let r = _mm_maskz_subs_epu8(0b00000000_00001111, a, b);
13876        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13877        assert_eq_m128i(r, e);
13878    }
13879
13880    #[simd_test(enable = "avx512bw")]
13881    const fn test_mm512_subs_epi16() {
13882        let a = _mm512_set1_epi16(-1);
13883        let b = _mm512_set1_epi16(i16::MAX);
13884        let r = _mm512_subs_epi16(a, b);
13885        let e = _mm512_set1_epi16(i16::MIN);
13886        assert_eq_m512i(r, e);
13887    }
13888
13889    #[simd_test(enable = "avx512bw")]
13890    const fn test_mm512_mask_subs_epi16() {
13891        let a = _mm512_set1_epi16(-1);
13892        let b = _mm512_set1_epi16(i16::MAX);
13893        let r = _mm512_mask_subs_epi16(a, 0, a, b);
13894        assert_eq_m512i(r, a);
13895        let r = _mm512_mask_subs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
13896        #[rustfmt::skip]
13897        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
13898                                 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13899        assert_eq_m512i(r, e);
13900    }
13901
13902    #[simd_test(enable = "avx512bw")]
13903    const fn test_mm512_maskz_subs_epi16() {
13904        let a = _mm512_set1_epi16(-1);
13905        let b = _mm512_set1_epi16(i16::MAX);
13906        let r = _mm512_maskz_subs_epi16(0, a, b);
13907        assert_eq_m512i(r, _mm512_setzero_si512());
13908        let r = _mm512_maskz_subs_epi16(0b00000000_00000000_00000000_00001111, a, b);
13909        #[rustfmt::skip]
13910        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13911                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13912        assert_eq_m512i(r, e);
13913    }
13914
13915    #[simd_test(enable = "avx512bw,avx512vl")]
13916    const fn test_mm256_mask_subs_epi16() {
13917        let a = _mm256_set1_epi16(-1);
13918        let b = _mm256_set1_epi16(i16::MAX);
13919        let r = _mm256_mask_subs_epi16(a, 0, a, b);
13920        assert_eq_m256i(r, a);
13921        let r = _mm256_mask_subs_epi16(a, 0b00000000_00001111, a, b);
13922        #[rustfmt::skip]
13923        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13924        assert_eq_m256i(r, e);
13925    }
13926
13927    #[simd_test(enable = "avx512bw,avx512vl")]
13928    const fn test_mm256_maskz_subs_epi16() {
13929        let a = _mm256_set1_epi16(-1);
13930        let b = _mm256_set1_epi16(i16::MAX);
13931        let r = _mm256_maskz_subs_epi16(0, a, b);
13932        assert_eq_m256i(r, _mm256_setzero_si256());
13933        let r = _mm256_maskz_subs_epi16(0b00000000_00001111, a, b);
13934        #[rustfmt::skip]
13935        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13936        assert_eq_m256i(r, e);
13937    }
13938
13939    #[simd_test(enable = "avx512bw,avx512vl")]
13940    const fn test_mm_mask_subs_epi16() {
13941        let a = _mm_set1_epi16(-1);
13942        let b = _mm_set1_epi16(i16::MAX);
13943        let r = _mm_mask_subs_epi16(a, 0, a, b);
13944        assert_eq_m128i(r, a);
13945        let r = _mm_mask_subs_epi16(a, 0b00001111, a, b);
13946        let e = _mm_set_epi16(-1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13947        assert_eq_m128i(r, e);
13948    }
13949
13950    #[simd_test(enable = "avx512bw,avx512vl")]
13951    const fn test_mm_maskz_subs_epi16() {
13952        let a = _mm_set1_epi16(-1);
13953        let b = _mm_set1_epi16(i16::MAX);
13954        let r = _mm_maskz_subs_epi16(0, a, b);
13955        assert_eq_m128i(r, _mm_setzero_si128());
13956        let r = _mm_maskz_subs_epi16(0b00001111, a, b);
13957        let e = _mm_set_epi16(0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13958        assert_eq_m128i(r, e);
13959    }
13960
13961    #[simd_test(enable = "avx512bw")]
13962    const fn test_mm512_subs_epi8() {
13963        let a = _mm512_set1_epi8(-1);
13964        let b = _mm512_set1_epi8(i8::MAX);
13965        let r = _mm512_subs_epi8(a, b);
13966        let e = _mm512_set1_epi8(i8::MIN);
13967        assert_eq_m512i(r, e);
13968    }
13969
13970    #[simd_test(enable = "avx512bw")]
13971    const fn test_mm512_mask_subs_epi8() {
13972        let a = _mm512_set1_epi8(-1);
13973        let b = _mm512_set1_epi8(i8::MAX);
13974        let r = _mm512_mask_subs_epi8(a, 0, a, b);
13975        assert_eq_m512i(r, a);
13976        let r = _mm512_mask_subs_epi8(
13977            a,
13978            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
13979            a,
13980            b,
13981        );
13982        #[rustfmt::skip]
13983        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
13984                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
13985                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
13986                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
13987        assert_eq_m512i(r, e);
13988    }
13989
13990    #[simd_test(enable = "avx512bw")]
13991    const fn test_mm512_maskz_subs_epi8() {
13992        let a = _mm512_set1_epi8(-1);
13993        let b = _mm512_set1_epi8(i8::MAX);
13994        let r = _mm512_maskz_subs_epi8(0, a, b);
13995        assert_eq_m512i(r, _mm512_setzero_si512());
13996        let r = _mm512_maskz_subs_epi8(
13997            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
13998            a,
13999            b,
14000        );
14001        #[rustfmt::skip]
14002        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14003                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14004                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14005                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
14006        assert_eq_m512i(r, e);
14007    }
14008
14009    #[simd_test(enable = "avx512bw,avx512vl")]
14010    const fn test_mm256_mask_subs_epi8() {
14011        let a = _mm256_set1_epi8(-1);
14012        let b = _mm256_set1_epi8(i8::MAX);
14013        let r = _mm256_mask_subs_epi8(a, 0, a, b);
14014        assert_eq_m256i(r, a);
14015        let r = _mm256_mask_subs_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
14016        #[rustfmt::skip]
14017        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
14018                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
14019        assert_eq_m256i(r, e);
14020    }
14021
14022    #[simd_test(enable = "avx512bw,avx512vl")]
14023    const fn test_mm256_maskz_subs_epi8() {
14024        let a = _mm256_set1_epi8(-1);
14025        let b = _mm256_set1_epi8(i8::MAX);
14026        let r = _mm256_maskz_subs_epi8(0, a, b);
14027        assert_eq_m256i(r, _mm256_setzero_si256());
14028        let r = _mm256_maskz_subs_epi8(0b00000000_00000000_00000000_00001111, a, b);
14029        #[rustfmt::skip]
14030        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14031                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
14032        assert_eq_m256i(r, e);
14033    }
14034
14035    #[simd_test(enable = "avx512bw,avx512vl")]
14036    const fn test_mm_mask_subs_epi8() {
14037        let a = _mm_set1_epi8(-1);
14038        let b = _mm_set1_epi8(i8::MAX);
14039        let r = _mm_mask_subs_epi8(a, 0, a, b);
14040        assert_eq_m128i(r, a);
14041        let r = _mm_mask_subs_epi8(a, 0b00000000_00001111, a, b);
14042        #[rustfmt::skip]
14043        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
14044        assert_eq_m128i(r, e);
14045    }
14046
14047    #[simd_test(enable = "avx512bw,avx512vl")]
14048    const fn test_mm_maskz_subs_epi8() {
14049        let a = _mm_set1_epi8(-1);
14050        let b = _mm_set1_epi8(i8::MAX);
14051        let r = _mm_maskz_subs_epi8(0, a, b);
14052        assert_eq_m128i(r, _mm_setzero_si128());
14053        let r = _mm_maskz_subs_epi8(0b00000000_00001111, a, b);
14054        #[rustfmt::skip]
14055        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
14056        assert_eq_m128i(r, e);
14057    }
14058
14059    #[simd_test(enable = "avx512bw")]
14060    const fn test_mm512_mulhi_epu16() {
14061        let a = _mm512_set1_epi16(1);
14062        let b = _mm512_set1_epi16(1);
14063        let r = _mm512_mulhi_epu16(a, b);
14064        let e = _mm512_set1_epi16(0);
14065        assert_eq_m512i(r, e);
14066    }
14067
14068    #[simd_test(enable = "avx512bw")]
14069    const fn test_mm512_mask_mulhi_epu16() {
14070        let a = _mm512_set1_epi16(1);
14071        let b = _mm512_set1_epi16(1);
14072        let r = _mm512_mask_mulhi_epu16(a, 0, a, b);
14073        assert_eq_m512i(r, a);
14074        let r = _mm512_mask_mulhi_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
14075        #[rustfmt::skip]
14076        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14077                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
14078        assert_eq_m512i(r, e);
14079    }
14080
14081    #[simd_test(enable = "avx512bw")]
14082    const fn test_mm512_maskz_mulhi_epu16() {
14083        let a = _mm512_set1_epi16(1);
14084        let b = _mm512_set1_epi16(1);
14085        let r = _mm512_maskz_mulhi_epu16(0, a, b);
14086        assert_eq_m512i(r, _mm512_setzero_si512());
14087        let r = _mm512_maskz_mulhi_epu16(0b00000000_00000000_00000000_00001111, a, b);
14088        #[rustfmt::skip]
14089        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14090                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
14091        assert_eq_m512i(r, e);
14092    }
14093
14094    #[simd_test(enable = "avx512bw,avx512vl")]
14095    const fn test_mm256_mask_mulhi_epu16() {
14096        let a = _mm256_set1_epi16(1);
14097        let b = _mm256_set1_epi16(1);
14098        let r = _mm256_mask_mulhi_epu16(a, 0, a, b);
14099        assert_eq_m256i(r, a);
14100        let r = _mm256_mask_mulhi_epu16(a, 0b00000000_00001111, a, b);
14101        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
14102        assert_eq_m256i(r, e);
14103    }
14104
14105    #[simd_test(enable = "avx512bw,avx512vl")]
14106    const fn test_mm256_maskz_mulhi_epu16() {
14107        let a = _mm256_set1_epi16(1);
14108        let b = _mm256_set1_epi16(1);
14109        let r = _mm256_maskz_mulhi_epu16(0, a, b);
14110        assert_eq_m256i(r, _mm256_setzero_si256());
14111        let r = _mm256_maskz_mulhi_epu16(0b00000000_00001111, a, b);
14112        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
14113        assert_eq_m256i(r, e);
14114    }
14115
14116    #[simd_test(enable = "avx512bw,avx512vl")]
14117    const fn test_mm_mask_mulhi_epu16() {
14118        let a = _mm_set1_epi16(1);
14119        let b = _mm_set1_epi16(1);
14120        let r = _mm_mask_mulhi_epu16(a, 0, a, b);
14121        assert_eq_m128i(r, a);
14122        let r = _mm_mask_mulhi_epu16(a, 0b00001111, a, b);
14123        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
14124        assert_eq_m128i(r, e);
14125    }
14126
14127    #[simd_test(enable = "avx512bw,avx512vl")]
14128    const fn test_mm_maskz_mulhi_epu16() {
14129        let a = _mm_set1_epi16(1);
14130        let b = _mm_set1_epi16(1);
14131        let r = _mm_maskz_mulhi_epu16(0, a, b);
14132        assert_eq_m128i(r, _mm_setzero_si128());
14133        let r = _mm_maskz_mulhi_epu16(0b00001111, a, b);
14134        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
14135        assert_eq_m128i(r, e);
14136    }
14137
14138    #[simd_test(enable = "avx512bw")]
14139    const fn test_mm512_mulhi_epi16() {
14140        let a = _mm512_set1_epi16(1);
14141        let b = _mm512_set1_epi16(1);
14142        let r = _mm512_mulhi_epi16(a, b);
14143        let e = _mm512_set1_epi16(0);
14144        assert_eq_m512i(r, e);
14145    }
14146
14147    #[simd_test(enable = "avx512bw")]
14148    const fn test_mm512_mask_mulhi_epi16() {
14149        let a = _mm512_set1_epi16(1);
14150        let b = _mm512_set1_epi16(1);
14151        let r = _mm512_mask_mulhi_epi16(a, 0, a, b);
14152        assert_eq_m512i(r, a);
14153        let r = _mm512_mask_mulhi_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
14154        #[rustfmt::skip]
14155        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14156                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
14157        assert_eq_m512i(r, e);
14158    }
14159
14160    #[simd_test(enable = "avx512bw")]
14161    const fn test_mm512_maskz_mulhi_epi16() {
14162        let a = _mm512_set1_epi16(1);
14163        let b = _mm512_set1_epi16(1);
14164        let r = _mm512_maskz_mulhi_epi16(0, a, b);
14165        assert_eq_m512i(r, _mm512_setzero_si512());
14166        let r = _mm512_maskz_mulhi_epi16(0b00000000_00000000_00000000_00001111, a, b);
14167        #[rustfmt::skip]
14168        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14169                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
14170        assert_eq_m512i(r, e);
14171    }
14172
14173    #[simd_test(enable = "avx512bw,avx512vl")]
14174    const fn test_mm256_mask_mulhi_epi16() {
14175        let a = _mm256_set1_epi16(1);
14176        let b = _mm256_set1_epi16(1);
14177        let r = _mm256_mask_mulhi_epi16(a, 0, a, b);
14178        assert_eq_m256i(r, a);
14179        let r = _mm256_mask_mulhi_epi16(a, 0b00000000_00001111, a, b);
14180        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
14181        assert_eq_m256i(r, e);
14182    }
14183
14184    #[simd_test(enable = "avx512bw,avx512vl")]
14185    const fn test_mm256_maskz_mulhi_epi16() {
14186        let a = _mm256_set1_epi16(1);
14187        let b = _mm256_set1_epi16(1);
14188        let r = _mm256_maskz_mulhi_epi16(0, a, b);
14189        assert_eq_m256i(r, _mm256_setzero_si256());
14190        let r = _mm256_maskz_mulhi_epi16(0b00000000_00001111, a, b);
14191        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
14192        assert_eq_m256i(r, e);
14193    }
14194
14195    #[simd_test(enable = "avx512bw,avx512vl")]
14196    const fn test_mm_mask_mulhi_epi16() {
14197        let a = _mm_set1_epi16(1);
14198        let b = _mm_set1_epi16(1);
14199        let r = _mm_mask_mulhi_epi16(a, 0, a, b);
14200        assert_eq_m128i(r, a);
14201        let r = _mm_mask_mulhi_epi16(a, 0b00001111, a, b);
14202        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
14203        assert_eq_m128i(r, e);
14204    }
14205
14206    #[simd_test(enable = "avx512bw,avx512vl")]
14207    const fn test_mm_maskz_mulhi_epi16() {
14208        let a = _mm_set1_epi16(1);
14209        let b = _mm_set1_epi16(1);
14210        let r = _mm_maskz_mulhi_epi16(0, a, b);
14211        assert_eq_m128i(r, _mm_setzero_si128());
14212        let r = _mm_maskz_mulhi_epi16(0b00001111, a, b);
14213        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
14214        assert_eq_m128i(r, e);
14215    }
14216
14217    #[simd_test(enable = "avx512bw")]
14218    fn test_mm512_mulhrs_epi16() {
14219        let a = _mm512_set1_epi16(1);
14220        let b = _mm512_set1_epi16(1);
14221        let r = _mm512_mulhrs_epi16(a, b);
14222        let e = _mm512_set1_epi16(0);
14223        assert_eq_m512i(r, e);
14224    }
14225
14226    #[simd_test(enable = "avx512bw")]
14227    fn test_mm512_mask_mulhrs_epi16() {
14228        let a = _mm512_set1_epi16(1);
14229        let b = _mm512_set1_epi16(1);
14230        let r = _mm512_mask_mulhrs_epi16(a, 0, a, b);
14231        assert_eq_m512i(r, a);
14232        let r = _mm512_mask_mulhrs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
14233        #[rustfmt::skip]
14234        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14235                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
14236        assert_eq_m512i(r, e);
14237    }
14238
14239    #[simd_test(enable = "avx512bw")]
14240    fn test_mm512_maskz_mulhrs_epi16() {
14241        let a = _mm512_set1_epi16(1);
14242        let b = _mm512_set1_epi16(1);
14243        let r = _mm512_maskz_mulhrs_epi16(0, a, b);
14244        assert_eq_m512i(r, _mm512_setzero_si512());
14245        let r = _mm512_maskz_mulhrs_epi16(0b00000000_00000000_00000000_00001111, a, b);
14246        #[rustfmt::skip]
14247        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14248                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
14249        assert_eq_m512i(r, e);
14250    }
14251
14252    #[simd_test(enable = "avx512bw,avx512vl")]
14253    fn test_mm256_mask_mulhrs_epi16() {
14254        let a = _mm256_set1_epi16(1);
14255        let b = _mm256_set1_epi16(1);
14256        let r = _mm256_mask_mulhrs_epi16(a, 0, a, b);
14257        assert_eq_m256i(r, a);
14258        let r = _mm256_mask_mulhrs_epi16(a, 0b00000000_00001111, a, b);
14259        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
14260        assert_eq_m256i(r, e);
14261    }
14262
14263    #[simd_test(enable = "avx512bw,avx512vl")]
14264    fn test_mm256_maskz_mulhrs_epi16() {
14265        let a = _mm256_set1_epi16(1);
14266        let b = _mm256_set1_epi16(1);
14267        let r = _mm256_maskz_mulhrs_epi16(0, a, b);
14268        assert_eq_m256i(r, _mm256_setzero_si256());
14269        let r = _mm256_maskz_mulhrs_epi16(0b00000000_00001111, a, b);
14270        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
14271        assert_eq_m256i(r, e);
14272    }
14273
14274    #[simd_test(enable = "avx512bw,avx512vl")]
14275    fn test_mm_mask_mulhrs_epi16() {
14276        let a = _mm_set1_epi16(1);
14277        let b = _mm_set1_epi16(1);
14278        let r = _mm_mask_mulhrs_epi16(a, 0, a, b);
14279        assert_eq_m128i(r, a);
14280        let r = _mm_mask_mulhrs_epi16(a, 0b00001111, a, b);
14281        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
14282        assert_eq_m128i(r, e);
14283    }
14284
14285    #[simd_test(enable = "avx512bw,avx512vl")]
14286    fn test_mm_maskz_mulhrs_epi16() {
14287        let a = _mm_set1_epi16(1);
14288        let b = _mm_set1_epi16(1);
14289        let r = _mm_maskz_mulhrs_epi16(0, a, b);
14290        assert_eq_m128i(r, _mm_setzero_si128());
14291        let r = _mm_maskz_mulhrs_epi16(0b00001111, a, b);
14292        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
14293        assert_eq_m128i(r, e);
14294    }
14295
14296    #[simd_test(enable = "avx512bw")]
14297    const fn test_mm512_mullo_epi16() {
14298        let a = _mm512_set1_epi16(1);
14299        let b = _mm512_set1_epi16(1);
14300        let r = _mm512_mullo_epi16(a, b);
14301        let e = _mm512_set1_epi16(1);
14302        assert_eq_m512i(r, e);
14303    }
14304
14305    #[simd_test(enable = "avx512bw")]
14306    const fn test_mm512_mask_mullo_epi16() {
14307        let a = _mm512_set1_epi16(1);
14308        let b = _mm512_set1_epi16(1);
14309        let r = _mm512_mask_mullo_epi16(a, 0, a, b);
14310        assert_eq_m512i(r, a);
14311        let r = _mm512_mask_mullo_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
14312        #[rustfmt::skip]
14313        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14314                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
14315        assert_eq_m512i(r, e);
14316    }
14317
14318    #[simd_test(enable = "avx512bw")]
14319    const fn test_mm512_maskz_mullo_epi16() {
14320        let a = _mm512_set1_epi16(1);
14321        let b = _mm512_set1_epi16(1);
14322        let r = _mm512_maskz_mullo_epi16(0, a, b);
14323        assert_eq_m512i(r, _mm512_setzero_si512());
14324        let r = _mm512_maskz_mullo_epi16(0b00000000_00000000_00000000_00001111, a, b);
14325        #[rustfmt::skip]
14326        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14327                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
14328        assert_eq_m512i(r, e);
14329    }
14330
14331    #[simd_test(enable = "avx512bw,avx512vl")]
14332    const fn test_mm256_mask_mullo_epi16() {
14333        let a = _mm256_set1_epi16(1);
14334        let b = _mm256_set1_epi16(1);
14335        let r = _mm256_mask_mullo_epi16(a, 0, a, b);
14336        assert_eq_m256i(r, a);
14337        let r = _mm256_mask_mullo_epi16(a, 0b00000000_00001111, a, b);
14338        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
14339        assert_eq_m256i(r, e);
14340    }
14341
14342    #[simd_test(enable = "avx512bw,avx512vl")]
14343    const fn test_mm256_maskz_mullo_epi16() {
14344        let a = _mm256_set1_epi16(1);
14345        let b = _mm256_set1_epi16(1);
14346        let r = _mm256_maskz_mullo_epi16(0, a, b);
14347        assert_eq_m256i(r, _mm256_setzero_si256());
14348        let r = _mm256_maskz_mullo_epi16(0b00000000_00001111, a, b);
14349        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
14350        assert_eq_m256i(r, e);
14351    }
14352
14353    #[simd_test(enable = "avx512bw,avx512vl")]
14354    const fn test_mm_mask_mullo_epi16() {
14355        let a = _mm_set1_epi16(1);
14356        let b = _mm_set1_epi16(1);
14357        let r = _mm_mask_mullo_epi16(a, 0, a, b);
14358        assert_eq_m128i(r, a);
14359        let r = _mm_mask_mullo_epi16(a, 0b00001111, a, b);
14360        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
14361        assert_eq_m128i(r, e);
14362    }
14363
14364    #[simd_test(enable = "avx512bw,avx512vl")]
14365    const fn test_mm_maskz_mullo_epi16() {
14366        let a = _mm_set1_epi16(1);
14367        let b = _mm_set1_epi16(1);
14368        let r = _mm_maskz_mullo_epi16(0, a, b);
14369        assert_eq_m128i(r, _mm_setzero_si128());
14370        let r = _mm_maskz_mullo_epi16(0b00001111, a, b);
14371        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
14372        assert_eq_m128i(r, e);
14373    }
14374
14375    #[simd_test(enable = "avx512bw")]
14376    const fn test_mm512_max_epu16() {
14377        #[rustfmt::skip]
14378        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14379                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14380        #[rustfmt::skip]
14381        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14382                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14383        let r = _mm512_max_epu16(a, b);
14384        #[rustfmt::skip]
14385        let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14386                                 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
14387        assert_eq_m512i(r, e);
14388    }
14389
14390    #[simd_test(enable = "avx512bw")]
14391    const fn test_mm512_mask_max_epu16() {
14392        #[rustfmt::skip]
14393        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14394                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14395        #[rustfmt::skip]
14396        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14397                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14398        let r = _mm512_mask_max_epu16(a, 0, a, b);
14399        assert_eq_m512i(r, a);
14400        let r = _mm512_mask_max_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
14401        #[rustfmt::skip]
14402        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14403                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14404        assert_eq_m512i(r, e);
14405    }
14406
14407    #[simd_test(enable = "avx512bw")]
14408    const fn test_mm512_maskz_max_epu16() {
14409        #[rustfmt::skip]
14410        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14411                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14412        #[rustfmt::skip]
14413        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14414                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14415        let r = _mm512_maskz_max_epu16(0, a, b);
14416        assert_eq_m512i(r, _mm512_setzero_si512());
14417        let r = _mm512_maskz_max_epu16(0b00000000_11111111_00000000_11111111, a, b);
14418        #[rustfmt::skip]
14419        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14420                                 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14421        assert_eq_m512i(r, e);
14422    }
14423
14424    #[simd_test(enable = "avx512bw,avx512vl")]
14425    const fn test_mm256_mask_max_epu16() {
14426        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14427        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14428        let r = _mm256_mask_max_epu16(a, 0, a, b);
14429        assert_eq_m256i(r, a);
14430        let r = _mm256_mask_max_epu16(a, 0b00000000_11111111, a, b);
14431        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14432        assert_eq_m256i(r, e);
14433    }
14434
14435    #[simd_test(enable = "avx512bw,avx512vl")]
14436    const fn test_mm256_maskz_max_epu16() {
14437        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14438        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14439        let r = _mm256_maskz_max_epu16(0, a, b);
14440        assert_eq_m256i(r, _mm256_setzero_si256());
14441        let r = _mm256_maskz_max_epu16(0b00000000_11111111, a, b);
14442        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14443        assert_eq_m256i(r, e);
14444    }
14445
14446    #[simd_test(enable = "avx512bw,avx512vl")]
14447    const fn test_mm_mask_max_epu16() {
14448        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14449        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14450        let r = _mm_mask_max_epu16(a, 0, a, b);
14451        assert_eq_m128i(r, a);
14452        let r = _mm_mask_max_epu16(a, 0b00001111, a, b);
14453        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14454        assert_eq_m128i(r, e);
14455    }
14456
14457    #[simd_test(enable = "avx512bw,avx512vl")]
14458    const fn test_mm_maskz_max_epu16() {
14459        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14460        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14461        let r = _mm_maskz_max_epu16(0, a, b);
14462        assert_eq_m128i(r, _mm_setzero_si128());
14463        let r = _mm_maskz_max_epu16(0b00001111, a, b);
14464        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
14465        assert_eq_m128i(r, e);
14466    }
14467
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_max_epu8() {
        // Lane-wise unsigned max over 64 byte lanes: each 16-lane group pairs
        // an ascending run (0..=15) against a descending run (15..=0).
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epu8(a, b);
        // Expected: the larger of each pair — b's descending values win in the
        // first half of each 16-lane group, a's ascending ones in the second.
        #[rustfmt::skip]
        let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
14488
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_max_epu8() {
        // Merge-masked unsigned byte max: set mask bits select max(a, b) per
        // lane, clear bits keep the corresponding lane from `src` (= a).
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // All-zero mask leaves `src` (= a) untouched.
        let r = _mm512_mask_max_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epu8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        // Masked lanes compute max(a, b), which reproduces a's own values
        // here, and unmasked lanes copy a — so the expected result equals a.
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
14516
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_max_epu8() {
        // Zero-masked unsigned byte max: set mask bits select max(a, b),
        // clear bits zero the lane.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // All-zero mask zeroes every lane.
        let r = _mm512_maskz_max_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epu8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        // Low 8 lanes of each 16-lane group hold max(a, b); rest are zeroed.
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
14543
14544    #[simd_test(enable = "avx512bw,avx512vl")]
14545    const fn test_mm256_mask_max_epu8() {
14546        #[rustfmt::skip]
14547        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14548                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14549        #[rustfmt::skip]
14550        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14551                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14552        let r = _mm256_mask_max_epu8(a, 0, a, b);
14553        assert_eq_m256i(r, a);
14554        let r = _mm256_mask_max_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
14555        #[rustfmt::skip]
14556        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14557                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14558        assert_eq_m256i(r, e);
14559    }
14560
14561    #[simd_test(enable = "avx512bw,avx512vl")]
14562    const fn test_mm256_maskz_max_epu8() {
14563        #[rustfmt::skip]
14564        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14565                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14566        #[rustfmt::skip]
14567        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14568                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14569        let r = _mm256_maskz_max_epu8(0, a, b);
14570        assert_eq_m256i(r, _mm256_setzero_si256());
14571        let r = _mm256_maskz_max_epu8(0b00000000_11111111_00000000_11111111, a, b);
14572        #[rustfmt::skip]
14573        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14574                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14575        assert_eq_m256i(r, e);
14576    }
14577
14578    #[simd_test(enable = "avx512bw,avx512vl")]
14579    const fn test_mm_mask_max_epu8() {
14580        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14581        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14582        let r = _mm_mask_max_epu8(a, 0, a, b);
14583        assert_eq_m128i(r, a);
14584        let r = _mm_mask_max_epu8(a, 0b00000000_11111111, a, b);
14585        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14586        assert_eq_m128i(r, e);
14587    }
14588
14589    #[simd_test(enable = "avx512bw,avx512vl")]
14590    const fn test_mm_maskz_max_epu8() {
14591        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14592        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14593        let r = _mm_maskz_max_epu8(0, a, b);
14594        assert_eq_m128i(r, _mm_setzero_si128());
14595        let r = _mm_maskz_max_epu8(0b00000000_11111111, a, b);
14596        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14597        assert_eq_m128i(r, e);
14598    }
14599
14600    #[simd_test(enable = "avx512bw")]
14601    const fn test_mm512_max_epi16() {
14602        #[rustfmt::skip]
14603        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14604                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14605        #[rustfmt::skip]
14606        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14607                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14608        let r = _mm512_max_epi16(a, b);
14609        #[rustfmt::skip]
14610        let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14611                                 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
14612        assert_eq_m512i(r, e);
14613    }
14614
14615    #[simd_test(enable = "avx512bw")]
14616    const fn test_mm512_mask_max_epi16() {
14617        #[rustfmt::skip]
14618        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14619                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14620        #[rustfmt::skip]
14621        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14622                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14623        let r = _mm512_mask_max_epi16(a, 0, a, b);
14624        assert_eq_m512i(r, a);
14625        let r = _mm512_mask_max_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
14626        #[rustfmt::skip]
14627        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14628                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14629        assert_eq_m512i(r, e);
14630    }
14631
14632    #[simd_test(enable = "avx512bw")]
14633    const fn test_mm512_maskz_max_epi16() {
14634        #[rustfmt::skip]
14635        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14636                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14637        #[rustfmt::skip]
14638        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14639                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14640        let r = _mm512_maskz_max_epi16(0, a, b);
14641        assert_eq_m512i(r, _mm512_setzero_si512());
14642        let r = _mm512_maskz_max_epi16(0b00000000_11111111_00000000_11111111, a, b);
14643        #[rustfmt::skip]
14644        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14645                                 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14646        assert_eq_m512i(r, e);
14647    }
14648
14649    #[simd_test(enable = "avx512bw,avx512vl")]
14650    const fn test_mm256_mask_max_epi16() {
14651        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14652        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14653        let r = _mm256_mask_max_epi16(a, 0, a, b);
14654        assert_eq_m256i(r, a);
14655        let r = _mm256_mask_max_epi16(a, 0b00000000_11111111, a, b);
14656        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14657        assert_eq_m256i(r, e);
14658    }
14659
14660    #[simd_test(enable = "avx512bw,avx512vl")]
14661    const fn test_mm256_maskz_max_epi16() {
14662        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14663        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14664        let r = _mm256_maskz_max_epi16(0, a, b);
14665        assert_eq_m256i(r, _mm256_setzero_si256());
14666        let r = _mm256_maskz_max_epi16(0b00000000_11111111, a, b);
14667        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14668        assert_eq_m256i(r, e);
14669    }
14670
14671    #[simd_test(enable = "avx512bw,avx512vl")]
14672    const fn test_mm_mask_max_epi16() {
14673        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14674        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14675        let r = _mm_mask_max_epi16(a, 0, a, b);
14676        assert_eq_m128i(r, a);
14677        let r = _mm_mask_max_epi16(a, 0b00001111, a, b);
14678        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14679        assert_eq_m128i(r, e);
14680    }
14681
14682    #[simd_test(enable = "avx512bw,avx512vl")]
14683    const fn test_mm_maskz_max_epi16() {
14684        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14685        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14686        let r = _mm_maskz_max_epi16(0, a, b);
14687        assert_eq_m128i(r, _mm_setzero_si128());
14688        let r = _mm_maskz_max_epi16(0b00001111, a, b);
14689        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
14690        assert_eq_m128i(r, e);
14691    }
14692
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_max_epi8() {
        // Lane-wise signed max over 64 byte lanes: each 16-lane group pairs
        // an ascending run (0..=15) against a descending run (15..=0).
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epi8(a, b);
        // Expected: the larger of each pair — b's descending values win in the
        // first half of each 16-lane group, a's ascending ones in the second.
        #[rustfmt::skip]
        let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
14713
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_max_epi8() {
        // Merge-masked signed byte max: set mask bits select max(a, b) per
        // lane, clear bits keep the corresponding lane from `src` (= a).
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // All-zero mask leaves `src` (= a) untouched.
        let r = _mm512_mask_max_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        // Masked lanes compute max(a, b), which reproduces a's own values
        // here, and unmasked lanes copy a — so the expected result equals a.
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
14741
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_max_epi8() {
        // Zero-masked signed byte max: set mask bits select max(a, b), clear
        // bits zero the lane.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // All-zero mask zeroes every lane.
        let r = _mm512_maskz_max_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        // Low 8 lanes of each 16-lane group hold max(a, b); rest are zeroed.
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
14768
14769    #[simd_test(enable = "avx512bw,avx512vl")]
14770    const fn test_mm256_mask_max_epi8() {
14771        #[rustfmt::skip]
14772        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14773                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14774        #[rustfmt::skip]
14775        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14776                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14777        let r = _mm256_mask_max_epi8(a, 0, a, b);
14778        assert_eq_m256i(r, a);
14779        let r = _mm256_mask_max_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
14780        #[rustfmt::skip]
14781        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14782                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14783        assert_eq_m256i(r, e);
14784    }
14785
14786    #[simd_test(enable = "avx512bw,avx512vl")]
14787    const fn test_mm256_maskz_max_epi8() {
14788        #[rustfmt::skip]
14789        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14790                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14791        #[rustfmt::skip]
14792        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14793                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14794        let r = _mm256_maskz_max_epi8(0, a, b);
14795        assert_eq_m256i(r, _mm256_setzero_si256());
14796        let r = _mm256_maskz_max_epi8(0b00000000_11111111_00000000_11111111, a, b);
14797        #[rustfmt::skip]
14798        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14799                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14800        assert_eq_m256i(r, e);
14801    }
14802
14803    #[simd_test(enable = "avx512bw,avx512vl")]
14804    const fn test_mm_mask_max_epi8() {
14805        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14806        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14807        let r = _mm_mask_max_epi8(a, 0, a, b);
14808        assert_eq_m128i(r, a);
14809        let r = _mm_mask_max_epi8(a, 0b00000000_11111111, a, b);
14810        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14811        assert_eq_m128i(r, e);
14812    }
14813
14814    #[simd_test(enable = "avx512bw,avx512vl")]
14815    const fn test_mm_maskz_max_epi8() {
14816        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14817        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14818        let r = _mm_maskz_max_epi8(0, a, b);
14819        assert_eq_m128i(r, _mm_setzero_si128());
14820        let r = _mm_maskz_max_epi8(0b00000000_11111111, a, b);
14821        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14822        assert_eq_m128i(r, e);
14823    }
14824
14825    #[simd_test(enable = "avx512bw")]
14826    const fn test_mm512_min_epu16() {
14827        #[rustfmt::skip]
14828        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14829                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14830        #[rustfmt::skip]
14831        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14832                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14833        let r = _mm512_min_epu16(a, b);
14834        #[rustfmt::skip]
14835        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14836                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14837        assert_eq_m512i(r, e);
14838    }
14839
14840    #[simd_test(enable = "avx512bw")]
14841    const fn test_mm512_mask_min_epu16() {
14842        #[rustfmt::skip]
14843        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14844                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14845        #[rustfmt::skip]
14846        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14847                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14848        let r = _mm512_mask_min_epu16(a, 0, a, b);
14849        assert_eq_m512i(r, a);
14850        let r = _mm512_mask_min_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
14851        #[rustfmt::skip]
14852        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14853                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14854        assert_eq_m512i(r, e);
14855    }
14856
14857    #[simd_test(enable = "avx512bw")]
14858    const fn test_mm512_maskz_min_epu16() {
14859        #[rustfmt::skip]
14860        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14861                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14862        #[rustfmt::skip]
14863        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14864                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14865        let r = _mm512_maskz_min_epu16(0, a, b);
14866        assert_eq_m512i(r, _mm512_setzero_si512());
14867        let r = _mm512_maskz_min_epu16(0b00000000_11111111_00000000_11111111, a, b);
14868        #[rustfmt::skip]
14869        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14870                                 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14871        assert_eq_m512i(r, e);
14872    }
14873
14874    #[simd_test(enable = "avx512bw,avx512vl")]
14875    const fn test_mm256_mask_min_epu16() {
14876        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14877        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14878        let r = _mm256_mask_min_epu16(a, 0, a, b);
14879        assert_eq_m256i(r, a);
14880        let r = _mm256_mask_min_epu16(a, 0b00000000_11111111, a, b);
14881        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14882        assert_eq_m256i(r, e);
14883    }
14884
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_min_epu16() {
        // _mm256_maskz_min_epu16: masked-off lanes are zeroed, selected lanes
        // get the unsigned 16-bit minimum of a and b.
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epu16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }
14895
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_min_epu16() {
        // _mm_mask_min_epu16: masked-off lanes are copied from src (here `a`),
        // selected lanes get the unsigned 16-bit minimum of a and b.
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_min_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_min_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }
14906
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_min_epu16() {
        // _mm_maskz_min_epu16: masked-off lanes are zeroed, selected lanes
        // get the unsigned 16-bit minimum of a and b.
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_min_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_min_epu16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }
14917
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_min_epu8() {
        // _mm512_min_epu8: per-lane unsigned 8-bit minimum of ascending `a`
        // against descending `b`; each 16-lane group mirrors around the middle.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
14938
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_min_epu8() {
        // _mm512_mask_min_epu8: masked-off lanes are copied from src (here `a`),
        // selected lanes get the unsigned 8-bit minimum of a and b.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // All-zero mask must leave src untouched.
        let r = _mm512_mask_min_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epu8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
14966
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_min_epu8() {
        // _mm512_maskz_min_epu8: masked-off lanes are zeroed, selected lanes
        // get the unsigned 8-bit minimum of a and b.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epu8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
14993
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_min_epu8() {
        // _mm256_mask_min_epu8: masked-off lanes are copied from src (here `a`),
        // selected lanes get the unsigned 8-bit minimum of a and b.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_min_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }
15010
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_min_epu8() {
        // _mm256_maskz_min_epu8: masked-off lanes are zeroed, selected lanes
        // get the unsigned 8-bit minimum of a and b.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epu8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }
15027
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_min_epu8() {
        // _mm_mask_min_epu8: masked-off lanes are copied from src (here `a`),
        // selected lanes get the unsigned 8-bit minimum of a and b.
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_min_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_min_epu8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }
15038
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_min_epu8() {
        // _mm_maskz_min_epu8: masked-off lanes are zeroed, selected lanes
        // get the unsigned 8-bit minimum of a and b.
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_min_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_min_epu8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }
15049
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_min_epi16() {
        // _mm512_min_epi16: per-lane signed 16-bit minimum of ascending `a`
        // against descending `b` (all values non-negative here).
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
15064
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_min_epi16() {
        // _mm512_mask_min_epi16: masked-off lanes are copied from src (here `a`),
        // selected lanes get the signed 16-bit minimum of a and b.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // All-zero mask must leave src untouched.
        let r = _mm512_mask_min_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
15081
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_min_epi16() {
        // _mm512_maskz_min_epi16: masked-off lanes are zeroed, selected lanes
        // get the signed 16-bit minimum of a and b.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
15098
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_min_epi16() {
        // _mm256_mask_min_epi16: masked-off lanes are copied from src (here `a`),
        // selected lanes get the signed 16-bit minimum of a and b.
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_min_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }
15109
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_min_epi16() {
        // _mm256_maskz_min_epi16: masked-off lanes are zeroed, selected lanes
        // get the signed 16-bit minimum of a and b.
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }
15120
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_min_epi16() {
        // _mm_mask_min_epi16: masked-off lanes are copied from src (here `a`),
        // selected lanes get the signed 16-bit minimum of a and b.
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_min_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_min_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }
15131
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_min_epi16() {
        // _mm_maskz_min_epi16: masked-off lanes are zeroed, selected lanes
        // get the signed 16-bit minimum of a and b.
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_min_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_min_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }
15142
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_min_epi8() {
        // _mm512_min_epi8: per-lane signed 8-bit minimum of ascending `a`
        // against descending `b` (all values non-negative here).
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
15163
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_min_epi8() {
        // _mm512_mask_min_epi8: masked-off lanes are copied from src (here `a`),
        // selected lanes get the signed 8-bit minimum of a and b.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // All-zero mask must leave src untouched.
        let r = _mm512_mask_min_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
15191
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_min_epi8() {
        // _mm512_maskz_min_epi8: masked-off lanes are zeroed, selected lanes
        // get the signed 8-bit minimum of a and b.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
15218
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_min_epi8() {
        // _mm256_mask_min_epi8: masked-off lanes are copied from src (here `a`),
        // selected lanes get the signed 8-bit minimum of a and b.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_min_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }
15235
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_min_epi8() {
        // _mm256_maskz_min_epi8: masked-off lanes are zeroed, selected lanes
        // get the signed 8-bit minimum of a and b.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }
15252
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_min_epi8() {
        // _mm_mask_min_epi8: masked-off lanes are copied from src (here `a`),
        // selected lanes get the signed 8-bit minimum of a and b.
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_min_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_min_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }
15263
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_min_epi8() {
        // _mm_maskz_min_epi8: masked-off lanes are zeroed, selected lanes
        // get the signed 8-bit minimum of a and b.
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_min_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_min_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }
15274
15275    #[simd_test(enable = "avx512bw")]
15276    const fn test_mm512_cmplt_epu16_mask() {
15277        let a = _mm512_set1_epi16(-2);
15278        let b = _mm512_set1_epi16(-1);
15279        let m = _mm512_cmplt_epu16_mask(a, b);
15280        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15281    }
15282
15283    #[simd_test(enable = "avx512bw")]
15284    const fn test_mm512_mask_cmplt_epu16_mask() {
15285        let a = _mm512_set1_epi16(-2);
15286        let b = _mm512_set1_epi16(-1);
15287        let mask = 0b01010101_01010101_01010101_01010101;
15288        let r = _mm512_mask_cmplt_epu16_mask(mask, a, b);
15289        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15290    }
15291
15292    #[simd_test(enable = "avx512bw,avx512vl")]
15293    const fn test_mm256_cmplt_epu16_mask() {
15294        let a = _mm256_set1_epi16(-2);
15295        let b = _mm256_set1_epi16(-1);
15296        let m = _mm256_cmplt_epu16_mask(a, b);
15297        assert_eq!(m, 0b11111111_11111111);
15298    }
15299
15300    #[simd_test(enable = "avx512bw,avx512vl")]
15301    const fn test_mm256_mask_cmplt_epu16_mask() {
15302        let a = _mm256_set1_epi16(-2);
15303        let b = _mm256_set1_epi16(-1);
15304        let mask = 0b01010101_01010101;
15305        let r = _mm256_mask_cmplt_epu16_mask(mask, a, b);
15306        assert_eq!(r, 0b01010101_01010101);
15307    }
15308
15309    #[simd_test(enable = "avx512bw,avx512vl")]
15310    const fn test_mm_cmplt_epu16_mask() {
15311        let a = _mm_set1_epi16(-2);
15312        let b = _mm_set1_epi16(-1);
15313        let m = _mm_cmplt_epu16_mask(a, b);
15314        assert_eq!(m, 0b11111111);
15315    }
15316
15317    #[simd_test(enable = "avx512bw,avx512vl")]
15318    const fn test_mm_mask_cmplt_epu16_mask() {
15319        let a = _mm_set1_epi16(-2);
15320        let b = _mm_set1_epi16(-1);
15321        let mask = 0b01010101;
15322        let r = _mm_mask_cmplt_epu16_mask(mask, a, b);
15323        assert_eq!(r, 0b01010101);
15324    }
15325
15326    #[simd_test(enable = "avx512bw")]
15327    const fn test_mm512_cmplt_epu8_mask() {
15328        let a = _mm512_set1_epi8(-2);
15329        let b = _mm512_set1_epi8(-1);
15330        let m = _mm512_cmplt_epu8_mask(a, b);
15331        assert_eq!(
15332            m,
15333            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15334        );
15335    }
15336
15337    #[simd_test(enable = "avx512bw")]
15338    const fn test_mm512_mask_cmplt_epu8_mask() {
15339        let a = _mm512_set1_epi8(-2);
15340        let b = _mm512_set1_epi8(-1);
15341        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15342        let r = _mm512_mask_cmplt_epu8_mask(mask, a, b);
15343        assert_eq!(
15344            r,
15345            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15346        );
15347    }
15348
15349    #[simd_test(enable = "avx512bw,avx512vl")]
15350    const fn test_mm256_cmplt_epu8_mask() {
15351        let a = _mm256_set1_epi8(-2);
15352        let b = _mm256_set1_epi8(-1);
15353        let m = _mm256_cmplt_epu8_mask(a, b);
15354        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15355    }
15356
15357    #[simd_test(enable = "avx512bw,avx512vl")]
15358    const fn test_mm256_mask_cmplt_epu8_mask() {
15359        let a = _mm256_set1_epi8(-2);
15360        let b = _mm256_set1_epi8(-1);
15361        let mask = 0b01010101_01010101_01010101_01010101;
15362        let r = _mm256_mask_cmplt_epu8_mask(mask, a, b);
15363        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15364    }
15365
15366    #[simd_test(enable = "avx512bw,avx512vl")]
15367    const fn test_mm_cmplt_epu8_mask() {
15368        let a = _mm_set1_epi8(-2);
15369        let b = _mm_set1_epi8(-1);
15370        let m = _mm_cmplt_epu8_mask(a, b);
15371        assert_eq!(m, 0b11111111_11111111);
15372    }
15373
15374    #[simd_test(enable = "avx512bw,avx512vl")]
15375    const fn test_mm_mask_cmplt_epu8_mask() {
15376        let a = _mm_set1_epi8(-2);
15377        let b = _mm_set1_epi8(-1);
15378        let mask = 0b01010101_01010101;
15379        let r = _mm_mask_cmplt_epu8_mask(mask, a, b);
15380        assert_eq!(r, 0b01010101_01010101);
15381    }
15382
15383    #[simd_test(enable = "avx512bw")]
15384    const fn test_mm512_cmplt_epi16_mask() {
15385        let a = _mm512_set1_epi16(-2);
15386        let b = _mm512_set1_epi16(-1);
15387        let m = _mm512_cmplt_epi16_mask(a, b);
15388        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15389    }
15390
15391    #[simd_test(enable = "avx512bw")]
15392    const fn test_mm512_mask_cmplt_epi16_mask() {
15393        let a = _mm512_set1_epi16(-2);
15394        let b = _mm512_set1_epi16(-1);
15395        let mask = 0b01010101_01010101_01010101_01010101;
15396        let r = _mm512_mask_cmplt_epi16_mask(mask, a, b);
15397        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15398    }
15399
15400    #[simd_test(enable = "avx512bw,avx512vl")]
15401    const fn test_mm256_cmplt_epi16_mask() {
15402        let a = _mm256_set1_epi16(-2);
15403        let b = _mm256_set1_epi16(-1);
15404        let m = _mm256_cmplt_epi16_mask(a, b);
15405        assert_eq!(m, 0b11111111_11111111);
15406    }
15407
15408    #[simd_test(enable = "avx512bw,avx512vl")]
15409    const fn test_mm256_mask_cmplt_epi16_mask() {
15410        let a = _mm256_set1_epi16(-2);
15411        let b = _mm256_set1_epi16(-1);
15412        let mask = 0b01010101_01010101;
15413        let r = _mm256_mask_cmplt_epi16_mask(mask, a, b);
15414        assert_eq!(r, 0b01010101_01010101);
15415    }
15416
15417    #[simd_test(enable = "avx512bw,avx512vl")]
15418    const fn test_mm_cmplt_epi16_mask() {
15419        let a = _mm_set1_epi16(-2);
15420        let b = _mm_set1_epi16(-1);
15421        let m = _mm_cmplt_epi16_mask(a, b);
15422        assert_eq!(m, 0b11111111);
15423    }
15424
15425    #[simd_test(enable = "avx512bw,avx512vl")]
15426    const fn test_mm_mask_cmplt_epi16_mask() {
15427        let a = _mm_set1_epi16(-2);
15428        let b = _mm_set1_epi16(-1);
15429        let mask = 0b01010101;
15430        let r = _mm_mask_cmplt_epi16_mask(mask, a, b);
15431        assert_eq!(r, 0b01010101);
15432    }
15433
15434    #[simd_test(enable = "avx512bw")]
15435    const fn test_mm512_cmplt_epi8_mask() {
15436        let a = _mm512_set1_epi8(-2);
15437        let b = _mm512_set1_epi8(-1);
15438        let m = _mm512_cmplt_epi8_mask(a, b);
15439        assert_eq!(
15440            m,
15441            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15442        );
15443    }
15444
15445    #[simd_test(enable = "avx512bw")]
15446    const fn test_mm512_mask_cmplt_epi8_mask() {
15447        let a = _mm512_set1_epi8(-2);
15448        let b = _mm512_set1_epi8(-1);
15449        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15450        let r = _mm512_mask_cmplt_epi8_mask(mask, a, b);
15451        assert_eq!(
15452            r,
15453            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15454        );
15455    }
15456
15457    #[simd_test(enable = "avx512bw,avx512vl")]
15458    const fn test_mm256_cmplt_epi8_mask() {
15459        let a = _mm256_set1_epi8(-2);
15460        let b = _mm256_set1_epi8(-1);
15461        let m = _mm256_cmplt_epi8_mask(a, b);
15462        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15463    }
15464
15465    #[simd_test(enable = "avx512bw,avx512vl")]
15466    const fn test_mm256_mask_cmplt_epi8_mask() {
15467        let a = _mm256_set1_epi8(-2);
15468        let b = _mm256_set1_epi8(-1);
15469        let mask = 0b01010101_01010101_01010101_01010101;
15470        let r = _mm256_mask_cmplt_epi8_mask(mask, a, b);
15471        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15472    }
15473
15474    #[simd_test(enable = "avx512bw,avx512vl")]
15475    const fn test_mm_cmplt_epi8_mask() {
15476        let a = _mm_set1_epi8(-2);
15477        let b = _mm_set1_epi8(-1);
15478        let m = _mm_cmplt_epi8_mask(a, b);
15479        assert_eq!(m, 0b11111111_11111111);
15480    }
15481
15482    #[simd_test(enable = "avx512bw,avx512vl")]
15483    const fn test_mm_mask_cmplt_epi8_mask() {
15484        let a = _mm_set1_epi8(-2);
15485        let b = _mm_set1_epi8(-1);
15486        let mask = 0b01010101_01010101;
15487        let r = _mm_mask_cmplt_epi8_mask(mask, a, b);
15488        assert_eq!(r, 0b01010101_01010101);
15489    }
15490
// Greater-than comparison-mask tests: `_mm*_cmpgt_{epu16,epu8,epi16}_mask` and their
// `_mm*_mask_*` writemask variants at 512/256/128-bit widths. Unsigned tests use (2, 1);
// signed 16-bit tests use (2, -1) so every lane compares true, and the writemask variants
// check that the result is ANDed with the supplied mask bit-pattern.
15491    #[simd_test(enable = "avx512bw")]
15492    const fn test_mm512_cmpgt_epu16_mask() {
15493        let a = _mm512_set1_epi16(2);
15494        let b = _mm512_set1_epi16(1);
15495        let m = _mm512_cmpgt_epu16_mask(a, b);
15496        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15497    }
15498
15499    #[simd_test(enable = "avx512bw")]
15500    const fn test_mm512_mask_cmpgt_epu16_mask() {
15501        let a = _mm512_set1_epi16(2);
15502        let b = _mm512_set1_epi16(1);
15503        let mask = 0b01010101_01010101_01010101_01010101;
15504        let r = _mm512_mask_cmpgt_epu16_mask(mask, a, b);
15505        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15506    }
15507
15508    #[simd_test(enable = "avx512bw,avx512vl")]
15509    const fn test_mm256_cmpgt_epu16_mask() {
15510        let a = _mm256_set1_epi16(2);
15511        let b = _mm256_set1_epi16(1);
15512        let m = _mm256_cmpgt_epu16_mask(a, b);
15513        assert_eq!(m, 0b11111111_11111111);
15514    }
15515
15516    #[simd_test(enable = "avx512bw,avx512vl")]
15517    const fn test_mm256_mask_cmpgt_epu16_mask() {
15518        let a = _mm256_set1_epi16(2);
15519        let b = _mm256_set1_epi16(1);
15520        let mask = 0b01010101_01010101;
15521        let r = _mm256_mask_cmpgt_epu16_mask(mask, a, b);
15522        assert_eq!(r, 0b01010101_01010101);
15523    }
15524
15525    #[simd_test(enable = "avx512bw,avx512vl")]
15526    const fn test_mm_cmpgt_epu16_mask() {
15527        let a = _mm_set1_epi16(2);
15528        let b = _mm_set1_epi16(1);
15529        let m = _mm_cmpgt_epu16_mask(a, b);
15530        assert_eq!(m, 0b11111111);
15531    }
15532
15533    #[simd_test(enable = "avx512bw,avx512vl")]
15534    const fn test_mm_mask_cmpgt_epu16_mask() {
15535        let a = _mm_set1_epi16(2);
15536        let b = _mm_set1_epi16(1);
15537        let mask = 0b01010101;
15538        let r = _mm_mask_cmpgt_epu16_mask(mask, a, b);
15539        assert_eq!(r, 0b01010101);
15540    }
15541
15542    #[simd_test(enable = "avx512bw")]
15543    const fn test_mm512_cmpgt_epu8_mask() {
15544        let a = _mm512_set1_epi8(2);
15545        let b = _mm512_set1_epi8(1);
15546        let m = _mm512_cmpgt_epu8_mask(a, b);
15547        assert_eq!(
15548            m,
15549            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15550        );
15551    }
15552
15553    #[simd_test(enable = "avx512bw")]
15554    const fn test_mm512_mask_cmpgt_epu8_mask() {
15555        let a = _mm512_set1_epi8(2);
15556        let b = _mm512_set1_epi8(1);
15557        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15558        let r = _mm512_mask_cmpgt_epu8_mask(mask, a, b);
15559        assert_eq!(
15560            r,
15561            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15562        );
15563    }
15564
15565    #[simd_test(enable = "avx512bw,avx512vl")]
15566    const fn test_mm256_cmpgt_epu8_mask() {
15567        let a = _mm256_set1_epi8(2);
15568        let b = _mm256_set1_epi8(1);
15569        let m = _mm256_cmpgt_epu8_mask(a, b);
15570        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15571    }
15572
15573    #[simd_test(enable = "avx512bw,avx512vl")]
15574    const fn test_mm256_mask_cmpgt_epu8_mask() {
15575        let a = _mm256_set1_epi8(2);
15576        let b = _mm256_set1_epi8(1);
15577        let mask = 0b01010101_01010101_01010101_01010101;
15578        let r = _mm256_mask_cmpgt_epu8_mask(mask, a, b);
15579        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15580    }
15581
15582    #[simd_test(enable = "avx512bw,avx512vl")]
15583    const fn test_mm_cmpgt_epu8_mask() {
15584        let a = _mm_set1_epi8(2);
15585        let b = _mm_set1_epi8(1);
15586        let m = _mm_cmpgt_epu8_mask(a, b);
15587        assert_eq!(m, 0b11111111_11111111);
15588    }
15589
15590    #[simd_test(enable = "avx512bw,avx512vl")]
15591    const fn test_mm_mask_cmpgt_epu8_mask() {
15592        let a = _mm_set1_epi8(2);
15593        let b = _mm_set1_epi8(1);
15594        let mask = 0b01010101_01010101;
15595        let r = _mm_mask_cmpgt_epu8_mask(mask, a, b);
15596        assert_eq!(r, 0b01010101_01010101);
15597    }
15598
15599    #[simd_test(enable = "avx512bw")]
15600    const fn test_mm512_cmpgt_epi16_mask() {
15601        let a = _mm512_set1_epi16(2);
15602        let b = _mm512_set1_epi16(-1);
15603        let m = _mm512_cmpgt_epi16_mask(a, b);
15604        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15605    }
15606
15607    #[simd_test(enable = "avx512bw")]
15608    const fn test_mm512_mask_cmpgt_epi16_mask() {
15609        let a = _mm512_set1_epi16(2);
15610        let b = _mm512_set1_epi16(-1);
15611        let mask = 0b01010101_01010101_01010101_01010101;
15612        let r = _mm512_mask_cmpgt_epi16_mask(mask, a, b);
15613        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15614    }
15615
15616    #[simd_test(enable = "avx512bw,avx512vl")]
15617    const fn test_mm256_cmpgt_epi16_mask() {
15618        let a = _mm256_set1_epi16(2);
15619        let b = _mm256_set1_epi16(-1);
15620        let m = _mm256_cmpgt_epi16_mask(a, b);
15621        assert_eq!(m, 0b11111111_11111111);
15622    }
15623
15624    #[simd_test(enable = "avx512bw,avx512vl")]
15625    const fn test_mm256_mask_cmpgt_epi16_mask() {
15626        let a = _mm256_set1_epi16(2);
15627        let b = _mm256_set1_epi16(-1);
// 16-lane writemask. Was written `0b001010101_01010101` (stray leading digit made it
// look like a 17-bit pattern); same value, normalized to match every sibling test.
15628        let mask = 0b01010101_01010101;
15629        let r = _mm256_mask_cmpgt_epi16_mask(mask, a, b);
// All lanes compare true (2 > -1), so the result equals the writemask exactly.
15630        assert_eq!(r, 0b01010101_01010101);
15631    }
15632
// Remaining signed greater-than comparison-mask tests: 128-bit `epi16` plus the
// `epi8` family at all widths. Operands (2, -1) make every lane compare true, so the
// plain variants expect an all-ones mask and the writemask variants expect the mask itself.
15633    #[simd_test(enable = "avx512bw,avx512vl")]
15634    const fn test_mm_cmpgt_epi16_mask() {
15635        let a = _mm_set1_epi16(2);
15636        let b = _mm_set1_epi16(-1);
15637        let m = _mm_cmpgt_epi16_mask(a, b);
15638        assert_eq!(m, 0b11111111);
15639    }
15640
15641    #[simd_test(enable = "avx512bw,avx512vl")]
15642    const fn test_mm_mask_cmpgt_epi16_mask() {
15643        let a = _mm_set1_epi16(2);
15644        let b = _mm_set1_epi16(-1);
15645        let mask = 0b01010101;
15646        let r = _mm_mask_cmpgt_epi16_mask(mask, a, b);
15647        assert_eq!(r, 0b01010101);
15648    }
15649
15650    #[simd_test(enable = "avx512bw")]
15651    const fn test_mm512_cmpgt_epi8_mask() {
15652        let a = _mm512_set1_epi8(2);
15653        let b = _mm512_set1_epi8(-1);
15654        let m = _mm512_cmpgt_epi8_mask(a, b);
15655        assert_eq!(
15656            m,
15657            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15658        );
15659    }
15660
15661    #[simd_test(enable = "avx512bw")]
15662    const fn test_mm512_mask_cmpgt_epi8_mask() {
15663        let a = _mm512_set1_epi8(2);
15664        let b = _mm512_set1_epi8(-1);
15665        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15666        let r = _mm512_mask_cmpgt_epi8_mask(mask, a, b);
15667        assert_eq!(
15668            r,
15669            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15670        );
15671    }
15672
15673    #[simd_test(enable = "avx512bw,avx512vl")]
15674    const fn test_mm256_cmpgt_epi8_mask() {
15675        let a = _mm256_set1_epi8(2);
15676        let b = _mm256_set1_epi8(-1);
15677        let m = _mm256_cmpgt_epi8_mask(a, b);
15678        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15679    }
15680
15681    #[simd_test(enable = "avx512bw,avx512vl")]
15682    const fn test_mm256_mask_cmpgt_epi8_mask() {
15683        let a = _mm256_set1_epi8(2);
15684        let b = _mm256_set1_epi8(-1);
15685        let mask = 0b01010101_01010101_01010101_01010101;
15686        let r = _mm256_mask_cmpgt_epi8_mask(mask, a, b);
15687        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15688    }
15689
15690    #[simd_test(enable = "avx512bw,avx512vl")]
15691    const fn test_mm_cmpgt_epi8_mask() {
15692        let a = _mm_set1_epi8(2);
15693        let b = _mm_set1_epi8(-1);
15694        let m = _mm_cmpgt_epi8_mask(a, b);
15695        assert_eq!(m, 0b11111111_11111111);
15696    }
15697
15698    #[simd_test(enable = "avx512bw,avx512vl")]
15699    const fn test_mm_mask_cmpgt_epi8_mask() {
15700        let a = _mm_set1_epi8(2);
15701        let b = _mm_set1_epi8(-1);
15702        let mask = 0b01010101_01010101;
15703        let r = _mm_mask_cmpgt_epi8_mask(mask, a, b);
15704        assert_eq!(r, 0b01010101_01010101);
15705    }
15706
// Less-than-or-equal comparison-mask tests: `_mm*_cmple_{epu16,epu8,epi16,epi8}_mask`
// and their writemask variants. Both operands are set1(-1) — equal lanes, so `<=` is
// true everywhere; for the `epu*` variants this also exercises the unsigned
// interpretation of the all-ones (0xFF../0xFFFF..) bit pattern.
15707    #[simd_test(enable = "avx512bw")]
15708    const fn test_mm512_cmple_epu16_mask() {
15709        let a = _mm512_set1_epi16(-1);
15710        let b = _mm512_set1_epi16(-1);
15711        let m = _mm512_cmple_epu16_mask(a, b);
15712        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15713    }
15714
15715    #[simd_test(enable = "avx512bw")]
15716    const fn test_mm512_mask_cmple_epu16_mask() {
15717        let a = _mm512_set1_epi16(-1);
15718        let b = _mm512_set1_epi16(-1);
15719        let mask = 0b01010101_01010101_01010101_01010101;
15720        let r = _mm512_mask_cmple_epu16_mask(mask, a, b);
15721        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15722    }
15723
15724    #[simd_test(enable = "avx512bw,avx512vl")]
15725    const fn test_mm256_cmple_epu16_mask() {
15726        let a = _mm256_set1_epi16(-1);
15727        let b = _mm256_set1_epi16(-1);
15728        let m = _mm256_cmple_epu16_mask(a, b);
15729        assert_eq!(m, 0b11111111_11111111);
15730    }
15731
15732    #[simd_test(enable = "avx512bw,avx512vl")]
15733    const fn test_mm256_mask_cmple_epu16_mask() {
15734        let a = _mm256_set1_epi16(-1);
15735        let b = _mm256_set1_epi16(-1);
15736        let mask = 0b01010101_01010101;
15737        let r = _mm256_mask_cmple_epu16_mask(mask, a, b);
15738        assert_eq!(r, 0b01010101_01010101);
15739    }
15740
15741    #[simd_test(enable = "avx512bw,avx512vl")]
15742    const fn test_mm_cmple_epu16_mask() {
15743        let a = _mm_set1_epi16(-1);
15744        let b = _mm_set1_epi16(-1);
15745        let m = _mm_cmple_epu16_mask(a, b);
15746        assert_eq!(m, 0b11111111);
15747    }
15748
15749    #[simd_test(enable = "avx512bw,avx512vl")]
15750    const fn test_mm_mask_cmple_epu16_mask() {
15751        let a = _mm_set1_epi16(-1);
15752        let b = _mm_set1_epi16(-1);
15753        let mask = 0b01010101;
15754        let r = _mm_mask_cmple_epu16_mask(mask, a, b);
15755        assert_eq!(r, 0b01010101);
15756    }
15757
15758    #[simd_test(enable = "avx512bw")]
15759    const fn test_mm512_cmple_epu8_mask() {
15760        let a = _mm512_set1_epi8(-1);
15761        let b = _mm512_set1_epi8(-1);
15762        let m = _mm512_cmple_epu8_mask(a, b);
15763        assert_eq!(
15764            m,
15765            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15766        );
15767    }
15768
15769    #[simd_test(enable = "avx512bw")]
15770    const fn test_mm512_mask_cmple_epu8_mask() {
15771        let a = _mm512_set1_epi8(-1);
15772        let b = _mm512_set1_epi8(-1);
15773        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15774        let r = _mm512_mask_cmple_epu8_mask(mask, a, b);
15775        assert_eq!(
15776            r,
15777            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15778        );
15779    }
15780
15781    #[simd_test(enable = "avx512bw,avx512vl")]
15782    const fn test_mm256_cmple_epu8_mask() {
15783        let a = _mm256_set1_epi8(-1);
15784        let b = _mm256_set1_epi8(-1);
15785        let m = _mm256_cmple_epu8_mask(a, b);
15786        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15787    }
15788
15789    #[simd_test(enable = "avx512bw,avx512vl")]
15790    const fn test_mm256_mask_cmple_epu8_mask() {
15791        let a = _mm256_set1_epi8(-1);
15792        let b = _mm256_set1_epi8(-1);
15793        let mask = 0b01010101_01010101_01010101_01010101;
15794        let r = _mm256_mask_cmple_epu8_mask(mask, a, b);
15795        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15796    }
15797
15798    #[simd_test(enable = "avx512bw,avx512vl")]
15799    const fn test_mm_cmple_epu8_mask() {
15800        let a = _mm_set1_epi8(-1);
15801        let b = _mm_set1_epi8(-1);
15802        let m = _mm_cmple_epu8_mask(a, b);
15803        assert_eq!(m, 0b11111111_11111111);
15804    }
15805
15806    #[simd_test(enable = "avx512bw,avx512vl")]
15807    const fn test_mm_mask_cmple_epu8_mask() {
15808        let a = _mm_set1_epi8(-1);
15809        let b = _mm_set1_epi8(-1);
15810        let mask = 0b01010101_01010101;
15811        let r = _mm_mask_cmple_epu8_mask(mask, a, b);
15812        assert_eq!(r, 0b01010101_01010101);
15813    }
15814
15815    #[simd_test(enable = "avx512bw")]
15816    const fn test_mm512_cmple_epi16_mask() {
15817        let a = _mm512_set1_epi16(-1);
15818        let b = _mm512_set1_epi16(-1);
15819        let m = _mm512_cmple_epi16_mask(a, b);
15820        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15821    }
15822
15823    #[simd_test(enable = "avx512bw")]
15824    const fn test_mm512_mask_cmple_epi16_mask() {
15825        let a = _mm512_set1_epi16(-1);
15826        let b = _mm512_set1_epi16(-1);
15827        let mask = 0b01010101_01010101_01010101_01010101;
15828        let r = _mm512_mask_cmple_epi16_mask(mask, a, b);
15829        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15830    }
15831
15832    #[simd_test(enable = "avx512bw,avx512vl")]
15833    const fn test_mm256_cmple_epi16_mask() {
15834        let a = _mm256_set1_epi16(-1);
15835        let b = _mm256_set1_epi16(-1);
15836        let m = _mm256_cmple_epi16_mask(a, b);
15837        assert_eq!(m, 0b11111111_11111111);
15838    }
15839
15840    #[simd_test(enable = "avx512bw,avx512vl")]
15841    const fn test_mm256_mask_cmple_epi16_mask() {
15842        let a = _mm256_set1_epi16(-1);
15843        let b = _mm256_set1_epi16(-1);
15844        let mask = 0b01010101_01010101;
15845        let r = _mm256_mask_cmple_epi16_mask(mask, a, b);
15846        assert_eq!(r, 0b01010101_01010101);
15847    }
15848
15849    #[simd_test(enable = "avx512bw,avx512vl")]
15850    const fn test_mm_cmple_epi16_mask() {
15851        let a = _mm_set1_epi16(-1);
15852        let b = _mm_set1_epi16(-1);
15853        let m = _mm_cmple_epi16_mask(a, b);
15854        assert_eq!(m, 0b11111111);
15855    }
15856
15857    #[simd_test(enable = "avx512bw,avx512vl")]
15858    const fn test_mm_mask_cmple_epi16_mask() {
15859        let a = _mm_set1_epi16(-1);
15860        let b = _mm_set1_epi16(-1);
15861        let mask = 0b01010101;
15862        let r = _mm_mask_cmple_epi16_mask(mask, a, b);
15863        assert_eq!(r, 0b01010101);
15864    }
15865
15866    #[simd_test(enable = "avx512bw")]
15867    const fn test_mm512_cmple_epi8_mask() {
15868        let a = _mm512_set1_epi8(-1);
15869        let b = _mm512_set1_epi8(-1);
15870        let m = _mm512_cmple_epi8_mask(a, b);
15871        assert_eq!(
15872            m,
15873            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15874        );
15875    }
15876
15877    #[simd_test(enable = "avx512bw")]
15878    const fn test_mm512_mask_cmple_epi8_mask() {
15879        let a = _mm512_set1_epi8(-1);
15880        let b = _mm512_set1_epi8(-1);
15881        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15882        let r = _mm512_mask_cmple_epi8_mask(mask, a, b);
15883        assert_eq!(
15884            r,
15885            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15886        );
15887    }
15888
15889    #[simd_test(enable = "avx512bw,avx512vl")]
15890    const fn test_mm256_cmple_epi8_mask() {
15891        let a = _mm256_set1_epi8(-1);
15892        let b = _mm256_set1_epi8(-1);
15893        let m = _mm256_cmple_epi8_mask(a, b);
15894        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15895    }
15896
15897    #[simd_test(enable = "avx512bw,avx512vl")]
15898    const fn test_mm256_mask_cmple_epi8_mask() {
15899        let a = _mm256_set1_epi8(-1);
15900        let b = _mm256_set1_epi8(-1);
15901        let mask = 0b01010101_01010101_01010101_01010101;
15902        let r = _mm256_mask_cmple_epi8_mask(mask, a, b);
15903        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15904    }
15905
15906    #[simd_test(enable = "avx512bw,avx512vl")]
15907    const fn test_mm_cmple_epi8_mask() {
15908        let a = _mm_set1_epi8(-1);
15909        let b = _mm_set1_epi8(-1);
15910        let m = _mm_cmple_epi8_mask(a, b);
15911        assert_eq!(m, 0b11111111_11111111);
15912    }
15913
15914    #[simd_test(enable = "avx512bw,avx512vl")]
15915    const fn test_mm_mask_cmple_epi8_mask() {
15916        let a = _mm_set1_epi8(-1);
15917        let b = _mm_set1_epi8(-1);
15918        let mask = 0b01010101_01010101;
15919        let r = _mm_mask_cmple_epi8_mask(mask, a, b);
15920        assert_eq!(r, 0b01010101_01010101);
15921    }
15922
// Greater-than-or-equal comparison-mask tests: `_mm*_cmpge_{epu16,epu8,epi16,epi8}_mask`
// and their writemask variants. Equal operands (set1(1) for unsigned, set1(-1) for signed)
// make `>=` true in every lane, so plain variants expect an all-ones mask and the
// writemask variants return the supplied mask unchanged.
15923    #[simd_test(enable = "avx512bw")]
15924    const fn test_mm512_cmpge_epu16_mask() {
15925        let a = _mm512_set1_epi16(1);
15926        let b = _mm512_set1_epi16(1);
15927        let m = _mm512_cmpge_epu16_mask(a, b);
15928        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15929    }
15930
15931    #[simd_test(enable = "avx512bw")]
15932    const fn test_mm512_mask_cmpge_epu16_mask() {
15933        let a = _mm512_set1_epi16(1);
15934        let b = _mm512_set1_epi16(1);
15935        let mask = 0b01010101_01010101_01010101_01010101;
15936        let r = _mm512_mask_cmpge_epu16_mask(mask, a, b);
15937        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15938    }
15939
15940    #[simd_test(enable = "avx512bw,avx512vl")]
15941    const fn test_mm256_cmpge_epu16_mask() {
15942        let a = _mm256_set1_epi16(1);
15943        let b = _mm256_set1_epi16(1);
15944        let m = _mm256_cmpge_epu16_mask(a, b);
15945        assert_eq!(m, 0b11111111_11111111);
15946    }
15947
15948    #[simd_test(enable = "avx512bw,avx512vl")]
15949    const fn test_mm256_mask_cmpge_epu16_mask() {
15950        let a = _mm256_set1_epi16(1);
15951        let b = _mm256_set1_epi16(1);
15952        let mask = 0b01010101_01010101;
15953        let r = _mm256_mask_cmpge_epu16_mask(mask, a, b);
15954        assert_eq!(r, 0b01010101_01010101);
15955    }
15956
15957    #[simd_test(enable = "avx512bw,avx512vl")]
15958    const fn test_mm_cmpge_epu16_mask() {
15959        let a = _mm_set1_epi16(1);
15960        let b = _mm_set1_epi16(1);
15961        let m = _mm_cmpge_epu16_mask(a, b);
15962        assert_eq!(m, 0b11111111);
15963    }
15964
15965    #[simd_test(enable = "avx512bw,avx512vl")]
15966    const fn test_mm_mask_cmpge_epu16_mask() {
15967        let a = _mm_set1_epi16(1);
15968        let b = _mm_set1_epi16(1);
15969        let mask = 0b01010101;
15970        let r = _mm_mask_cmpge_epu16_mask(mask, a, b);
15971        assert_eq!(r, 0b01010101);
15972    }
15973
15974    #[simd_test(enable = "avx512bw")]
15975    const fn test_mm512_cmpge_epu8_mask() {
15976        let a = _mm512_set1_epi8(1);
15977        let b = _mm512_set1_epi8(1);
15978        let m = _mm512_cmpge_epu8_mask(a, b);
15979        assert_eq!(
15980            m,
15981            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15982        );
15983    }
15984
15985    #[simd_test(enable = "avx512bw")]
15986    const fn test_mm512_mask_cmpge_epu8_mask() {
15987        let a = _mm512_set1_epi8(1);
15988        let b = _mm512_set1_epi8(1);
15989        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15990        let r = _mm512_mask_cmpge_epu8_mask(mask, a, b);
15991        assert_eq!(
15992            r,
15993            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15994        );
15995    }
15996
15997    #[simd_test(enable = "avx512bw,avx512vl")]
15998    const fn test_mm256_cmpge_epu8_mask() {
15999        let a = _mm256_set1_epi8(1);
16000        let b = _mm256_set1_epi8(1);
16001        let m = _mm256_cmpge_epu8_mask(a, b);
16002        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16003    }
16004
16005    #[simd_test(enable = "avx512bw,avx512vl")]
16006    const fn test_mm256_mask_cmpge_epu8_mask() {
16007        let a = _mm256_set1_epi8(1);
16008        let b = _mm256_set1_epi8(1);
16009        let mask = 0b01010101_01010101_01010101_01010101;
16010        let r = _mm256_mask_cmpge_epu8_mask(mask, a, b);
16011        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16012    }
16013
16014    #[simd_test(enable = "avx512bw,avx512vl")]
16015    const fn test_mm_cmpge_epu8_mask() {
16016        let a = _mm_set1_epi8(1);
16017        let b = _mm_set1_epi8(1);
16018        let m = _mm_cmpge_epu8_mask(a, b);
16019        assert_eq!(m, 0b11111111_11111111);
16020    }
16021
16022    #[simd_test(enable = "avx512bw,avx512vl")]
16023    const fn test_mm_mask_cmpge_epu8_mask() {
16024        let a = _mm_set1_epi8(1);
16025        let b = _mm_set1_epi8(1);
16026        let mask = 0b01010101_01010101;
16027        let r = _mm_mask_cmpge_epu8_mask(mask, a, b);
16028        assert_eq!(r, 0b01010101_01010101);
16029    }
16030
16031    #[simd_test(enable = "avx512bw")]
16032    const fn test_mm512_cmpge_epi16_mask() {
16033        let a = _mm512_set1_epi16(-1);
16034        let b = _mm512_set1_epi16(-1);
16035        let m = _mm512_cmpge_epi16_mask(a, b);
16036        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16037    }
16038
16039    #[simd_test(enable = "avx512bw")]
16040    const fn test_mm512_mask_cmpge_epi16_mask() {
16041        let a = _mm512_set1_epi16(-1);
16042        let b = _mm512_set1_epi16(-1);
16043        let mask = 0b01010101_01010101_01010101_01010101;
16044        let r = _mm512_mask_cmpge_epi16_mask(mask, a, b);
16045        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16046    }
16047
16048    #[simd_test(enable = "avx512bw,avx512vl")]
16049    const fn test_mm256_cmpge_epi16_mask() {
16050        let a = _mm256_set1_epi16(-1);
16051        let b = _mm256_set1_epi16(-1);
16052        let m = _mm256_cmpge_epi16_mask(a, b);
16053        assert_eq!(m, 0b11111111_11111111);
16054    }
16055
16056    #[simd_test(enable = "avx512bw,avx512vl")]
16057    const fn test_mm256_mask_cmpge_epi16_mask() {
16058        let a = _mm256_set1_epi16(-1);
16059        let b = _mm256_set1_epi16(-1);
16060        let mask = 0b01010101_01010101;
16061        let r = _mm256_mask_cmpge_epi16_mask(mask, a, b);
16062        assert_eq!(r, 0b01010101_01010101);
16063    }
16064
16065    #[simd_test(enable = "avx512bw,avx512vl")]
16066    const fn test_mm_cmpge_epi16_mask() {
16067        let a = _mm_set1_epi16(-1);
16068        let b = _mm_set1_epi16(-1);
16069        let m = _mm_cmpge_epi16_mask(a, b);
16070        assert_eq!(m, 0b11111111);
16071    }
16072
16073    #[simd_test(enable = "avx512bw,avx512vl")]
16074    const fn test_mm_mask_cmpge_epi16_mask() {
16075        let a = _mm_set1_epi16(-1);
16076        let b = _mm_set1_epi16(-1);
16077        let mask = 0b01010101;
16078        let r = _mm_mask_cmpge_epi16_mask(mask, a, b);
16079        assert_eq!(r, 0b01010101);
16080    }
16081
16082    #[simd_test(enable = "avx512bw")]
16083    const fn test_mm512_cmpge_epi8_mask() {
16084        let a = _mm512_set1_epi8(-1);
16085        let b = _mm512_set1_epi8(-1);
16086        let m = _mm512_cmpge_epi8_mask(a, b);
16087        assert_eq!(
16088            m,
16089            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
16090        );
16091    }
16092
16093    #[simd_test(enable = "avx512bw")]
16094    const fn test_mm512_mask_cmpge_epi8_mask() {
16095        let a = _mm512_set1_epi8(-1);
16096        let b = _mm512_set1_epi8(-1);
16097        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
16098        let r = _mm512_mask_cmpge_epi8_mask(mask, a, b);
16099        assert_eq!(
16100            r,
16101            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
16102        );
16103    }
16104
16105    #[simd_test(enable = "avx512bw,avx512vl")]
16106    const fn test_mm256_cmpge_epi8_mask() {
16107        let a = _mm256_set1_epi8(-1);
16108        let b = _mm256_set1_epi8(-1);
16109        let m = _mm256_cmpge_epi8_mask(a, b);
16110        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16111    }
16112
16113    #[simd_test(enable = "avx512bw,avx512vl")]
16114    const fn test_mm256_mask_cmpge_epi8_mask() {
16115        let a = _mm256_set1_epi8(-1);
16116        let b = _mm256_set1_epi8(-1);
16117        let mask = 0b01010101_01010101_01010101_01010101;
16118        let r = _mm256_mask_cmpge_epi8_mask(mask, a, b);
16119        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16120    }
16121
16122    #[simd_test(enable = "avx512bw,avx512vl")]
16123    const fn test_mm_cmpge_epi8_mask() {
16124        let a = _mm_set1_epi8(-1);
16125        let b = _mm_set1_epi8(-1);
16126        let m = _mm_cmpge_epi8_mask(a, b);
16127        assert_eq!(m, 0b11111111_11111111);
16128    }
16129
16130    #[simd_test(enable = "avx512bw,avx512vl")]
16131    const fn test_mm_mask_cmpge_epi8_mask() {
16132        let a = _mm_set1_epi8(-1);
16133        let b = _mm_set1_epi8(-1);
16134        let mask = 0b01010101_01010101;
16135        let r = _mm_mask_cmpge_epi8_mask(mask, a, b);
16136        assert_eq!(r, 0b01010101_01010101);
16137    }
16138
// Equality comparison-mask tests: `_mm*_cmpeq_{epu16,epu8,epi16,epi8}_mask` and their
// writemask variants. Identical operands make every lane equal, so plain variants expect
// an all-ones mask and writemask variants return the supplied mask bit-pattern.
16139    #[simd_test(enable = "avx512bw")]
16140    const fn test_mm512_cmpeq_epu16_mask() {
16141        let a = _mm512_set1_epi16(1);
16142        let b = _mm512_set1_epi16(1);
16143        let m = _mm512_cmpeq_epu16_mask(a, b);
16144        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16145    }
16146
16147    #[simd_test(enable = "avx512bw")]
16148    const fn test_mm512_mask_cmpeq_epu16_mask() {
16149        let a = _mm512_set1_epi16(1);
16150        let b = _mm512_set1_epi16(1);
16151        let mask = 0b01010101_01010101_01010101_01010101;
16152        let r = _mm512_mask_cmpeq_epu16_mask(mask, a, b);
16153        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16154    }
16155
16156    #[simd_test(enable = "avx512bw,avx512vl")]
16157    const fn test_mm256_cmpeq_epu16_mask() {
16158        let a = _mm256_set1_epi16(1);
16159        let b = _mm256_set1_epi16(1);
16160        let m = _mm256_cmpeq_epu16_mask(a, b);
16161        assert_eq!(m, 0b11111111_11111111);
16162    }
16163
16164    #[simd_test(enable = "avx512bw,avx512vl")]
16165    const fn test_mm256_mask_cmpeq_epu16_mask() {
16166        let a = _mm256_set1_epi16(1);
16167        let b = _mm256_set1_epi16(1);
16168        let mask = 0b01010101_01010101;
16169        let r = _mm256_mask_cmpeq_epu16_mask(mask, a, b);
16170        assert_eq!(r, 0b01010101_01010101);
16171    }
16172
16173    #[simd_test(enable = "avx512bw,avx512vl")]
16174    const fn test_mm_cmpeq_epu16_mask() {
16175        let a = _mm_set1_epi16(1);
16176        let b = _mm_set1_epi16(1);
16177        let m = _mm_cmpeq_epu16_mask(a, b);
16178        assert_eq!(m, 0b11111111);
16179    }
16180
16181    #[simd_test(enable = "avx512bw,avx512vl")]
16182    const fn test_mm_mask_cmpeq_epu16_mask() {
16183        let a = _mm_set1_epi16(1);
16184        let b = _mm_set1_epi16(1);
16185        let mask = 0b01010101;
16186        let r = _mm_mask_cmpeq_epu16_mask(mask, a, b);
16187        assert_eq!(r, 0b01010101);
16188    }
16189
16190    #[simd_test(enable = "avx512bw")]
16191    const fn test_mm512_cmpeq_epu8_mask() {
16192        let a = _mm512_set1_epi8(1);
16193        let b = _mm512_set1_epi8(1);
16194        let m = _mm512_cmpeq_epu8_mask(a, b);
16195        assert_eq!(
16196            m,
16197            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
16198        );
16199    }
16200
16201    #[simd_test(enable = "avx512bw")]
16202    const fn test_mm512_mask_cmpeq_epu8_mask() {
16203        let a = _mm512_set1_epi8(1);
16204        let b = _mm512_set1_epi8(1);
16205        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
16206        let r = _mm512_mask_cmpeq_epu8_mask(mask, a, b);
16207        assert_eq!(
16208            r,
16209            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
16210        );
16211    }
16212
16213    #[simd_test(enable = "avx512bw,avx512vl")]
16214    const fn test_mm256_cmpeq_epu8_mask() {
16215        let a = _mm256_set1_epi8(1);
16216        let b = _mm256_set1_epi8(1);
16217        let m = _mm256_cmpeq_epu8_mask(a, b);
16218        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16219    }
16220
16221    #[simd_test(enable = "avx512bw,avx512vl")]
16222    const fn test_mm256_mask_cmpeq_epu8_mask() {
16223        let a = _mm256_set1_epi8(1);
16224        let b = _mm256_set1_epi8(1);
16225        let mask = 0b01010101_01010101_01010101_01010101;
16226        let r = _mm256_mask_cmpeq_epu8_mask(mask, a, b);
16227        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16228    }
16229
16230    #[simd_test(enable = "avx512bw,avx512vl")]
16231    const fn test_mm_cmpeq_epu8_mask() {
16232        let a = _mm_set1_epi8(1);
16233        let b = _mm_set1_epi8(1);
16234        let m = _mm_cmpeq_epu8_mask(a, b);
16235        assert_eq!(m, 0b11111111_11111111);
16236    }
16237
16238    #[simd_test(enable = "avx512bw,avx512vl")]
16239    const fn test_mm_mask_cmpeq_epu8_mask() {
16240        let a = _mm_set1_epi8(1);
16241        let b = _mm_set1_epi8(1);
16242        let mask = 0b01010101_01010101;
16243        let r = _mm_mask_cmpeq_epu8_mask(mask, a, b);
16244        assert_eq!(r, 0b01010101_01010101);
16245    }
16246
16247    #[simd_test(enable = "avx512bw")]
16248    const fn test_mm512_cmpeq_epi16_mask() {
16249        let a = _mm512_set1_epi16(-1);
16250        let b = _mm512_set1_epi16(-1);
16251        let m = _mm512_cmpeq_epi16_mask(a, b);
16252        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16253    }
16254
16255    #[simd_test(enable = "avx512bw")]
16256    const fn test_mm512_mask_cmpeq_epi16_mask() {
16257        let a = _mm512_set1_epi16(-1);
16258        let b = _mm512_set1_epi16(-1);
16259        let mask = 0b01010101_01010101_01010101_01010101;
16260        let r = _mm512_mask_cmpeq_epi16_mask(mask, a, b);
16261        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16262    }
16263
16264    #[simd_test(enable = "avx512bw,avx512vl")]
16265    const fn test_mm256_cmpeq_epi16_mask() {
16266        let a = _mm256_set1_epi16(-1);
16267        let b = _mm256_set1_epi16(-1);
16268        let m = _mm256_cmpeq_epi16_mask(a, b);
16269        assert_eq!(m, 0b11111111_11111111);
16270    }
16271
16272    #[simd_test(enable = "avx512bw,avx512vl")]
16273    const fn test_mm256_mask_cmpeq_epi16_mask() {
16274        let a = _mm256_set1_epi16(-1);
16275        let b = _mm256_set1_epi16(-1);
16276        let mask = 0b01010101_01010101;
16277        let r = _mm256_mask_cmpeq_epi16_mask(mask, a, b);
16278        assert_eq!(r, 0b01010101_01010101);
16279    }
16280
16281    #[simd_test(enable = "avx512bw,avx512vl")]
16282    const fn test_mm_cmpeq_epi16_mask() {
16283        let a = _mm_set1_epi16(-1);
16284        let b = _mm_set1_epi16(-1);
16285        let m = _mm_cmpeq_epi16_mask(a, b);
16286        assert_eq!(m, 0b11111111);
16287    }
16288
16289    #[simd_test(enable = "avx512bw,avx512vl")]
16290    const fn test_mm_mask_cmpeq_epi16_mask() {
16291        let a = _mm_set1_epi16(-1);
16292        let b = _mm_set1_epi16(-1);
16293        let mask = 0b01010101;
16294        let r = _mm_mask_cmpeq_epi16_mask(mask, a, b);
16295        assert_eq!(r, 0b01010101);
16296    }
16297
16298    #[simd_test(enable = "avx512bw")]
16299    const fn test_mm512_cmpeq_epi8_mask() {
16300        let a = _mm512_set1_epi8(-1);
16301        let b = _mm512_set1_epi8(-1);
16302        let m = _mm512_cmpeq_epi8_mask(a, b);
16303        assert_eq!(
16304            m,
16305            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
16306        );
16307    }
16308
16309    #[simd_test(enable = "avx512bw")]
16310    const fn test_mm512_mask_cmpeq_epi8_mask() {
16311        let a = _mm512_set1_epi8(-1);
16312        let b = _mm512_set1_epi8(-1);
16313        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
16314        let r = _mm512_mask_cmpeq_epi8_mask(mask, a, b);
16315        assert_eq!(
16316            r,
16317            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
16318        );
16319    }
16320
16321    #[simd_test(enable = "avx512bw,avx512vl")]
16322    const fn test_mm256_cmpeq_epi8_mask() {
16323        let a = _mm256_set1_epi8(-1);
16324        let b = _mm256_set1_epi8(-1);
16325        let m = _mm256_cmpeq_epi8_mask(a, b);
16326        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16327    }
16328
16329    #[simd_test(enable = "avx512bw,avx512vl")]
16330    const fn test_mm256_mask_cmpeq_epi8_mask() {
16331        let a = _mm256_set1_epi8(-1);
16332        let b = _mm256_set1_epi8(-1);
16333        let mask = 0b01010101_01010101_01010101_01010101;
16334        let r = _mm256_mask_cmpeq_epi8_mask(mask, a, b);
16335        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16336    }
16337
16338    #[simd_test(enable = "avx512bw,avx512vl")]
16339    const fn test_mm_cmpeq_epi8_mask() {
16340        let a = _mm_set1_epi8(-1);
16341        let b = _mm_set1_epi8(-1);
16342        let m = _mm_cmpeq_epi8_mask(a, b);
16343        assert_eq!(m, 0b11111111_11111111);
16344    }
16345
16346    #[simd_test(enable = "avx512bw,avx512vl")]
16347    const fn test_mm_mask_cmpeq_epi8_mask() {
16348        let a = _mm_set1_epi8(-1);
16349        let b = _mm_set1_epi8(-1);
16350        let mask = 0b01010101_01010101;
16351        let r = _mm_mask_cmpeq_epi8_mask(mask, a, b);
16352        assert_eq!(r, 0b01010101_01010101);
16353    }
16354
16355    #[simd_test(enable = "avx512bw")]
16356    const fn test_mm512_cmpneq_epu16_mask() {
16357        let a = _mm512_set1_epi16(2);
16358        let b = _mm512_set1_epi16(1);
16359        let m = _mm512_cmpneq_epu16_mask(a, b);
16360        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16361    }
16362
16363    #[simd_test(enable = "avx512bw")]
16364    const fn test_mm512_mask_cmpneq_epu16_mask() {
16365        let a = _mm512_set1_epi16(2);
16366        let b = _mm512_set1_epi16(1);
16367        let mask = 0b01010101_01010101_01010101_01010101;
16368        let r = _mm512_mask_cmpneq_epu16_mask(mask, a, b);
16369        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16370    }
16371
16372    #[simd_test(enable = "avx512bw,avx512vl")]
16373    const fn test_mm256_cmpneq_epu16_mask() {
16374        let a = _mm256_set1_epi16(2);
16375        let b = _mm256_set1_epi16(1);
16376        let m = _mm256_cmpneq_epu16_mask(a, b);
16377        assert_eq!(m, 0b11111111_11111111);
16378    }
16379
16380    #[simd_test(enable = "avx512bw,avx512vl")]
16381    const fn test_mm256_mask_cmpneq_epu16_mask() {
16382        let a = _mm256_set1_epi16(2);
16383        let b = _mm256_set1_epi16(1);
16384        let mask = 0b01010101_01010101;
16385        let r = _mm256_mask_cmpneq_epu16_mask(mask, a, b);
16386        assert_eq!(r, 0b01010101_01010101);
16387    }
16388
16389    #[simd_test(enable = "avx512bw,avx512vl")]
16390    const fn test_mm_cmpneq_epu16_mask() {
16391        let a = _mm_set1_epi16(2);
16392        let b = _mm_set1_epi16(1);
16393        let m = _mm_cmpneq_epu16_mask(a, b);
16394        assert_eq!(m, 0b11111111);
16395    }
16396
16397    #[simd_test(enable = "avx512bw,avx512vl")]
16398    const fn test_mm_mask_cmpneq_epu16_mask() {
16399        let a = _mm_set1_epi16(2);
16400        let b = _mm_set1_epi16(1);
16401        let mask = 0b01010101;
16402        let r = _mm_mask_cmpneq_epu16_mask(mask, a, b);
16403        assert_eq!(r, 0b01010101);
16404    }
16405
16406    #[simd_test(enable = "avx512bw")]
16407    const fn test_mm512_cmpneq_epu8_mask() {
16408        let a = _mm512_set1_epi8(2);
16409        let b = _mm512_set1_epi8(1);
16410        let m = _mm512_cmpneq_epu8_mask(a, b);
16411        assert_eq!(
16412            m,
16413            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
16414        );
16415    }
16416
16417    #[simd_test(enable = "avx512bw")]
16418    const fn test_mm512_mask_cmpneq_epu8_mask() {
16419        let a = _mm512_set1_epi8(2);
16420        let b = _mm512_set1_epi8(1);
16421        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
16422        let r = _mm512_mask_cmpneq_epu8_mask(mask, a, b);
16423        assert_eq!(
16424            r,
16425            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
16426        );
16427    }
16428
16429    #[simd_test(enable = "avx512bw,avx512vl")]
16430    const fn test_mm256_cmpneq_epu8_mask() {
16431        let a = _mm256_set1_epi8(2);
16432        let b = _mm256_set1_epi8(1);
16433        let m = _mm256_cmpneq_epu8_mask(a, b);
16434        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16435    }
16436
16437    #[simd_test(enable = "avx512bw,avx512vl")]
16438    const fn test_mm256_mask_cmpneq_epu8_mask() {
16439        let a = _mm256_set1_epi8(2);
16440        let b = _mm256_set1_epi8(1);
16441        let mask = 0b01010101_01010101_01010101_01010101;
16442        let r = _mm256_mask_cmpneq_epu8_mask(mask, a, b);
16443        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16444    }
16445
16446    #[simd_test(enable = "avx512bw,avx512vl")]
16447    const fn test_mm_cmpneq_epu8_mask() {
16448        let a = _mm_set1_epi8(2);
16449        let b = _mm_set1_epi8(1);
16450        let m = _mm_cmpneq_epu8_mask(a, b);
16451        assert_eq!(m, 0b11111111_11111111);
16452    }
16453
16454    #[simd_test(enable = "avx512bw,avx512vl")]
16455    const fn test_mm_mask_cmpneq_epu8_mask() {
16456        let a = _mm_set1_epi8(2);
16457        let b = _mm_set1_epi8(1);
16458        let mask = 0b01010101_01010101;
16459        let r = _mm_mask_cmpneq_epu8_mask(mask, a, b);
16460        assert_eq!(r, 0b01010101_01010101);
16461    }
16462
16463    #[simd_test(enable = "avx512bw")]
16464    const fn test_mm512_cmpneq_epi16_mask() {
16465        let a = _mm512_set1_epi16(1);
16466        let b = _mm512_set1_epi16(-1);
16467        let m = _mm512_cmpneq_epi16_mask(a, b);
16468        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16469    }
16470
16471    #[simd_test(enable = "avx512bw")]
16472    const fn test_mm512_mask_cmpneq_epi16_mask() {
16473        let a = _mm512_set1_epi16(1);
16474        let b = _mm512_set1_epi16(-1);
16475        let mask = 0b01010101_01010101_01010101_01010101;
16476        let r = _mm512_mask_cmpneq_epi16_mask(mask, a, b);
16477        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16478    }
16479
16480    #[simd_test(enable = "avx512bw,avx512vl")]
16481    const fn test_mm256_cmpneq_epi16_mask() {
16482        let a = _mm256_set1_epi16(1);
16483        let b = _mm256_set1_epi16(-1);
16484        let m = _mm256_cmpneq_epi16_mask(a, b);
16485        assert_eq!(m, 0b11111111_11111111);
16486    }
16487
16488    #[simd_test(enable = "avx512bw,avx512vl")]
16489    const fn test_mm256_mask_cmpneq_epi16_mask() {
16490        let a = _mm256_set1_epi16(1);
16491        let b = _mm256_set1_epi16(-1);
16492        let mask = 0b01010101_01010101;
16493        let r = _mm256_mask_cmpneq_epi16_mask(mask, a, b);
16494        assert_eq!(r, 0b01010101_01010101);
16495    }
16496
16497    #[simd_test(enable = "avx512bw,avx512vl")]
16498    const fn test_mm_cmpneq_epi16_mask() {
16499        let a = _mm_set1_epi16(1);
16500        let b = _mm_set1_epi16(-1);
16501        let m = _mm_cmpneq_epi16_mask(a, b);
16502        assert_eq!(m, 0b11111111);
16503    }
16504
16505    #[simd_test(enable = "avx512bw,avx512vl")]
16506    const fn test_mm_mask_cmpneq_epi16_mask() {
16507        let a = _mm_set1_epi16(1);
16508        let b = _mm_set1_epi16(-1);
16509        let mask = 0b01010101;
16510        let r = _mm_mask_cmpneq_epi16_mask(mask, a, b);
16511        assert_eq!(r, 0b01010101);
16512    }
16513
16514    #[simd_test(enable = "avx512bw")]
16515    const fn test_mm512_cmpneq_epi8_mask() {
16516        let a = _mm512_set1_epi8(1);
16517        let b = _mm512_set1_epi8(-1);
16518        let m = _mm512_cmpneq_epi8_mask(a, b);
16519        assert_eq!(
16520            m,
16521            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
16522        );
16523    }
16524
16525    #[simd_test(enable = "avx512bw")]
16526    const fn test_mm512_mask_cmpneq_epi8_mask() {
16527        let a = _mm512_set1_epi8(1);
16528        let b = _mm512_set1_epi8(-1);
16529        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
16530        let r = _mm512_mask_cmpneq_epi8_mask(mask, a, b);
16531        assert_eq!(
16532            r,
16533            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
16534        );
16535    }
16536
16537    #[simd_test(enable = "avx512bw,avx512vl")]
16538    const fn test_mm256_cmpneq_epi8_mask() {
16539        let a = _mm256_set1_epi8(1);
16540        let b = _mm256_set1_epi8(-1);
16541        let m = _mm256_cmpneq_epi8_mask(a, b);
16542        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16543    }
16544
16545    #[simd_test(enable = "avx512bw,avx512vl")]
16546    const fn test_mm256_mask_cmpneq_epi8_mask() {
16547        let a = _mm256_set1_epi8(1);
16548        let b = _mm256_set1_epi8(-1);
16549        let mask = 0b01010101_01010101_01010101_01010101;
16550        let r = _mm256_mask_cmpneq_epi8_mask(mask, a, b);
16551        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16552    }
16553
16554    #[simd_test(enable = "avx512bw,avx512vl")]
16555    const fn test_mm_cmpneq_epi8_mask() {
16556        let a = _mm_set1_epi8(1);
16557        let b = _mm_set1_epi8(-1);
16558        let m = _mm_cmpneq_epi8_mask(a, b);
16559        assert_eq!(m, 0b11111111_11111111);
16560    }
16561
16562    #[simd_test(enable = "avx512bw,avx512vl")]
16563    const fn test_mm_mask_cmpneq_epi8_mask() {
16564        let a = _mm_set1_epi8(1);
16565        let b = _mm_set1_epi8(-1);
16566        let mask = 0b01010101_01010101;
16567        let r = _mm_mask_cmpneq_epi8_mask(mask, a, b);
16568        assert_eq!(r, 0b01010101_01010101);
16569    }
16570
16571    #[simd_test(enable = "avx512bw")]
16572    const fn test_mm512_cmp_epu16_mask() {
16573        let a = _mm512_set1_epi16(0);
16574        let b = _mm512_set1_epi16(1);
16575        let m = _mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
16576        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16577    }
16578
16579    #[simd_test(enable = "avx512bw")]
16580    const fn test_mm512_mask_cmp_epu16_mask() {
16581        let a = _mm512_set1_epi16(0);
16582        let b = _mm512_set1_epi16(1);
16583        let mask = 0b01010101_01010101_01010101_01010101;
16584        let r = _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
16585        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16586    }
16587
16588    #[simd_test(enable = "avx512bw,avx512vl")]
16589    const fn test_mm256_cmp_epu16_mask() {
16590        let a = _mm256_set1_epi16(0);
16591        let b = _mm256_set1_epi16(1);
16592        let m = _mm256_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
16593        assert_eq!(m, 0b11111111_11111111);
16594    }
16595
16596    #[simd_test(enable = "avx512bw,avx512vl")]
16597    const fn test_mm256_mask_cmp_epu16_mask() {
16598        let a = _mm256_set1_epi16(0);
16599        let b = _mm256_set1_epi16(1);
16600        let mask = 0b01010101_01010101;
16601        let r = _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
16602        assert_eq!(r, 0b01010101_01010101);
16603    }
16604
16605    #[simd_test(enable = "avx512bw,avx512vl")]
16606    const fn test_mm_cmp_epu16_mask() {
16607        let a = _mm_set1_epi16(0);
16608        let b = _mm_set1_epi16(1);
16609        let m = _mm_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
16610        assert_eq!(m, 0b11111111);
16611    }
16612
16613    #[simd_test(enable = "avx512bw,avx512vl")]
16614    const fn test_mm_mask_cmp_epu16_mask() {
16615        let a = _mm_set1_epi16(0);
16616        let b = _mm_set1_epi16(1);
16617        let mask = 0b01010101;
16618        let r = _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
16619        assert_eq!(r, 0b01010101);
16620    }
16621
16622    #[simd_test(enable = "avx512bw")]
16623    const fn test_mm512_cmp_epu8_mask() {
16624        let a = _mm512_set1_epi8(0);
16625        let b = _mm512_set1_epi8(1);
16626        let m = _mm512_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
16627        assert_eq!(
16628            m,
16629            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
16630        );
16631    }
16632
16633    #[simd_test(enable = "avx512bw")]
16634    const fn test_mm512_mask_cmp_epu8_mask() {
16635        let a = _mm512_set1_epi8(0);
16636        let b = _mm512_set1_epi8(1);
16637        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
16638        let r = _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
16639        assert_eq!(
16640            r,
16641            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
16642        );
16643    }
16644
16645    #[simd_test(enable = "avx512bw,avx512vl")]
16646    const fn test_mm256_cmp_epu8_mask() {
16647        let a = _mm256_set1_epi8(0);
16648        let b = _mm256_set1_epi8(1);
16649        let m = _mm256_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
16650        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16651    }
16652
16653    #[simd_test(enable = "avx512bw,avx512vl")]
16654    const fn test_mm256_mask_cmp_epu8_mask() {
16655        let a = _mm256_set1_epi8(0);
16656        let b = _mm256_set1_epi8(1);
16657        let mask = 0b01010101_01010101_01010101_01010101;
16658        let r = _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
16659        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16660    }
16661
16662    #[simd_test(enable = "avx512bw,avx512vl")]
16663    const fn test_mm_cmp_epu8_mask() {
16664        let a = _mm_set1_epi8(0);
16665        let b = _mm_set1_epi8(1);
16666        let m = _mm_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
16667        assert_eq!(m, 0b11111111_11111111);
16668    }
16669
16670    #[simd_test(enable = "avx512bw,avx512vl")]
16671    const fn test_mm_mask_cmp_epu8_mask() {
16672        let a = _mm_set1_epi8(0);
16673        let b = _mm_set1_epi8(1);
16674        let mask = 0b01010101_01010101;
16675        let r = _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
16676        assert_eq!(r, 0b01010101_01010101);
16677    }
16678
16679    #[simd_test(enable = "avx512bw")]
16680    const fn test_mm512_cmp_epi16_mask() {
16681        let a = _mm512_set1_epi16(0);
16682        let b = _mm512_set1_epi16(1);
16683        let m = _mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
16684        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16685    }
16686
16687    #[simd_test(enable = "avx512bw")]
16688    const fn test_mm512_mask_cmp_epi16_mask() {
16689        let a = _mm512_set1_epi16(0);
16690        let b = _mm512_set1_epi16(1);
16691        let mask = 0b01010101_01010101_01010101_01010101;
16692        let r = _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
16693        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16694    }
16695
16696    #[simd_test(enable = "avx512bw,avx512vl")]
16697    const fn test_mm256_cmp_epi16_mask() {
16698        let a = _mm256_set1_epi16(0);
16699        let b = _mm256_set1_epi16(1);
16700        let m = _mm256_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
16701        assert_eq!(m, 0b11111111_11111111);
16702    }
16703
16704    #[simd_test(enable = "avx512bw,avx512vl")]
16705    const fn test_mm256_mask_cmp_epi16_mask() {
16706        let a = _mm256_set1_epi16(0);
16707        let b = _mm256_set1_epi16(1);
16708        let mask = 0b01010101_01010101;
16709        let r = _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
16710        assert_eq!(r, 0b01010101_01010101);
16711    }
16712
16713    #[simd_test(enable = "avx512bw,avx512vl")]
16714    const fn test_mm_cmp_epi16_mask() {
16715        let a = _mm_set1_epi16(0);
16716        let b = _mm_set1_epi16(1);
16717        let m = _mm_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
16718        assert_eq!(m, 0b11111111);
16719    }
16720
16721    #[simd_test(enable = "avx512bw,avx512vl")]
16722    const fn test_mm_mask_cmp_epi16_mask() {
16723        let a = _mm_set1_epi16(0);
16724        let b = _mm_set1_epi16(1);
16725        let mask = 0b01010101;
16726        let r = _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
16727        assert_eq!(r, 0b01010101);
16728    }
16729
16730    #[simd_test(enable = "avx512bw")]
16731    const fn test_mm512_cmp_epi8_mask() {
16732        let a = _mm512_set1_epi8(0);
16733        let b = _mm512_set1_epi8(1);
16734        let m = _mm512_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
16735        assert_eq!(
16736            m,
16737            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
16738        );
16739    }
16740
16741    #[simd_test(enable = "avx512bw")]
16742    const fn test_mm512_mask_cmp_epi8_mask() {
16743        let a = _mm512_set1_epi8(0);
16744        let b = _mm512_set1_epi8(1);
16745        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
16746        let r = _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
16747        assert_eq!(
16748            r,
16749            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
16750        );
16751    }
16752
16753    #[simd_test(enable = "avx512bw,avx512vl")]
16754    const fn test_mm256_cmp_epi8_mask() {
16755        let a = _mm256_set1_epi8(0);
16756        let b = _mm256_set1_epi8(1);
16757        let m = _mm256_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
16758        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16759    }
16760
16761    #[simd_test(enable = "avx512bw,avx512vl")]
16762    const fn test_mm256_mask_cmp_epi8_mask() {
16763        let a = _mm256_set1_epi8(0);
16764        let b = _mm256_set1_epi8(1);
16765        let mask = 0b01010101_01010101_01010101_01010101;
16766        let r = _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
16767        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16768    }
16769
16770    #[simd_test(enable = "avx512bw,avx512vl")]
16771    const fn test_mm_cmp_epi8_mask() {
16772        let a = _mm_set1_epi8(0);
16773        let b = _mm_set1_epi8(1);
16774        let m = _mm_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
16775        assert_eq!(m, 0b11111111_11111111);
16776    }
16777
16778    #[simd_test(enable = "avx512bw,avx512vl")]
16779    const fn test_mm_mask_cmp_epi8_mask() {
16780        let a = _mm_set1_epi8(0);
16781        let b = _mm_set1_epi8(1);
16782        let mask = 0b01010101_01010101;
16783        let r = _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
16784        assert_eq!(r, 0b01010101_01010101);
16785    }
16786
16787    #[simd_test(enable = "avx512bw,avx512vl")]
16788    const fn test_mm256_reduce_add_epi16() {
16789        let a = _mm256_set1_epi16(1);
16790        let e = _mm256_reduce_add_epi16(a);
16791        assert_eq!(16, e);
16792    }
16793
16794    #[simd_test(enable = "avx512bw,avx512vl")]
16795    const fn test_mm256_mask_reduce_add_epi16() {
16796        let a = _mm256_set1_epi16(1);
16797        let e = _mm256_mask_reduce_add_epi16(0b11111111_00000000, a);
16798        assert_eq!(8, e);
16799    }
16800
16801    #[simd_test(enable = "avx512bw,avx512vl")]
16802    const fn test_mm_reduce_add_epi16() {
16803        let a = _mm_set1_epi16(1);
16804        let e = _mm_reduce_add_epi16(a);
16805        assert_eq!(8, e);
16806    }
16807
16808    #[simd_test(enable = "avx512bw,avx512vl")]
16809    const fn test_mm_mask_reduce_add_epi16() {
16810        let a = _mm_set1_epi16(1);
16811        let e = _mm_mask_reduce_add_epi16(0b11110000, a);
16812        assert_eq!(4, e);
16813    }
16814
16815    #[simd_test(enable = "avx512bw,avx512vl")]
16816    const fn test_mm256_reduce_add_epi8() {
16817        let a = _mm256_set1_epi8(1);
16818        let e = _mm256_reduce_add_epi8(a);
16819        assert_eq!(32, e);
16820    }
16821
16822    #[simd_test(enable = "avx512bw,avx512vl")]
16823    const fn test_mm256_mask_reduce_add_epi8() {
16824        let a = _mm256_set1_epi8(1);
16825        let e = _mm256_mask_reduce_add_epi8(0b11111111_00000000_11111111_00000000, a);
16826        assert_eq!(16, e);
16827    }
16828
16829    #[simd_test(enable = "avx512bw,avx512vl")]
16830    const fn test_mm_reduce_add_epi8() {
16831        let a = _mm_set1_epi8(1);
16832        let e = _mm_reduce_add_epi8(a);
16833        assert_eq!(16, e);
16834    }
16835
16836    #[simd_test(enable = "avx512bw,avx512vl")]
16837    const fn test_mm_mask_reduce_add_epi8() {
16838        let a = _mm_set1_epi8(1);
16839        let e = _mm_mask_reduce_add_epi8(0b11111111_00000000, a);
16840        assert_eq!(8, e);
16841    }
16842
16843    #[simd_test(enable = "avx512bw,avx512vl")]
16844    const fn test_mm256_reduce_and_epi16() {
16845        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
16846        let e = _mm256_reduce_and_epi16(a);
16847        assert_eq!(0, e);
16848    }
16849
16850    #[simd_test(enable = "avx512bw,avx512vl")]
16851    const fn test_mm256_mask_reduce_and_epi16() {
16852        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
16853        let e = _mm256_mask_reduce_and_epi16(0b11111111_00000000, a);
16854        assert_eq!(1, e);
16855    }
16856
16857    #[simd_test(enable = "avx512bw,avx512vl")]
16858    const fn test_mm_reduce_and_epi16() {
16859        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
16860        let e = _mm_reduce_and_epi16(a);
16861        assert_eq!(0, e);
16862    }
16863
16864    #[simd_test(enable = "avx512bw,avx512vl")]
16865    const fn test_mm_mask_reduce_and_epi16() {
16866        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
16867        let e = _mm_mask_reduce_and_epi16(0b11110000, a);
16868        assert_eq!(1, e);
16869    }
16870
16871    #[simd_test(enable = "avx512bw,avx512vl")]
16872    const fn test_mm256_reduce_and_epi8() {
16873        let a = _mm256_set_epi8(
16874            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
16875            2, 2, 2,
16876        );
16877        let e = _mm256_reduce_and_epi8(a);
16878        assert_eq!(0, e);
16879    }
16880
16881    #[simd_test(enable = "avx512bw,avx512vl")]
16882    const fn test_mm256_mask_reduce_and_epi8() {
16883        let a = _mm256_set_epi8(
16884            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
16885            2, 2, 2,
16886        );
16887        let e = _mm256_mask_reduce_and_epi8(0b11111111_00000000_11111111_00000000, a);
16888        assert_eq!(1, e);
16889    }
16890
16891    #[simd_test(enable = "avx512bw,avx512vl")]
16892    const fn test_mm_reduce_and_epi8() {
16893        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
16894        let e = _mm_reduce_and_epi8(a);
16895        assert_eq!(0, e);
16896    }
16897
16898    #[simd_test(enable = "avx512bw,avx512vl")]
16899    const fn test_mm_mask_reduce_and_epi8() {
16900        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
16901        let e = _mm_mask_reduce_and_epi8(0b11111111_00000000, a);
16902        assert_eq!(1, e);
16903    }
16904
16905    #[simd_test(enable = "avx512bw,avx512vl")]
16906    const fn test_mm256_reduce_mul_epi16() {
16907        let a = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
16908        let e = _mm256_reduce_mul_epi16(a);
16909        assert_eq!(256, e);
16910    }
16911
16912    #[simd_test(enable = "avx512bw,avx512vl")]
16913    const fn test_mm256_mask_reduce_mul_epi16() {
16914        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
16915        let e = _mm256_mask_reduce_mul_epi16(0b11111111_00000000, a);
16916        assert_eq!(1, e);
16917    }
16918
16919    #[simd_test(enable = "avx512bw,avx512vl")]
16920    const fn test_mm_reduce_mul_epi16() {
16921        let a = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
16922        let e = _mm_reduce_mul_epi16(a);
16923        assert_eq!(16, e);
16924    }
16925
16926    #[simd_test(enable = "avx512bw,avx512vl")]
16927    const fn test_mm_mask_reduce_mul_epi16() {
16928        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
16929        let e = _mm_mask_reduce_mul_epi16(0b11110000, a);
16930        assert_eq!(1, e);
16931    }
16932
16933    #[simd_test(enable = "avx512bw,avx512vl")]
16934    const fn test_mm256_reduce_mul_epi8() {
16935        let a = _mm256_set_epi8(
16936            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16937            2, 2, 2,
16938        );
16939        let e = _mm256_reduce_mul_epi8(a);
16940        assert_eq!(64, e);
16941    }
16942
16943    #[simd_test(enable = "avx512bw,avx512vl")]
16944    const fn test_mm256_mask_reduce_mul_epi8() {
16945        let a = _mm256_set_epi8(
16946            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16947            2, 2, 2,
16948        );
16949        let e = _mm256_mask_reduce_mul_epi8(0b11111111_00000000_11111111_00000000, a);
16950        assert_eq!(1, e);
16951    }
16952
16953    #[simd_test(enable = "avx512bw,avx512vl")]
16954    const fn test_mm_reduce_mul_epi8() {
16955        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
16956        let e = _mm_reduce_mul_epi8(a);
16957        assert_eq!(8, e);
16958    }
16959
16960    #[simd_test(enable = "avx512bw,avx512vl")]
16961    const fn test_mm_mask_reduce_mul_epi8() {
16962        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
16963        let e = _mm_mask_reduce_mul_epi8(0b11111111_00000000, a);
16964        assert_eq!(1, e);
16965    }
16966
16967    #[simd_test(enable = "avx512bw,avx512vl")]
16968    const fn test_mm256_reduce_max_epi16() {
16969        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16970        let e: i16 = _mm256_reduce_max_epi16(a);
16971        assert_eq!(15, e);
16972    }
16973
16974    #[simd_test(enable = "avx512bw,avx512vl")]
16975    const fn test_mm256_mask_reduce_max_epi16() {
16976        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16977        let e: i16 = _mm256_mask_reduce_max_epi16(0b11111111_00000000, a);
16978        assert_eq!(7, e);
16979    }
16980
16981    #[simd_test(enable = "avx512bw,avx512vl")]
16982    const fn test_mm_reduce_max_epi16() {
16983        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16984        let e: i16 = _mm_reduce_max_epi16(a);
16985        assert_eq!(7, e);
16986    }
16987
16988    #[simd_test(enable = "avx512bw,avx512vl")]
16989    const fn test_mm_mask_reduce_max_epi16() {
16990        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16991        let e: i16 = _mm_mask_reduce_max_epi16(0b11110000, a);
16992        assert_eq!(3, e);
16993    }
16994
16995    #[simd_test(enable = "avx512bw,avx512vl")]
16996    const fn test_mm256_reduce_max_epi8() {
16997        let a = _mm256_set_epi8(
16998            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
16999            24, 25, 26, 27, 28, 29, 30, 31,
17000        );
17001        let e: i8 = _mm256_reduce_max_epi8(a);
17002        assert_eq!(31, e);
17003    }
17004
17005    #[simd_test(enable = "avx512bw,avx512vl")]
17006    const fn test_mm256_mask_reduce_max_epi8() {
17007        let a = _mm256_set_epi8(
17008            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
17009            24, 25, 26, 27, 28, 29, 30, 31,
17010        );
17011        let e: i8 = _mm256_mask_reduce_max_epi8(0b1111111111111111_0000000000000000, a);
17012        assert_eq!(15, e);
17013    }
17014
17015    #[simd_test(enable = "avx512bw,avx512vl")]
17016    const fn test_mm_reduce_max_epi8() {
17017        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17018        let e: i8 = _mm_reduce_max_epi8(a);
17019        assert_eq!(15, e);
17020    }
17021
17022    #[simd_test(enable = "avx512bw,avx512vl")]
17023    const fn test_mm_mask_reduce_max_epi8() {
17024        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17025        let e: i8 = _mm_mask_reduce_max_epi8(0b11111111_00000000, a);
17026        assert_eq!(7, e);
17027    }
17028
17029    #[simd_test(enable = "avx512bw,avx512vl")]
17030    const fn test_mm256_reduce_max_epu16() {
17031        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17032        let e: u16 = _mm256_reduce_max_epu16(a);
17033        assert_eq!(15, e);
17034    }
17035
17036    #[simd_test(enable = "avx512bw,avx512vl")]
17037    const fn test_mm256_mask_reduce_max_epu16() {
17038        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17039        let e: u16 = _mm256_mask_reduce_max_epu16(0b11111111_00000000, a);
17040        assert_eq!(7, e);
17041    }
17042
17043    #[simd_test(enable = "avx512bw,avx512vl")]
17044    const fn test_mm_reduce_max_epu16() {
17045        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
17046        let e: u16 = _mm_reduce_max_epu16(a);
17047        assert_eq!(7, e);
17048    }
17049
17050    #[simd_test(enable = "avx512bw,avx512vl")]
17051    const fn test_mm_mask_reduce_max_epu16() {
17052        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
17053        let e: u16 = _mm_mask_reduce_max_epu16(0b11110000, a);
17054        assert_eq!(3, e);
17055    }
17056
17057    #[simd_test(enable = "avx512bw,avx512vl")]
17058    const fn test_mm256_reduce_max_epu8() {
17059        let a = _mm256_set_epi8(
17060            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
17061            24, 25, 26, 27, 28, 29, 30, 31,
17062        );
17063        let e: u8 = _mm256_reduce_max_epu8(a);
17064        assert_eq!(31, e);
17065    }
17066
17067    #[simd_test(enable = "avx512bw,avx512vl")]
17068    const fn test_mm256_mask_reduce_max_epu8() {
17069        let a = _mm256_set_epi8(
17070            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
17071            24, 25, 26, 27, 28, 29, 30, 31,
17072        );
17073        let e: u8 = _mm256_mask_reduce_max_epu8(0b1111111111111111_0000000000000000, a);
17074        assert_eq!(15, e);
17075    }
17076
17077    #[simd_test(enable = "avx512bw,avx512vl")]
17078    const fn test_mm_reduce_max_epu8() {
17079        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17080        let e: u8 = _mm_reduce_max_epu8(a);
17081        assert_eq!(15, e);
17082    }
17083
17084    #[simd_test(enable = "avx512bw,avx512vl")]
17085    const fn test_mm_mask_reduce_max_epu8() {
17086        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17087        let e: u8 = _mm_mask_reduce_max_epu8(0b11111111_00000000, a);
17088        assert_eq!(7, e);
17089    }
17090
    // Minimum over signed 16-bit lanes holding 0..=15 is 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_min_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_reduce_min_epi16(a);
        assert_eq!(0, e);
    }

    // Mask selects elements 8..=15, which hold the values 0..=7; min is still 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_min_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_mask_reduce_min_epi16(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    // 128-bit variant: min of 0..=7 is 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_min_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_reduce_min_epi16(a);
        assert_eq!(0, e);
    }

    // Mask selects elements 4..=7 (values 0..=3); min is 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_min_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_mask_reduce_min_epi16(0b11110000, a);
        assert_eq!(0, e);
    }

    // Minimum over signed 8-bit lanes holding 0..=31 is 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_min_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_reduce_min_epi8(a);
        assert_eq!(0, e);
    }

    // Upper half of the mask selects elements 16..=31 (values 0..=15); min is 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_min_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_mask_reduce_min_epi8(0b1111111111111111_0000000000000000, a);
        assert_eq!(0, e);
    }

    // 128-bit variant: min of 0..=15 is 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_min_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_reduce_min_epi8(a);
        assert_eq!(0, e);
    }

    // Mask selects elements 8..=15 (values 0..=7); min is 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_min_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_mask_reduce_min_epi8(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    // Unsigned 16-bit minimum over lanes holding 0..=15 is 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_min_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_reduce_min_epu16(a);
        assert_eq!(0, e);
    }

    // Mask selects elements 8..=15 (values 0..=7); unsigned min is 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_min_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_mask_reduce_min_epu16(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    // 128-bit unsigned variant: min of 0..=7 is 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_min_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_reduce_min_epu16(a);
        assert_eq!(0, e);
    }

    // Mask selects elements 4..=7 (values 0..=3); unsigned min is 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_min_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_mask_reduce_min_epu16(0b11110000, a);
        assert_eq!(0, e);
    }

    // Unsigned 8-bit minimum over lanes holding 0..=31 is 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_min_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_reduce_min_epu8(a);
        assert_eq!(0, e);
    }

    // Upper half of the mask selects elements 16..=31 (values 0..=15); min is 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_min_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_mask_reduce_min_epu8(0b1111111111111111_0000000000000000, a);
        assert_eq!(0, e);
    }

    // 128-bit unsigned variant: min of 0..=15 is 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_min_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_reduce_min_epu8(a);
        assert_eq!(0, e);
    }

    // Mask selects elements 8..=15 (values 0..=7); unsigned min is 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_min_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_mask_reduce_min_epu8(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    // OR across lanes holding 1s and 2s is 0b01 | 0b10 = 3.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_or_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_reduce_or_epi16(a);
        assert_eq!(3, e);
    }

    // Mask selects only the lanes holding 1, so the OR is 1.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_or_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_mask_reduce_or_epi16(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    // 128-bit variant: OR of 1s and 2s is 3.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_or_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_reduce_or_epi16(a);
        assert_eq!(3, e);
    }

    // Mask selects only the lanes holding 1, so the OR is 1.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_or_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_mask_reduce_or_epi16(0b11110000, a);
        assert_eq!(1, e);
    }

    // 8-bit variant: OR of interleaved groups of 1s and 2s is 3.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_or_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_reduce_or_epi8(a);
        assert_eq!(3, e);
    }

    // Mask selects only the lanes holding 1, so the OR is 1.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_or_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_mask_reduce_or_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(1, e);
    }

    // 128-bit 8-bit variant: OR of 1s and 2s is 3.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_or_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_reduce_or_epi8(a);
        assert_eq!(3, e);
    }

    // Mask selects only the lanes holding 1, so the OR is 1.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_or_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_mask_reduce_or_epi8(0b11111111_00000000, a);
        assert_eq!(1, e);
    }
17276
    // Unaligned load of 32 i16s; _mm512_set_epi16 takes arguments highest
    // element first, so the expected vector lists the array in reverse.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_loadu_epi16() {
        #[rustfmt::skip]
        let a: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = unsafe { _mm512_loadu_epi16(&a[0]) };
        #[rustfmt::skip]
        let e = _mm512_set_epi16(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m512i(r, e);
    }

    // 256-bit unaligned i16 load; expected vector is the array reversed.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_loadu_epi16() {
        let a: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let r = unsafe { _mm256_loadu_epi16(&a[0]) };
        let e = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m256i(r, e);
    }

    // 128-bit unaligned i16 load; expected vector is the array reversed.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_loadu_epi16() {
        let a: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
        let r = unsafe { _mm_loadu_epi16(&a[0]) };
        let e = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m128i(r, e);
    }

    // Unaligned load of 64 i8s; expected vector is the array reversed.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_loadu_epi8() {
        #[rustfmt::skip]
        let a: [i8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                           1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = unsafe { _mm512_loadu_epi8(&a[0]) };
        #[rustfmt::skip]
        let e = _mm512_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
                                32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m512i(r, e);
    }

    // 256-bit unaligned i8 load; expected vector is the array reversed.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_loadu_epi8() {
        #[rustfmt::skip]
        let a: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = unsafe { _mm256_loadu_epi8(&a[0]) };
        #[rustfmt::skip]
        let e = _mm256_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m256i(r, e);
    }

    // 128-bit unaligned i8 load; expected vector is the array reversed.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_loadu_epi8() {
        let a: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let r = unsafe { _mm_loadu_epi8(&a[0]) };
        let e = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m128i(r, e);
    }

    // Round-trip: store a splatted vector and check the destination matches.
    // The destination starts undefined because the store overwrites it fully.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_storeu_epi16() {
        let a = _mm512_set1_epi16(9);
        let mut r = _mm512_undefined_epi32();
        unsafe {
            _mm512_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        }
        assert_eq_m512i(r, a);
    }

    // 256-bit i16 store round-trip.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_storeu_epi16() {
        let a = _mm256_set1_epi16(9);
        let mut r = _mm256_set1_epi32(0);
        unsafe {
            _mm256_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        }
        assert_eq_m256i(r, a);
    }

    // 128-bit i16 store round-trip.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_storeu_epi16() {
        let a = _mm_set1_epi16(9);
        let mut r = _mm_set1_epi32(0);
        unsafe {
            _mm_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        }
        assert_eq_m128i(r, a);
    }

    // 512-bit i8 store round-trip; destination fully overwritten.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_storeu_epi8() {
        let a = _mm512_set1_epi8(9);
        let mut r = _mm512_undefined_epi32();
        unsafe {
            _mm512_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        }
        assert_eq_m512i(r, a);
    }

    // 256-bit i8 store round-trip.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_storeu_epi8() {
        let a = _mm256_set1_epi8(9);
        let mut r = _mm256_set1_epi32(0);
        unsafe {
            _mm256_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        }
        assert_eq_m256i(r, a);
    }

    // 128-bit i8 store round-trip.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_storeu_epi8() {
        let a = _mm_set1_epi8(9);
        let mut r = _mm_set1_epi32(0);
        unsafe {
            _mm_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        }
        assert_eq_m128i(r, a);
    }
17392
    // Masked load: bit i of the mask set => element i comes from memory,
    // clear => element i is copied from `src` (42 here). Mask bit 0 pairs
    // with the first array element.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_loadu_epi16() {
        let src = _mm512_set1_epi16(42);
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = unsafe { _mm512_mask_loadu_epi16(src, m, black_box(p)) };
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = unsafe { _mm512_loadu_epi16(e.as_ptr()) };
        assert_eq_m512i(r, e);
    }

    // Zero-masked load: clear mask bits yield 0 instead of `src`.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_loadu_epi16() {
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = unsafe { _mm512_maskz_loadu_epi16(m, black_box(p)) };
        let e = &[
            0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32,
        ];
        let e = unsafe { _mm512_loadu_epi16(e.as_ptr()) };
        assert_eq_m512i(r, e);
    }

    // Masked store: only elements with a set mask bit are written; the rest
    // of the destination keeps its initial 42s.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_storeu_epi16() {
        let mut r = [42_i16; 32];
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let a = unsafe { _mm512_loadu_epi16(a.as_ptr()) };
        let m = 0b10101010_11001100_11101000_11001010;
        unsafe {
            _mm512_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        }
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = unsafe { _mm512_loadu_epi16(e.as_ptr()) };
        assert_eq_m512i(unsafe { _mm512_loadu_epi16(r.as_ptr()) }, e);
    }

    // 64-element i8 masked load; same src/memory selection rule as above.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_loadu_epi8() {
        let src = _mm512_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        let r = unsafe { _mm512_mask_loadu_epi8(src, m, black_box(p)) };
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
            45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
        ];
        let e = unsafe { _mm512_loadu_epi8(e.as_ptr()) };
        assert_eq_m512i(r, e);
    }

    // 64-element i8 zero-masked load.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_loadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        let r = unsafe { _mm512_maskz_loadu_epi8(m, black_box(p)) };
        let e = &[
            0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 44, 45, 46, 47, 48, 49,
            50, 51, 52, 53, 54, 55, 56, 0, 0, 0, 0, 0, 0, 0, 0,
        ];
        let e = unsafe { _mm512_loadu_epi8(e.as_ptr()) };
        assert_eq_m512i(r, e);
    }

    // 64-element i8 masked store; unwritten destination bytes keep 42.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_storeu_epi8() {
        let mut r = [42_i8; 64];
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let a = unsafe { _mm512_loadu_epi8(a.as_ptr()) };
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        unsafe {
            _mm512_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        }
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
            45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
        ];
        let e = unsafe { _mm512_loadu_epi8(e.as_ptr()) };
        assert_eq_m512i(unsafe { _mm512_loadu_epi8(r.as_ptr()) }, e);
    }

    // 256-bit i16 masked load.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_loadu_epi16() {
        let src = _mm256_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm256_mask_loadu_epi16(src, m, black_box(p)) };
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = unsafe { _mm256_loadu_epi16(e.as_ptr()) };
        assert_eq_m256i(r, e);
    }

    // 256-bit i16 zero-masked load.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_loadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm256_maskz_loadu_epi16(m, black_box(p)) };
        let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
        let e = unsafe { _mm256_loadu_epi16(e.as_ptr()) };
        assert_eq_m256i(r, e);
    }

    // 256-bit i16 masked store.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_storeu_epi16() {
        let mut r = [42_i16; 16];
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let a = unsafe { _mm256_loadu_epi16(a.as_ptr()) };
        let m = 0b11101000_11001010;
        unsafe {
            _mm256_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        }
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = unsafe { _mm256_loadu_epi16(e.as_ptr()) };
        assert_eq_m256i(unsafe { _mm256_loadu_epi16(r.as_ptr()) }, e);
    }

    // 256-bit i8 masked load.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_loadu_epi8() {
        let src = _mm256_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = unsafe { _mm256_mask_loadu_epi8(src, m, black_box(p)) };
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = unsafe { _mm256_loadu_epi8(e.as_ptr()) };
        assert_eq_m256i(r, e);
    }

    // 256-bit i8 zero-masked load.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_loadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = unsafe { _mm256_maskz_loadu_epi8(m, black_box(p)) };
        let e = &[
            0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32,
        ];
        let e = unsafe { _mm256_loadu_epi8(e.as_ptr()) };
        assert_eq_m256i(r, e);
    }

    // 256-bit i8 masked store.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_storeu_epi8() {
        let mut r = [42_i8; 32];
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let a = unsafe { _mm256_loadu_epi8(a.as_ptr()) };
        let m = 0b10101010_11001100_11101000_11001010;
        unsafe {
            _mm256_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        }
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = unsafe { _mm256_loadu_epi8(e.as_ptr()) };
        assert_eq_m256i(unsafe { _mm256_loadu_epi8(r.as_ptr()) }, e);
    }

    // 128-bit i16 masked load.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_loadu_epi16() {
        let src = _mm_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm_mask_loadu_epi16(src, m, black_box(p)) };
        let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
        let e = unsafe { _mm_loadu_epi16(e.as_ptr()) };
        assert_eq_m128i(r, e);
    }

    // 128-bit i16 zero-masked load.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_loadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm_maskz_loadu_epi16(m, black_box(p)) };
        let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8];
        let e = unsafe { _mm_loadu_epi16(e.as_ptr()) };
        assert_eq_m128i(r, e);
    }

    // 128-bit i16 masked store.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_storeu_epi16() {
        let mut r = [42_i16; 8];
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let a = unsafe { _mm_loadu_epi16(a.as_ptr()) };
        let m = 0b11001010;
        unsafe { _mm_mask_storeu_epi16(r.as_mut_ptr(), m, a) };
        let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
        let e = unsafe { _mm_loadu_epi16(e.as_ptr()) };
        assert_eq_m128i(unsafe { _mm_loadu_epi16(r.as_ptr()) }, e);
    }

    // 128-bit i8 masked load.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_loadu_epi8() {
        let src = _mm_set1_epi8(42);
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm_mask_loadu_epi8(src, m, black_box(p)) };
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = unsafe { _mm_loadu_epi8(e.as_ptr()) };
        assert_eq_m128i(r, e);
    }

    // 128-bit i8 zero-masked load.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_loadu_epi8() {
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm_maskz_loadu_epi8(m, black_box(p)) };
        let e = &[0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
        let e = unsafe { _mm_loadu_epi8(e.as_ptr()) };
        assert_eq_m128i(r, e);
    }

    // 128-bit i8 masked store.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_storeu_epi8() {
        let mut r = [42_i8; 16];
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let a = unsafe { _mm_loadu_epi8(a.as_ptr()) };
        let m = 0b11101000_11001010;
        unsafe { _mm_mask_storeu_epi8(r.as_mut_ptr(), m, a) };
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = unsafe { _mm_loadu_epi8(e.as_ptr()) };
        assert_eq_m128i(unsafe { _mm_loadu_epi8(r.as_ptr()) }, e);
    }
17678
    // madd multiplies adjacent i16 pairs and adds the products into i32 lanes:
    // with all 1s, each lane is 1*1 + 1*1 = 2.
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_madd_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_madd_epi16(a, b);
        let e = _mm512_set1_epi32(2);
        assert_eq_m512i(r, e);
    }

    // Masked madd: zero mask keeps `src` (here `a`, whose i32 lanes read
    // 1<<16|1 since they were set as pairs of i16 1s); low four mask bits
    // select the madd result 2 in those lanes.
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_madd_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_madd_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_madd_epi16(a, 0b00000000_00001111, a, b);
        let e = _mm512_set_epi32(
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            2,
            2,
            2,
            2,
        );
        assert_eq_m512i(r, e);
    }

    // Zero-masked madd: unselected lanes are zeroed instead of kept.
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_madd_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_madd_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_madd_epi16(0b00000000_00001111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }

    // 256-bit masked madd; same lane semantics as the 512-bit test.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_madd_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_madd_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_madd_epi16(a, 0b00001111, a, b);
        let e = _mm256_set_epi32(
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            2,
            2,
            2,
            2,
        );
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masked madd.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_madd_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_madd_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_madd_epi16(0b00001111, a, b);
        let e = _mm256_set_epi32(0, 0, 0, 0, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }

    // 128-bit masked madd: mask 0b00001111 covers all four i32 lanes.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_madd_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_madd_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_madd_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi32(2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masked madd.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_madd_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_madd_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_madd_epi16(0b00001111, a, b);
        let e = _mm_set_epi32(2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    // maddubs multiplies unsigned bytes of `a` by signed bytes of `b` and
    // adds adjacent products into saturated i16 lanes: 1*1 + 1*1 = 2.
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maddubs_epi16() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maddubs_epi16(a, b);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }
17788
17789    #[simd_test(enable = "avx512bw")]
17790    fn test_mm512_mask_maddubs_epi16() {
17791        let a = _mm512_set1_epi8(1);
17792        let b = _mm512_set1_epi8(1);
17793        let src = _mm512_set1_epi16(1);
17794        let r = _mm512_mask_maddubs_epi16(src, 0, a, b);
17795        assert_eq_m512i(r, src);
17796        let r = _mm512_mask_add_epi16(src, 0b00000000_00000000_00000000_00000001, a, b);
17797        #[rustfmt::skip]
17798        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17799                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1<<9|2);
17800        assert_eq_m512i(r, e);
17801    }
17802
    // Zero-masked maddubs: selected lanes get 1*1 + 1*1 = 2, the rest are 0.
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_maddubs_epi16() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_maddubs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_maddubs_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2,
                                 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }
17815
17816    #[simd_test(enable = "avx512bw,avx512vl")]
17817    fn test_mm256_mask_maddubs_epi16() {
17818        let a = _mm256_set1_epi8(1);
17819        let b = _mm256_set1_epi8(1);
17820        let src = _mm256_set1_epi16(1);
17821        let r = _mm256_mask_maddubs_epi16(src, 0, a, b);
17822        assert_eq_m256i(r, src);
17823        let r = _mm256_mask_add_epi16(src, 0b00000000_00000001, a, b);
17824        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 9 | 2);
17825        assert_eq_m256i(r, e);
17826    }
17827
    // 256-bit zero-masked maddubs: selected lanes get 2, the rest are 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_maddubs_epi16() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_maddubs_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_maddubs_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }
17838
17839    #[simd_test(enable = "avx512bw,avx512vl")]
17840    fn test_mm_mask_maddubs_epi16() {
17841        let a = _mm_set1_epi8(1);
17842        let b = _mm_set1_epi8(1);
17843        let src = _mm_set1_epi16(1);
17844        let r = _mm_mask_maddubs_epi16(src, 0, a, b);
17845        assert_eq_m128i(r, src);
17846        let r = _mm_mask_add_epi16(src, 0b00000001, a, b);
17847        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1 << 9 | 2);
17848        assert_eq_m128i(r, e);
17849    }
17850
    // 128-bit zero-masked maddubs: selected lanes get 2, the rest are 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_maddubs_epi16() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_maddubs_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_maddubs_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
17861
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_packs_epi32() {
        // Signed-saturating pack i32 -> i16: i32::MAX saturates to i16::MAX.
        let a = _mm512_set1_epi32(i32::MAX);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_packs_epi32(a, b);
        // Per 128-bit lane: 4 results from `a` (low half) then 4 from `b` (high half).
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX,
                                 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }
17872
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_packs_epi32() {
        let a = _mm512_set1_epi32(i32::MAX);
        // 1 << 16 | 1 makes both i16 halves of each i32 equal 1, so `b` also
        // serves as the merge source (all i16 lanes = 1) for the writemask path.
        let b = _mm512_set1_epi32(1 << 16 | 1);
        let r = _mm512_mask_packs_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low 4 mask bits set: those lanes take the saturated pack of `a`; the rest copy `b`.
        let r = _mm512_mask_packs_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }
17885
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_packs_epi32() {
        let a = _mm512_set1_epi32(i32::MAX);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_maskz_packs_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low 4 lanes keep the saturated pack of `a` (i16::MAX); rest are zeroed.
        let r = _mm512_maskz_packs_epi32(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }
17898
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_packs_epi32() {
        let a = _mm256_set1_epi32(i32::MAX);
        // 1 << 16 | 1: every i16 lane of `b` is 1, so `b` doubles as the merge source.
        let b = _mm256_set1_epi32(1 << 16 | 1);
        let r = _mm256_mask_packs_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Low 4 mask bits: those lanes take the saturated pack of `a`.
        let r = _mm256_mask_packs_epi32(b, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }
17910
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_packs_epi32() {
        let a = _mm256_set1_epi32(i32::MAX);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_maskz_packs_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Low 4 lanes keep the saturated pack of `a` (i16::MAX); rest are zeroed.
        let r = _mm256_maskz_packs_epi32(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }
17922
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_packs_epi32() {
        let a = _mm_set1_epi32(i32::MAX);
        // 1 << 16 | 1: every i16 lane of `b` is 1, so `b` doubles as the merge source.
        let b = _mm_set1_epi32(1 << 16 | 1);
        let r = _mm_mask_packs_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        // Low 4 mask bits: those lanes take the saturated pack of `a`.
        let r = _mm_mask_packs_epi32(b, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }
17933
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_packs_epi32() {
        let a = _mm_set1_epi32(i32::MAX);
        let b = _mm_set1_epi32(1);
        let r = _mm_maskz_packs_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Low 4 lanes keep the saturated pack of `a` (i16::MAX); rest are zeroed.
        let r = _mm_maskz_packs_epi32(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }
17944
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_packs_epi16() {
        // Signed-saturating pack i16 -> i8: i16::MAX saturates to i8::MAX.
        let a = _mm512_set1_epi16(i16::MAX);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_packs_epi16(a, b);
        // Per 128-bit lane: 8 results from `a` (low half) then 8 from `b` (high half).
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }
17957
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_packs_epi16() {
        let a = _mm512_set1_epi16(i16::MAX);
        // 1 << 8 | 1: both i8 halves of each i16 equal 1, so `b` doubles as the merge source.
        let b = _mm512_set1_epi16(1 << 8 | 1);
        let r = _mm512_mask_packs_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low 4 mask bits: those byte lanes take the saturated pack of `a`.
        let r = _mm512_mask_packs_epi16(
            b,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }
17977
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_packs_epi16() {
        let a = _mm512_set1_epi16(i16::MAX);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_packs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low 4 byte lanes keep the saturated pack of `a` (i8::MAX); rest are zeroed.
        let r = _mm512_maskz_packs_epi16(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }
17996
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_packs_epi16() {
        let a = _mm256_set1_epi16(i16::MAX);
        // 1 << 8 | 1: every i8 lane of `b` is 1, so `b` doubles as the merge source.
        let b = _mm256_set1_epi16(1 << 8 | 1);
        let r = _mm256_mask_packs_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Low 4 mask bits: those byte lanes take the saturated pack of `a`.
        let r = _mm256_mask_packs_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }
18009
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_packs_epi16() {
        let a = _mm256_set1_epi16(i16::MAX);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_packs_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Low 4 byte lanes keep the saturated pack of `a` (i8::MAX); rest are zeroed.
        let r = _mm256_maskz_packs_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }
18022
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_packs_epi16() {
        let a = _mm_set1_epi16(i16::MAX);
        // 1 << 8 | 1: every i8 lane of `b` is 1, so `b` doubles as the merge source.
        let b = _mm_set1_epi16(1 << 8 | 1);
        let r = _mm_mask_packs_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        // Low 4 mask bits: those byte lanes take the saturated pack of `a`.
        let r = _mm_mask_packs_epi16(b, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }
18034
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_packs_epi16() {
        let a = _mm_set1_epi16(i16::MAX);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_packs_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Low 4 byte lanes keep the saturated pack of `a` (i8::MAX); rest are zeroed.
        let r = _mm_maskz_packs_epi16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }
18046
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_packus_epi32() {
        // Unsigned-saturating pack i32 -> u16: negative input saturates to 0.
        let a = _mm512_set1_epi32(-1);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_packus_epi32(a, b);
        // Per 128-bit lane: 4 results from `a` (saturated to 0) then 4 from `b`.
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
                                 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
18057
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_packus_epi32() {
        let a = _mm512_set1_epi32(-1);
        // 1 << 16 | 1: every i16 lane of `b` is 1, so `b` doubles as the merge source.
        let b = _mm512_set1_epi32(1 << 16 | 1);
        let r = _mm512_mask_packus_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low 4 mask bits: those lanes take the pack of `a` (-1 saturates to 0).
        let r = _mm512_mask_packus_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
18070
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_packus_epi32() {
        let a = _mm512_set1_epi32(-1);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_maskz_packus_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Selected low lanes come from `a`, which saturates to 0 unsigned,
        // so the expected vector is all zeros either way.
        let r = _mm512_maskz_packus_epi32(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
18083
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_packus_epi32() {
        let a = _mm256_set1_epi32(-1);
        // 1 << 16 | 1: every i16 lane of `b` is 1, so `b` doubles as the merge source.
        let b = _mm256_set1_epi32(1 << 16 | 1);
        let r = _mm256_mask_packus_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Low 4 mask bits: those lanes take the pack of `a` (-1 saturates to 0).
        let r = _mm256_mask_packus_epi32(b, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
18094
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_packus_epi32() {
        let a = _mm256_set1_epi32(-1);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_maskz_packus_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Selected low lanes come from `a` (-1 saturates to 0), so all zeros.
        let r = _mm256_maskz_packus_epi32(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
18105
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_packus_epi32() {
        let a = _mm_set1_epi32(-1);
        // 1 << 16 | 1: every i16 lane of `b` is 1, so `b` doubles as the merge source.
        let b = _mm_set1_epi32(1 << 16 | 1);
        let r = _mm_mask_packus_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        // Low 4 mask bits: those lanes take the pack of `a` (-1 saturates to 0).
        let r = _mm_mask_packus_epi32(b, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
18116
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_packus_epi32() {
        let a = _mm_set1_epi32(-1);
        let b = _mm_set1_epi32(1);
        let r = _mm_maskz_packus_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Selected low lanes come from `a` (-1 saturates to 0), so all zeros.
        let r = _mm_maskz_packus_epi32(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
18127
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_packus_epi16() {
        // Unsigned-saturating pack i16 -> u8: negative input saturates to 0.
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_packus_epi16(a, b);
        // Per 128-bit lane: 8 results from `a` (saturated to 0) then 8 from `b`.
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
18140
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_packus_epi16() {
        let a = _mm512_set1_epi16(-1);
        // 1 << 8 | 1: every i8 lane of `b` is 1, so `b` doubles as the merge source.
        let b = _mm512_set1_epi16(1 << 8 | 1);
        let r = _mm512_mask_packus_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low 4 mask bits: those byte lanes take the pack of `a` (-1 saturates to 0).
        let r = _mm512_mask_packus_epi16(
            b,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
18160
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_packus_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_packus_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Selected low byte lanes come from `a` (-1 saturates to 0), so all zeros.
        let r = _mm512_maskz_packus_epi16(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
18179
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_packus_epi16() {
        let a = _mm256_set1_epi16(-1);
        // 1 << 8 | 1: every i8 lane of `b` is 1, so `b` doubles as the merge source.
        let b = _mm256_set1_epi16(1 << 8 | 1);
        let r = _mm256_mask_packus_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Low 4 mask bits: those byte lanes take the pack of `a` (-1 saturates to 0).
        let r = _mm256_mask_packus_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
18192
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_packus_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_packus_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Selected low byte lanes come from `a` (-1 saturates to 0), so all zeros.
        let r = _mm256_maskz_packus_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
18205
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_packus_epi16() {
        let a = _mm_set1_epi16(-1);
        // 1 << 8 | 1: every i8 lane of `b` is 1, so `b` doubles as the merge source.
        let b = _mm_set1_epi16(1 << 8 | 1);
        let r = _mm_mask_packus_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        // Low 4 mask bits: those byte lanes take the pack of `a` (-1 saturates to 0).
        let r = _mm_mask_packus_epi16(b, 0b00000000_00001111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
18216
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_packus_epi16() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_packus_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Selected low byte lanes come from `a` (-1 saturates to 0), so all zeros.
        let r = _mm_maskz_packus_epi16(0b00000000_00001111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
18227
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_avg_epu16() {
        // Rounding average: (1 + 1 + 1) >> 1 = 1 in every lane.
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_avg_epu16(a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }
18236
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_avg_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_avg_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // avg(1, 1) = 1 and src = 1, so every lane is 1 regardless of the mask.
        let r = _mm512_mask_avg_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
18249
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_avg_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_avg_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low 4 lanes keep avg(1, 1) = 1; the rest are zeroed.
        let r = _mm512_maskz_avg_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
18262
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_avg_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_avg_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        // avg(1, 1) = 1 and src = 1, so every lane is 1 regardless of the mask.
        let r = _mm256_mask_avg_epu16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
18273
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_avg_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_avg_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Low 4 lanes keep avg(1, 1) = 1; the rest are zeroed.
        let r = _mm256_maskz_avg_epu16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
18284
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_avg_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_avg_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        // avg(1, 1) = 1 and src = 1, so every lane is 1 regardless of the mask.
        let r = _mm_mask_avg_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
18295
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_avg_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_avg_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Low 4 lanes keep avg(1, 1) = 1; the rest are zeroed.
        let r = _mm_maskz_avg_epu16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
18306
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_avg_epu8() {
        // Rounding average: (1 + 1 + 1) >> 1 = 1 in every byte lane.
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_avg_epu8(a, b);
        let e = _mm512_set1_epi8(1);
        assert_eq_m512i(r, e);
    }
18315
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_avg_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_avg_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // avg(1, 1) = 1 and src = 1, so every lane is 1 regardless of the mask.
        let r = _mm512_mask_avg_epu8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
18335
18336    #[simd_test(enable = "avx512bw")]
18337    const fn test_mm512_maskz_avg_epu8() {
18338        let a = _mm512_set1_epi8(1);
18339        let b = _mm512_set1_epi8(1);
18340        let r = _mm512_maskz_avg_epu8(0, a, b);
18341        assert_eq_m512i(r, _mm512_setzero_si512());
18342        let r = _mm512_maskz_avg_epu8(
18343            0b00000000_000000000_00000000_00000000_00000000_0000000_00000000_00001111,
18344            a,
18345            b,
18346        );
18347        #[rustfmt::skip]
18348        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18349                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18350                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18351                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
18352        assert_eq_m512i(r, e);
18353    }
18354
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_avg_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_avg_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        // avg(1, 1) = 1 and src = 1, so every lane is 1 regardless of the mask.
        let r = _mm256_mask_avg_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
18367
18368    #[simd_test(enable = "avx512bw,avx512vl")]
18369    const fn test_mm256_maskz_avg_epu8() {
18370        let a = _mm256_set1_epi8(1);
18371        let b = _mm256_set1_epi8(1);
18372        let r = _mm256_maskz_avg_epu8(0, a, b);
18373        assert_eq_m256i(r, _mm256_setzero_si256());
18374        let r = _mm256_maskz_avg_epu8(0b00000000_0000000_00000000_00001111, a, b);
18375        #[rustfmt::skip]
18376        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18377                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
18378        assert_eq_m256i(r, e);
18379    }
18380
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_avg_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_avg_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        // avg(1, 1) = 1 and src = 1, so every lane is 1 regardless of the mask.
        let r = _mm_mask_avg_epu8(a, 0b00000000_00001111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
18391
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_avg_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_avg_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Low 4 byte lanes keep avg(1, 1) = 1; the rest are zeroed.
        let r = _mm_maskz_avg_epu8(0b00000000_00001111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
18402
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_sll_epi16() {
        // 0x8000 << 2 overflows the 16-bit lane, leaving 0.
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm512_sll_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
18411
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_sll_epi16() {
        // 0x8000 << 2 overflows the 16-bit lane, leaving 0.
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm512_mask_sll_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        // All mask bits set: every lane takes the shifted (zero) result.
        let r = _mm512_mask_sll_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
18422
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_sll_epi16() {
        // 0x8000 << 2 overflows the 16-bit lane, leaving 0.
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm512_maskz_sll_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sll_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
18433
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_sll_epi16() {
        // 0x8000 << 2 overflows the 16-bit lane, leaving 0.
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm256_mask_sll_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sll_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
18444
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_sll_epi16() {
        // 0x8000 << 2 overflows the 16-bit lane, leaving 0.
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm256_maskz_sll_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sll_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
18455
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_sll_epi16() {
        // 0x8000 << 2 overflows the 16-bit lane, leaving 0.
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_sll_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sll_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
18466
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_sll_epi16() {
        // 0x8000 << 2 overflows the 16-bit lane, leaving 0.
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_sll_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sll_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
18477
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_slli_epi16() {
        // 0x8000 << 1 overflows the 16-bit lane, leaving 0.
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_slli_epi16::<1>(a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
18485
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_slli_epi16() {
        // 0x8000 << 1 overflows the 16-bit lane, leaving 0.
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_mask_slli_epi16::<1>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_slli_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
18495
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_slli_epi16() {
        // 0x8000 << 1 overflows the 16-bit lane, leaving 0.
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_maskz_slli_epi16::<1>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_slli_epi16::<1>(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
18505
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_slli_epi16() {
        // 0x8000 << 1 overflows the 16-bit lane, leaving 0.
        let a = _mm256_set1_epi16(1 << 15);
        let r = _mm256_mask_slli_epi16::<1>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_slli_epi16::<1>(a, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
18515
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_slli_epi16() {
        // 0x8000 << 1 overflows the 16-bit lane, leaving 0.
        let a = _mm256_set1_epi16(1 << 15);
        let r = _mm256_maskz_slli_epi16::<1>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_slli_epi16::<1>(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
18525
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_slli_epi16() {
        // 0x8000 << 1 overflows the 16-bit lane, leaving 0.
        let a = _mm_set1_epi16(1 << 15);
        let r = _mm_mask_slli_epi16::<1>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_slli_epi16::<1>(a, 0b11111111, a);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
18535
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_slli_epi16() {
        // 0x8000 << 1 overflows the 16-bit lane, leaving 0.
        let a = _mm_set1_epi16(1 << 15);
        let r = _mm_maskz_slli_epi16::<1>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_slli_epi16::<1>(0b11111111, a);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
18545
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_sllv_epi16() {
        // Per-lane variable shift: 0x8000 << 2 overflows the 16-bit lane, leaving 0.
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_sllv_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
18554
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_sllv_epi16() {
        // Per-lane variable shift: 0x8000 << 2 overflows the 16-bit lane, leaving 0.
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_mask_sllv_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sllv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
18565
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_sllv_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm512_set1_epi16(2);
        // Zero mask zeroes all lanes; full mask applies the per-lane shift (overflows to 0).
        let r = _mm512_maskz_sllv_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sllv_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
18576
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_sllv_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm256_set1_epi16(2);
        // Per-lane variable shift: 1 << 15 shifted left by 2 overflows to 0.
        let r = _mm256_sllv_epi16(a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
18585
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_sllv_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm256_set1_epi16(2);
        // Zero mask copies src (`a`); full mask applies the per-lane shift (overflows to 0).
        let r = _mm256_mask_sllv_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sllv_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
18596
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_sllv_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm256_set1_epi16(2);
        // Zero mask zeroes all lanes; full mask applies the per-lane shift (overflows to 0).
        let r = _mm256_maskz_sllv_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sllv_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
18607
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_sllv_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        // Per-lane variable shift: 1 << 15 shifted left by 2 overflows to 0.
        let r = _mm_sllv_epi16(a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
18616
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_sllv_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        // Zero mask copies src (`a`); full mask applies the per-lane shift (overflows to 0).
        let r = _mm_mask_sllv_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sllv_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
18627
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_sllv_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        // Zero mask zeroes all lanes; full mask applies the per-lane shift (overflows to 0).
        let r = _mm_maskz_sllv_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sllv_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
18638
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_srl_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        // The shift count is the low 64 bits of `count`: set1_epi16(2) makes that
        // 0x0002_0002_0002_0002, which is > 15, so every lane is shifted out to 0.
        let count = _mm_set1_epi16(2);
        let r = _mm512_srl_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
18647
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_srl_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        // Low 64 bits of `count` (0x0002_0002_0002_0002) exceed 15 -> shifted lanes become 0.
        let count = _mm_set1_epi16(2);
        // Zero mask copies src (`a`); full mask yields the shifted (zero) lanes.
        let r = _mm512_mask_srl_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srl_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
18658
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_srl_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        // Low 64 bits of `count` (0x0002_0002_0002_0002) exceed 15 -> shifted lanes become 0.
        let count = _mm_set1_epi16(2);
        // Zero mask zeroes all lanes; full mask yields the shifted (zero) lanes.
        let r = _mm512_maskz_srl_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srl_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
18669
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_srl_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        // Low 64 bits of `count` (0x0002_0002_0002_0002) exceed 15 -> shifted lanes become 0.
        let count = _mm_set1_epi16(2);
        // Zero mask copies src (`a`); full mask yields the shifted (zero) lanes.
        let r = _mm256_mask_srl_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srl_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
18680
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_srl_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        // Low 64 bits of `count` (0x0002_0002_0002_0002) exceed 15 -> shifted lanes become 0.
        let count = _mm_set1_epi16(2);
        // Zero mask zeroes all lanes; full mask yields the shifted (zero) lanes.
        let r = _mm256_maskz_srl_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srl_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
18691
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_srl_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        // Low 64 bits of `count` (0x0002_0002_0002_0002) exceed 15 -> shifted lanes become 0.
        let count = _mm_set1_epi16(2);
        // Zero mask copies src (`a`); full mask yields the shifted (zero) lanes.
        let r = _mm_mask_srl_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srl_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
18702
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_srl_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        // Low 64 bits of `count` (0x0002_0002_0002_0002) exceed 15 -> shifted lanes become 0.
        let count = _mm_set1_epi16(2);
        // Zero mask zeroes all lanes; full mask yields the shifted (zero) lanes.
        let r = _mm_maskz_srl_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srl_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
18713
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_srli_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        // Immediate logical shift: 2 >> 2 == 0 in every lane.
        let r = _mm512_srli_epi16::<2>(a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
18721
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_srli_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        // Zero mask copies src (`a`); full mask applies the shift (2 >> 2 == 0).
        let r = _mm512_mask_srli_epi16::<2>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srli_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
18731
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_srli_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        // Zero mask zeroes all lanes; full mask applies the shift (2 >> 2 == 0).
        let r = _mm512_maskz_srli_epi16::<2>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srli_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
18741
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_srli_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        // Zero mask copies src (`a`); full mask applies the shift (2 >> 2 == 0).
        let r = _mm256_mask_srli_epi16::<2>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srli_epi16::<2>(a, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
18751
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_srli_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        // Zero mask zeroes all lanes; full mask applies the shift (2 >> 2 == 0).
        let r = _mm256_maskz_srli_epi16::<2>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srli_epi16::<2>(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
18761
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_srli_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        // Zero mask copies src (`a`); full mask applies the shift (2 >> 2 == 0).
        let r = _mm_mask_srli_epi16::<2>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srli_epi16::<2>(a, 0b11111111, a);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
18771
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_srli_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        // Zero mask zeroes all lanes; full mask applies the shift (2 >> 2 == 0).
        let r = _mm_maskz_srli_epi16::<2>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srli_epi16::<2>(0b11111111, a);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
18781
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_srlv_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm512_set1_epi16(2);
        // Per-lane variable shift: 2 >> 2 == 0 in every lane.
        let r = _mm512_srlv_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
18790
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_srlv_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm512_set1_epi16(2);
        // Zero mask copies src (`a`); full mask applies the per-lane shift (2 >> 2 == 0).
        let r = _mm512_mask_srlv_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srlv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
18801
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_srlv_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm512_set1_epi16(2);
        // Zero mask zeroes all lanes; full mask applies the per-lane shift (2 >> 2 == 0).
        let r = _mm512_maskz_srlv_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srlv_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
18812
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_srlv_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm256_set1_epi16(2);
        // Per-lane variable shift: 2 >> 2 == 0 in every lane.
        let r = _mm256_srlv_epi16(a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
18821
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_srlv_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm256_set1_epi16(2);
        // Zero mask copies src (`a`); full mask applies the per-lane shift (2 >> 2 == 0).
        let r = _mm256_mask_srlv_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srlv_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
18832
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_srlv_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm256_set1_epi16(2);
        // Zero mask zeroes all lanes; full mask applies the per-lane shift (2 >> 2 == 0).
        let r = _mm256_maskz_srlv_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srlv_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
18843
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_srlv_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        // Per-lane variable shift: 2 >> 2 == 0 in every lane.
        let r = _mm_srlv_epi16(a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
18852
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_srlv_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        // Zero mask copies src (`a`); full mask applies the per-lane shift (2 >> 2 == 0).
        let r = _mm_mask_srlv_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srlv_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
18863
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_srlv_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        // Zero mask zeroes all lanes; full mask applies the per-lane shift (2 >> 2 == 0).
        let r = _mm_maskz_srlv_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srlv_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
18874
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_sra_epi16() {
        let a = _mm512_set1_epi16(8);
        // The shift count is the low 64 bits of `count` (0x0001_0001_0001_0001 here),
        // which is > 15; an arithmetic shift then fills each lane with the sign bit,
        // and 8 is non-negative, so every lane is 0.
        let count = _mm_set1_epi16(1);
        let r = _mm512_sra_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
18883
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_sra_epi16() {
        let a = _mm512_set1_epi16(8);
        // Low 64 bits of `count` exceed 15 -> lanes saturate to the sign bit (0 for 8).
        let count = _mm_set1_epi16(1);
        // Zero mask copies src (`a`); full mask yields the shifted (zero) lanes.
        let r = _mm512_mask_sra_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sra_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
18894
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_sra_epi16() {
        let a = _mm512_set1_epi16(8);
        // Low 64 bits of `count` exceed 15 -> lanes saturate to the sign bit (0 for 8).
        let count = _mm_set1_epi16(1);
        // Zero mask zeroes all lanes; full mask yields the shifted (zero) lanes.
        let r = _mm512_maskz_sra_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sra_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
18905
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_sra_epi16() {
        let a = _mm256_set1_epi16(8);
        // Low 64 bits of `count` exceed 15 -> lanes saturate to the sign bit (0 for 8).
        let count = _mm_set1_epi16(1);
        // Zero mask copies src (`a`); full mask yields the shifted (zero) lanes.
        let r = _mm256_mask_sra_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sra_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
18916
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_sra_epi16() {
        let a = _mm256_set1_epi16(8);
        // Low 64 bits of `count` exceed 15 -> lanes saturate to the sign bit (0 for 8).
        let count = _mm_set1_epi16(1);
        // Zero mask zeroes all lanes; full mask yields the shifted (zero) lanes.
        let r = _mm256_maskz_sra_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sra_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }
18927
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_sra_epi16() {
        let a = _mm_set1_epi16(8);
        // Low 64 bits of `count` exceed 15 -> lanes saturate to the sign bit (0 for 8).
        let count = _mm_set1_epi16(1);
        // Zero mask copies src (`a`); full mask yields the shifted (zero) lanes.
        let r = _mm_mask_sra_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sra_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
18938
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_sra_epi16() {
        let a = _mm_set1_epi16(8);
        // Low 64 bits of `count` exceed 15 -> lanes saturate to the sign bit (0 for 8).
        let count = _mm_set1_epi16(1);
        // Zero mask zeroes all lanes; full mask yields the shifted (zero) lanes.
        let r = _mm_maskz_sra_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sra_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
18949
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_srai_epi16() {
        let a = _mm512_set1_epi16(8);
        // Immediate arithmetic shift: 8 >> 2 == 2 in every lane.
        let r = _mm512_srai_epi16::<2>(a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }
18957
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_srai_epi16() {
        let a = _mm512_set1_epi16(8);
        // Zero mask copies src (`a`); full mask applies the shift (8 >> 2 == 2).
        let r = _mm512_mask_srai_epi16::<2>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srai_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }
18967
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_srai_epi16() {
        let a = _mm512_set1_epi16(8);
        // Zero mask zeroes all lanes; full mask applies the shift (8 >> 2 == 2).
        let r = _mm512_maskz_srai_epi16::<2>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srai_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }
18977
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_srai_epi16() {
        let a = _mm256_set1_epi16(8);
        // Zero mask copies src (`a`); full mask applies the shift (8 >> 2 == 2).
        let r = _mm256_mask_srai_epi16::<2>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srai_epi16::<2>(a, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }
18987
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_srai_epi16() {
        let a = _mm256_set1_epi16(8);
        // Zero mask zeroes all lanes; full mask applies the shift (8 >> 2 == 2).
        let r = _mm256_maskz_srai_epi16::<2>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srai_epi16::<2>(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }
18997
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_srai_epi16() {
        let a = _mm_set1_epi16(8);
        // Zero mask copies src (`a`); full mask applies the shift (8 >> 2 == 2).
        let r = _mm_mask_srai_epi16::<2>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srai_epi16::<2>(a, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }
19007
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_srai_epi16() {
        let a = _mm_set1_epi16(8);
        // Zero mask zeroes all lanes; full mask applies the shift (8 >> 2 == 2).
        let r = _mm_maskz_srai_epi16::<2>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srai_epi16::<2>(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }
19017
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_srav_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm512_set1_epi16(2);
        // Per-lane arithmetic shift: 8 >> 2 == 2 in every lane.
        let r = _mm512_srav_epi16(a, count);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }
19026
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_srav_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm512_set1_epi16(2);
        // Zero mask copies src (`a`); full mask applies the per-lane shift (8 >> 2 == 2).
        let r = _mm512_mask_srav_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srav_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }
19037
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_srav_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm512_set1_epi16(2);
        // Zero mask zeroes all lanes; full mask applies the per-lane shift (8 >> 2 == 2).
        let r = _mm512_maskz_srav_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srav_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }
19048
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_srav_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm256_set1_epi16(2);
        // Per-lane arithmetic shift: 8 >> 2 == 2 in every lane.
        let r = _mm256_srav_epi16(a, count);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }
19057
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_srav_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm256_set1_epi16(2);
        // Zero mask copies src (`a`); full mask applies the per-lane shift (8 >> 2 == 2).
        let r = _mm256_mask_srav_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srav_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }
19068
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_srav_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm256_set1_epi16(2);
        // Zero mask zeroes all lanes; full mask applies the per-lane shift (8 >> 2 == 2).
        let r = _mm256_maskz_srav_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srav_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }
19079
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_srav_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(2);
        // Per-lane arithmetic shift: 8 >> 2 == 2 in every lane.
        let r = _mm_srav_epi16(a, count);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }
19088
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_srav_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(2);
        // Zero mask copies src (`a`); full mask applies the per-lane shift (8 >> 2 == 2).
        let r = _mm_mask_srav_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srav_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }
19099
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_srav_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(2);
        // Zero mask zeroes all lanes; full mask applies the per-lane shift (8 >> 2 == 2).
        let r = _mm_maskz_srav_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srav_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }
19110
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_permutex2var_epi16() {
        // `_mm512_set_epi16` lists lanes high-to-low, so lane k of `a` holds 31 - k.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        // Index bit 5 (1 << 5) selects from `b`; otherwise the low 5 bits pick a lane of `a`,
        // e.g. idx 1 -> lane 1 of `a` == 30.
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_permutex2var_epi16(a, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }
19128
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        // Index bit 5 selects from `b`; otherwise the low 5 bits pick a lane of `a`.
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        // Zero mask copies src (`a`); full mask applies the permutation.
        let r = _mm512_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutex2var_epi16(a, 0b11111111_11111111_11111111_11111111, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }
19148
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        // Index bit 5 selects from `b`; otherwise the low 5 bits pick a lane of `a`.
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        // Zero mask zeroes all lanes; full mask applies the permutation.
        let r = _mm512_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutex2var_epi16(0b11111111_11111111_11111111_11111111, a, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }
19168
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask2_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        // Index bit 5 selects from `b`; otherwise the low 5 bits pick a lane of `a`.
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        // The mask2 variant copies unselected lanes from `idx` (not `a`), hence `r == idx`
        // for the zero mask; full mask applies the permutation.
        let r = _mm512_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m512i(r, idx);
        let r = _mm512_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111_11111111_11111111, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }
19188
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_permutex2var_epi16() {
        // Lanes are listed high-to-low, so lane k of `a` holds 15 - k.
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        // Index bit 4 (1 << 4) selects from `b`; otherwise the low 4 bits pick a lane of `a`.
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_permutex2var_epi16(a, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }
19201
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        // Index bit 4 selects from `b`; otherwise the low 4 bits pick a lane of `a`.
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        // Zero mask copies src (`a`); full mask applies the permutation.
        let r = _mm256_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutex2var_epi16(a, 0b11111111_11111111, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }
19216
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        // Index bit 4 selects from `b`; otherwise the low 4 bits pick a lane of `a`.
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        // Zero mask zeroes all lanes; full mask applies the permutation.
        let r = _mm256_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutex2var_epi16(0b11111111_11111111, a, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }
19231
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask2_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        // Index bit 4 selects from `b`; otherwise the low 4 bits pick a lane of `a`.
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        // The mask2 variant copies unselected lanes from `idx`, hence `r == idx` for the
        // zero mask; full mask applies the permutation.
        let r = _mm256_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m256i(r, idx);
        let r = _mm256_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }
19247
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_permutex2var_epi16() {
        // Lanes are listed high-to-low, so lane k of `a` holds 7 - k.
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        // Index bit 3 (1 << 3) selects from `b`; otherwise the low 3 bits pick a lane of `a`.
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_permutex2var_epi16(a, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }
19257
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        // Index bit 3 selects from `b`; otherwise the low 3 bits pick a lane of `a`.
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        // Zero mask copies src (`a`); full mask applies the permutation.
        let r = _mm_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutex2var_epi16(a, 0b11111111, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }
19269
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        // Index bit 3 selects from `b`; otherwise the low 3 bits pick a lane of `a`.
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        // Zero mask zeroes all lanes; full mask applies the permutation.
        let r = _mm_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutex2var_epi16(0b11111111, a, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }
19281
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask2_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        // Index bit 3 selects from `b`; otherwise the low 3 bits pick a lane of `a`.
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        // The mask2 variant copies unselected lanes from `idx`, hence `r == idx` for the
        // zero mask; full mask applies the permutation.
        let r = _mm_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m128i(r, idx);
        let r = _mm_mask2_permutex2var_epi16(a, idx, 0b11111111, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }
19293
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_permutexvar_epi16() {
        // Every lane's index is 1. `set_epi16` lists lanes high-to-low, so
        // element 1 of `a` holds the value 30 and the result broadcasts 30.
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_permutexvar_epi16(idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }
19304
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_permutexvar_epi16() {
        // Element 1 of `a` (lanes listed high-to-low) holds the value 30.
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        // Zero writemask: the src operand (`a`) is returned untouched.
        let r = _mm512_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m512i(r, a);
        // Full writemask: every lane holds element 1 of `a`, i.e. 30.
        let r = _mm512_mask_permutexvar_epi16(a, 0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }
19317
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_permutexvar_epi16() {
        // Element 1 of `a` (lanes listed high-to-low) holds the value 30.
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        // Zero mask zeroes the whole vector.
        let r = _mm512_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Full mask: broadcast of element 1 (value 30).
        let r = _mm512_maskz_permutexvar_epi16(0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }
19330
19331    #[simd_test(enable = "avx512bw,avx512vl")]
19332    fn test_mm256_permutexvar_epi16() {
19333        let idx = _mm256_set1_epi16(1);
19334        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
19335        let r = _mm256_permutexvar_epi16(idx, a);
19336        let e = _mm256_set1_epi16(14);
19337        assert_eq_m256i(r, e);
19338    }
19339
19340    #[simd_test(enable = "avx512bw,avx512vl")]
19341    fn test_mm256_mask_permutexvar_epi16() {
19342        let idx = _mm256_set1_epi16(1);
19343        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
19344        let r = _mm256_mask_permutexvar_epi16(a, 0, idx, a);
19345        assert_eq_m256i(r, a);
19346        let r = _mm256_mask_permutexvar_epi16(a, 0b11111111_11111111, idx, a);
19347        let e = _mm256_set1_epi16(14);
19348        assert_eq_m256i(r, e);
19349    }
19350
19351    #[simd_test(enable = "avx512bw,avx512vl")]
19352    fn test_mm256_maskz_permutexvar_epi16() {
19353        let idx = _mm256_set1_epi16(1);
19354        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
19355        let r = _mm256_maskz_permutexvar_epi16(0, idx, a);
19356        assert_eq_m256i(r, _mm256_setzero_si256());
19357        let r = _mm256_maskz_permutexvar_epi16(0b11111111_11111111, idx, a);
19358        let e = _mm256_set1_epi16(14);
19359        assert_eq_m256i(r, e);
19360    }
19361
19362    #[simd_test(enable = "avx512bw,avx512vl")]
19363    fn test_mm_permutexvar_epi16() {
19364        let idx = _mm_set1_epi16(1);
19365        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
19366        let r = _mm_permutexvar_epi16(idx, a);
19367        let e = _mm_set1_epi16(6);
19368        assert_eq_m128i(r, e);
19369    }
19370
19371    #[simd_test(enable = "avx512bw,avx512vl")]
19372    fn test_mm_mask_permutexvar_epi16() {
19373        let idx = _mm_set1_epi16(1);
19374        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
19375        let r = _mm_mask_permutexvar_epi16(a, 0, idx, a);
19376        assert_eq_m128i(r, a);
19377        let r = _mm_mask_permutexvar_epi16(a, 0b11111111, idx, a);
19378        let e = _mm_set1_epi16(6);
19379        assert_eq_m128i(r, e);
19380    }
19381
19382    #[simd_test(enable = "avx512bw,avx512vl")]
19383    fn test_mm_maskz_permutexvar_epi16() {
19384        let idx = _mm_set1_epi16(1);
19385        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
19386        let r = _mm_maskz_permutexvar_epi16(0, idx, a);
19387        assert_eq_m128i(r, _mm_setzero_si128());
19388        let r = _mm_maskz_permutexvar_epi16(0b11111111, idx, a);
19389        let e = _mm_set1_epi16(6);
19390        assert_eq_m128i(r, e);
19391    }
19392
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_blend_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        // Set mask bits select from `b`, clear bits from `a`.
        let r = _mm512_mask_blend_epi16(0b11111111_00000000_11111111_00000000, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
19403
19404    #[simd_test(enable = "avx512bw,avx512vl")]
19405    const fn test_mm256_mask_blend_epi16() {
19406        let a = _mm256_set1_epi16(1);
19407        let b = _mm256_set1_epi16(2);
19408        let r = _mm256_mask_blend_epi16(0b11111111_00000000, a, b);
19409        let e = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
19410        assert_eq_m256i(r, e);
19411    }
19412
19413    #[simd_test(enable = "avx512bw,avx512vl")]
19414    const fn test_mm_mask_blend_epi16() {
19415        let a = _mm_set1_epi16(1);
19416        let b = _mm_set1_epi16(2);
19417        let r = _mm_mask_blend_epi16(0b11110000, a, b);
19418        let e = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
19419        assert_eq_m128i(r, e);
19420    }
19421
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_blend_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        // Set mask bits select from `b`, clear bits from `a`.
        let r = _mm512_mask_blend_epi8(
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
19438
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_blend_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        // Set mask bits select from `b`, clear bits from `a`.
        let r = _mm256_mask_blend_epi8(0b11111111_00000000_11111111_00000000, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
19449
19450    #[simd_test(enable = "avx512bw,avx512vl")]
19451    const fn test_mm_mask_blend_epi8() {
19452        let a = _mm_set1_epi8(1);
19453        let b = _mm_set1_epi8(2);
19454        let r = _mm_mask_blend_epi8(0b11111111_00000000, a, b);
19455        let e = _mm_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
19456        assert_eq_m128i(r, e);
19457    }
19458
19459    #[simd_test(enable = "avx512bw")]
19460    const fn test_mm512_broadcastw_epi16() {
19461        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19462        let r = _mm512_broadcastw_epi16(a);
19463        let e = _mm512_set1_epi16(24);
19464        assert_eq_m512i(r, e);
19465    }
19466
19467    #[simd_test(enable = "avx512bw")]
19468    const fn test_mm512_mask_broadcastw_epi16() {
19469        let src = _mm512_set1_epi16(1);
19470        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19471        let r = _mm512_mask_broadcastw_epi16(src, 0, a);
19472        assert_eq_m512i(r, src);
19473        let r = _mm512_mask_broadcastw_epi16(src, 0b11111111_11111111_11111111_11111111, a);
19474        let e = _mm512_set1_epi16(24);
19475        assert_eq_m512i(r, e);
19476    }
19477
19478    #[simd_test(enable = "avx512bw")]
19479    const fn test_mm512_maskz_broadcastw_epi16() {
19480        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19481        let r = _mm512_maskz_broadcastw_epi16(0, a);
19482        assert_eq_m512i(r, _mm512_setzero_si512());
19483        let r = _mm512_maskz_broadcastw_epi16(0b11111111_11111111_11111111_11111111, a);
19484        let e = _mm512_set1_epi16(24);
19485        assert_eq_m512i(r, e);
19486    }
19487
19488    #[simd_test(enable = "avx512bw,avx512vl")]
19489    const fn test_mm256_mask_broadcastw_epi16() {
19490        let src = _mm256_set1_epi16(1);
19491        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19492        let r = _mm256_mask_broadcastw_epi16(src, 0, a);
19493        assert_eq_m256i(r, src);
19494        let r = _mm256_mask_broadcastw_epi16(src, 0b11111111_11111111, a);
19495        let e = _mm256_set1_epi16(24);
19496        assert_eq_m256i(r, e);
19497    }
19498
19499    #[simd_test(enable = "avx512bw,avx512vl")]
19500    const fn test_mm256_maskz_broadcastw_epi16() {
19501        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19502        let r = _mm256_maskz_broadcastw_epi16(0, a);
19503        assert_eq_m256i(r, _mm256_setzero_si256());
19504        let r = _mm256_maskz_broadcastw_epi16(0b11111111_11111111, a);
19505        let e = _mm256_set1_epi16(24);
19506        assert_eq_m256i(r, e);
19507    }
19508
19509    #[simd_test(enable = "avx512bw,avx512vl")]
19510    const fn test_mm_mask_broadcastw_epi16() {
19511        let src = _mm_set1_epi16(1);
19512        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19513        let r = _mm_mask_broadcastw_epi16(src, 0, a);
19514        assert_eq_m128i(r, src);
19515        let r = _mm_mask_broadcastw_epi16(src, 0b11111111, a);
19516        let e = _mm_set1_epi16(24);
19517        assert_eq_m128i(r, e);
19518    }
19519
19520    #[simd_test(enable = "avx512bw,avx512vl")]
19521    const fn test_mm_maskz_broadcastw_epi16() {
19522        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19523        let r = _mm_maskz_broadcastw_epi16(0, a);
19524        assert_eq_m128i(r, _mm_setzero_si128());
19525        let r = _mm_maskz_broadcastw_epi16(0b11111111, a);
19526        let e = _mm_set1_epi16(24);
19527        assert_eq_m128i(r, e);
19528    }
19529
19530    #[simd_test(enable = "avx512bw")]
19531    const fn test_mm512_broadcastb_epi8() {
19532        let a = _mm_set_epi8(
19533            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19534        );
19535        let r = _mm512_broadcastb_epi8(a);
19536        let e = _mm512_set1_epi8(32);
19537        assert_eq_m512i(r, e);
19538    }
19539
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_broadcastb_epi8() {
        let src = _mm512_set1_epi8(1);
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        // Zero writemask keeps the `src` values.
        let r = _mm512_mask_broadcastb_epi8(src, 0, a);
        assert_eq_m512i(r, src);
        // Full writemask broadcasts the lowest byte of `a` (32) into every lane.
        let r = _mm512_mask_broadcastb_epi8(
            src,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }
19556
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        // Zero mask produces an all-zero vector.
        let r = _mm512_maskz_broadcastb_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Full mask broadcasts the lowest byte of `a` (32) into every lane.
        let r = _mm512_maskz_broadcastb_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }
19571
19572    #[simd_test(enable = "avx512bw,avx512vl")]
19573    const fn test_mm256_mask_broadcastb_epi8() {
19574        let src = _mm256_set1_epi8(1);
19575        let a = _mm_set_epi8(
19576            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19577        );
19578        let r = _mm256_mask_broadcastb_epi8(src, 0, a);
19579        assert_eq_m256i(r, src);
19580        let r = _mm256_mask_broadcastb_epi8(src, 0b11111111_11111111_11111111_11111111, a);
19581        let e = _mm256_set1_epi8(32);
19582        assert_eq_m256i(r, e);
19583    }
19584
19585    #[simd_test(enable = "avx512bw,avx512vl")]
19586    const fn test_mm256_maskz_broadcastb_epi8() {
19587        let a = _mm_set_epi8(
19588            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19589        );
19590        let r = _mm256_maskz_broadcastb_epi8(0, a);
19591        assert_eq_m256i(r, _mm256_setzero_si256());
19592        let r = _mm256_maskz_broadcastb_epi8(0b11111111_11111111_11111111_11111111, a);
19593        let e = _mm256_set1_epi8(32);
19594        assert_eq_m256i(r, e);
19595    }
19596
19597    #[simd_test(enable = "avx512bw,avx512vl")]
19598    const fn test_mm_mask_broadcastb_epi8() {
19599        let src = _mm_set1_epi8(1);
19600        let a = _mm_set_epi8(
19601            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19602        );
19603        let r = _mm_mask_broadcastb_epi8(src, 0, a);
19604        assert_eq_m128i(r, src);
19605        let r = _mm_mask_broadcastb_epi8(src, 0b11111111_11111111, a);
19606        let e = _mm_set1_epi8(32);
19607        assert_eq_m128i(r, e);
19608    }
19609
19610    #[simd_test(enable = "avx512bw,avx512vl")]
19611    const fn test_mm_maskz_broadcastb_epi8() {
19612        let a = _mm_set_epi8(
19613            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19614        );
19615        let r = _mm_maskz_broadcastb_epi8(0, a);
19616        assert_eq_m128i(r, _mm_setzero_si128());
19617        let r = _mm_maskz_broadcastb_epi8(0b11111111_11111111, a);
19618        let e = _mm_set1_epi8(32);
19619        assert_eq_m128i(r, e);
19620    }
19621
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_unpackhi_epi16() {
        // Interleaves the upper four words of each 128-bit lane of `a` and `b`.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_unpackhi_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }
19636
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        // Zero writemask returns `a` (the src operand) unchanged.
        let r = _mm512_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Full writemask interleaves the upper words of each 128-bit lane of `a` and `b`.
        let r = _mm512_mask_unpackhi_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }
19653
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        // Zero mask zeroes every lane.
        let r = _mm512_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Full mask interleaves the upper words of each 128-bit lane of `a` and `b`.
        let r = _mm512_maskz_unpackhi_epi16(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }
19670
19671    #[simd_test(enable = "avx512bw,avx512vl")]
19672    const fn test_mm256_mask_unpackhi_epi16() {
19673        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
19674        let b = _mm256_set_epi16(
19675            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
19676        );
19677        let r = _mm256_mask_unpackhi_epi16(a, 0, a, b);
19678        assert_eq_m256i(r, a);
19679        let r = _mm256_mask_unpackhi_epi16(a, 0b11111111_11111111, a, b);
19680        let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
19681        assert_eq_m256i(r, e);
19682    }
19683
19684    #[simd_test(enable = "avx512bw,avx512vl")]
19685    const fn test_mm256_maskz_unpackhi_epi16() {
19686        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
19687        let b = _mm256_set_epi16(
19688            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
19689        );
19690        let r = _mm256_maskz_unpackhi_epi16(0, a, b);
19691        assert_eq_m256i(r, _mm256_setzero_si256());
19692        let r = _mm256_maskz_unpackhi_epi16(0b11111111_11111111, a, b);
19693        let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
19694        assert_eq_m256i(r, e);
19695    }
19696
19697    #[simd_test(enable = "avx512bw,avx512vl")]
19698    const fn test_mm_mask_unpackhi_epi16() {
19699        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
19700        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
19701        let r = _mm_mask_unpackhi_epi16(a, 0, a, b);
19702        assert_eq_m128i(r, a);
19703        let r = _mm_mask_unpackhi_epi16(a, 0b11111111, a, b);
19704        let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
19705        assert_eq_m128i(r, e);
19706    }
19707
19708    #[simd_test(enable = "avx512bw,avx512vl")]
19709    const fn test_mm_maskz_unpackhi_epi16() {
19710        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
19711        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
19712        let r = _mm_maskz_unpackhi_epi16(0, a, b);
19713        assert_eq_m128i(r, _mm_setzero_si128());
19714        let r = _mm_maskz_unpackhi_epi16(0b11111111, a, b);
19715        let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
19716        assert_eq_m128i(r, e);
19717    }
19718
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_unpackhi_epi8() {
        // Interleaves the upper eight bytes of each 128-bit lane of `a` and `b`.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_unpackhi_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }
19739
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        // Zero writemask returns `a` (the src operand) unchanged.
        let r = _mm512_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Full writemask interleaves the upper bytes of each 128-bit lane of `a` and `b`.
        let r = _mm512_mask_unpackhi_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }
19767
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        // Zero mask zeroes every lane.
        let r = _mm512_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Full mask interleaves the upper bytes of each 128-bit lane of `a` and `b`.
        let r = _mm512_maskz_unpackhi_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }
19794
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        // Zero writemask returns `a` (the src operand) unchanged.
        let r = _mm256_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Full writemask interleaves the upper bytes of each 128-bit half of `a` and `b`.
        let r = _mm256_mask_unpackhi_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
        assert_eq_m256i(r, e);
    }
19811
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        // Zero mask zeroes every lane.
        let r = _mm256_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Full mask interleaves the upper bytes of each 128-bit half of `a` and `b`.
        let r = _mm256_maskz_unpackhi_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
        assert_eq_m256i(r, e);
    }
19828
19829    #[simd_test(enable = "avx512bw,avx512vl")]
19830    const fn test_mm_mask_unpackhi_epi8() {
19831        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
19832        let b = _mm_set_epi8(
19833            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
19834        );
19835        let r = _mm_mask_unpackhi_epi8(a, 0, a, b);
19836        assert_eq_m128i(r, a);
19837        let r = _mm_mask_unpackhi_epi8(a, 0b11111111_11111111, a, b);
19838        let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
19839        assert_eq_m128i(r, e);
19840    }
19841
19842    #[simd_test(enable = "avx512bw,avx512vl")]
19843    const fn test_mm_maskz_unpackhi_epi8() {
19844        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
19845        let b = _mm_set_epi8(
19846            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
19847        );
19848        let r = _mm_maskz_unpackhi_epi8(0, a, b);
19849        assert_eq_m128i(r, _mm_setzero_si128());
19850        let r = _mm_maskz_unpackhi_epi8(0b11111111_11111111, a, b);
19851        let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
19852        assert_eq_m128i(r, e);
19853    }
19854
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_unpacklo_epi16() {
        // Interleaves the lower four words of each 128-bit lane of `a` and `b`.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_unpacklo_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
        assert_eq_m512i(r, e);
    }
19869
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_unpacklo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        // Zero writemask returns `a` (the src operand) unchanged.
        let r = _mm512_mask_unpacklo_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Full writemask interleaves the lower words of each 128-bit lane of `a` and `b`.
        let r = _mm512_mask_unpacklo_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
        assert_eq_m512i(r, e);
    }
19886
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_unpacklo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        // Zero mask zeroes every lane.
        let r = _mm512_maskz_unpacklo_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Full mask interleaves the lower words of each 128-bit lane of `a` and `b`.
        let r = _mm512_maskz_unpacklo_epi16(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
        assert_eq_m512i(r, e);
    }
19903
    // mask: a zero mask must leave src (= a) untouched; an all-ones mask must
    // produce the unpacklo interleave of the 16-bit lanes of a and b.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_unpacklo_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        let r = _mm256_mask_unpacklo_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpacklo_epi16(a, 0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
        assert_eq_m256i(r, e);
    }
19916
    // maskz: zero mask zeroes the result; all-ones mask yields the full
    // unpacklo interleave of the 16-bit lanes of a and b.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_unpacklo_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        let r = _mm256_maskz_unpacklo_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpacklo_epi16(0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
        assert_eq_m256i(r, e);
    }
19929
    // mask: zero mask keeps src (= a); all-ones mask gives the unpacklo
    // interleave of the 16-bit lanes of a and b.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_unpacklo_epi16() {
        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
        let r = _mm_mask_unpacklo_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpacklo_epi16(a, 0b11111111, a, b);
        let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
        assert_eq_m128i(r, e);
    }
19940
    // maskz: zero mask zeroes the result; all-ones mask gives the unpacklo
    // interleave of the 16-bit lanes of a and b.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_unpacklo_epi16() {
        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
        let r = _mm_maskz_unpacklo_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpacklo_epi16(0b11111111, a, b);
        let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
        assert_eq_m128i(r, e);
    }
19951
    // Unmasked byte unpacklo: e interleaves the low 8 bytes of each 128-bit
    // lane of b with the corresponding low 8 bytes of a.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_unpacklo_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
        assert_eq_m512i(r, e);
    }
19972
    // mask: zero mask keeps src (= a); all-ones 64-bit mask yields the full
    // byte unpacklo result (same expected vector as the unmasked test).
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpacklo_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
        assert_eq_m512i(r, e);
    }
20000
    // maskz: zero mask zeroes every byte; all-ones 64-bit mask yields the
    // full byte unpacklo result.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpacklo_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
        assert_eq_m512i(r, e);
    }
20027
    // mask: zero mask keeps src (= a); all-ones 32-bit mask yields the byte
    // unpacklo interleave of a and b.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpacklo_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32);
        assert_eq_m256i(r, e);
    }
20044
    // maskz: zero mask zeroes the result; all-ones 32-bit mask yields the
    // byte unpacklo interleave of a and b.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpacklo_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32);
        assert_eq_m256i(r, e);
    }
20061
    // mask: zero mask keeps src (= a); all-ones 16-bit mask yields the byte
    // unpacklo interleave of a and b.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_unpacklo_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpacklo_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
        );
        assert_eq_m128i(r, e);
    }
20076
    // maskz: zero mask zeroes the result; all-ones 16-bit mask yields the
    // byte unpacklo interleave of a and b.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_unpacklo_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpacklo_epi8(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
        );
        assert_eq_m128i(r, e);
    }
20091
    // mask_mov: zero mask keeps src; all-ones mask copies every lane from a.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_mov_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm512_set1_epi16(2);
        let r = _mm512_mask_mov_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_mov_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        assert_eq_m512i(r, a);
    }
20101
    // maskz_mov: zero mask gives all zeros; all-ones mask reproduces a.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_mov_epi16() {
        let a = _mm512_set1_epi16(2);
        let r = _mm512_maskz_mov_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mov_epi16(0b11111111_11111111_11111111_11111111, a);
        assert_eq_m512i(r, a);
    }
20110
    // mask_mov: zero mask keeps src; all-ones mask copies every lane from a.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_mov_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm256_set1_epi16(2);
        let r = _mm256_mask_mov_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_mov_epi16(src, 0b11111111_11111111, a);
        assert_eq_m256i(r, a);
    }
20120
    // maskz_mov: zero mask gives all zeros; all-ones mask reproduces a.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_mov_epi16() {
        let a = _mm256_set1_epi16(2);
        let r = _mm256_maskz_mov_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mov_epi16(0b11111111_11111111, a);
        assert_eq_m256i(r, a);
    }
20129
    // mask_mov: zero mask keeps src; all-ones mask copies every lane from a.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_mov_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi16(2);
        let r = _mm_mask_mov_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_mov_epi16(src, 0b11111111, a);
        assert_eq_m128i(r, a);
    }
20139
    // maskz_mov: zero mask gives all zeros; all-ones mask reproduces a.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_mov_epi16() {
        let a = _mm_set1_epi16(2);
        let r = _mm_maskz_mov_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mov_epi16(0b11111111, a);
        assert_eq_m128i(r, a);
    }
20148
    // mask_mov: zero mask keeps src; all-ones 64-bit mask copies every byte
    // lane from a.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_mov_epi8() {
        let src = _mm512_set1_epi8(1);
        let a = _mm512_set1_epi8(2);
        let r = _mm512_mask_mov_epi8(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_mov_epi8(
            src,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        assert_eq_m512i(r, a);
    }
20162
    // maskz_mov: zero mask gives all zeros; all-ones 64-bit mask reproduces a.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_mov_epi8() {
        let a = _mm512_set1_epi8(2);
        let r = _mm512_maskz_mov_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mov_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        assert_eq_m512i(r, a);
    }
20174
    // mask_mov: zero mask keeps src; all-ones mask copies every byte from a.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_mov_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm256_mask_mov_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_mov_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        assert_eq_m256i(r, a);
    }
20184
    // maskz_mov: zero mask gives all zeros; all-ones mask reproduces a.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_mov_epi8() {
        let a = _mm256_set1_epi8(2);
        let r = _mm256_maskz_mov_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mov_epi8(0b11111111_11111111_11111111_11111111, a);
        assert_eq_m256i(r, a);
    }
20193
    // mask_mov: zero mask keeps src; all-ones mask copies every byte from a.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_mov_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_mov_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_mov_epi8(src, 0b11111111_11111111, a);
        assert_eq_m128i(r, a);
    }
20203
    // maskz_mov: zero mask gives all zeros; all-ones mask reproduces a.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_mov_epi8() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_mov_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mov_epi8(0b11111111_11111111, a);
        assert_eq_m128i(r, a);
    }
20212
    // mask_set1: zero mask keeps src; all-ones mask broadcasts scalar a to
    // every lane.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_set1_epi16() {
        let src = _mm512_set1_epi16(2);
        let a: i16 = 11;
        let r = _mm512_mask_set1_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_set1_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(11);
        assert_eq_m512i(r, e);
    }
20223
    // maskz_set1: zero mask gives all zeros; all-ones mask broadcasts a.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_set1_epi16() {
        let a: i16 = 11;
        let r = _mm512_maskz_set1_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_set1_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(11);
        assert_eq_m512i(r, e);
    }
20233
    // mask_set1: zero mask keeps src; all-ones mask broadcasts scalar a.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_set1_epi16() {
        let src = _mm256_set1_epi16(2);
        let a: i16 = 11;
        let r = _mm256_mask_set1_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_set1_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(11);
        assert_eq_m256i(r, e);
    }
20244
    // maskz_set1: zero mask gives all zeros; all-ones mask broadcasts a.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_set1_epi16() {
        let a: i16 = 11;
        let r = _mm256_maskz_set1_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_set1_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(11);
        assert_eq_m256i(r, e);
    }
20254
    // mask_set1: zero mask keeps src; all-ones mask broadcasts scalar a.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_set1_epi16() {
        let src = _mm_set1_epi16(2);
        let a: i16 = 11;
        let r = _mm_mask_set1_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_set1_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(11);
        assert_eq_m128i(r, e);
    }
20265
    // maskz_set1: zero mask gives all zeros; all-ones mask broadcasts a.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_set1_epi16() {
        let a: i16 = 11;
        let r = _mm_maskz_set1_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_set1_epi16(0b11111111, a);
        let e = _mm_set1_epi16(11);
        assert_eq_m128i(r, e);
    }
20275
    // mask_set1: zero mask keeps src; all-ones 64-bit mask broadcasts the
    // scalar byte a to every lane.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_set1_epi8() {
        let src = _mm512_set1_epi8(2);
        let a: i8 = 11;
        let r = _mm512_mask_set1_epi8(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_set1_epi8(
            src,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(11);
        assert_eq_m512i(r, e);
    }
20290
    // maskz_set1: zero mask gives all zeros; all-ones 64-bit mask broadcasts a.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_set1_epi8() {
        let a: i8 = 11;
        let r = _mm512_maskz_set1_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_set1_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(11);
        assert_eq_m512i(r, e);
    }
20303
    // mask_set1: zero mask keeps src; all-ones mask broadcasts scalar a.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_set1_epi8() {
        let src = _mm256_set1_epi8(2);
        let a: i8 = 11;
        let r = _mm256_mask_set1_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_set1_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(11);
        assert_eq_m256i(r, e);
    }
20314
    // maskz_set1: zero mask gives all zeros; all-ones mask broadcasts a.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_set1_epi8() {
        let a: i8 = 11;
        let r = _mm256_maskz_set1_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_set1_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(11);
        assert_eq_m256i(r, e);
    }
20324
    // mask_set1: zero mask keeps src; all-ones mask broadcasts scalar a.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_set1_epi8() {
        let src = _mm_set1_epi8(2);
        let a: i8 = 11;
        let r = _mm_mask_set1_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_set1_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(11);
        assert_eq_m128i(r, e);
    }
20335
    // maskz_set1: zero mask gives all zeros; all-ones mask broadcasts a.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_set1_epi8() {
        let a: i8 = 11;
        let r = _mm_maskz_set1_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_set1_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(11);
        assert_eq_m128i(r, e);
    }
20345
    // shufflelo with IMM8 = 0b00_01_01_11 reorders the low four 16-bit lanes
    // of each 128-bit lane; the high four lanes pass through unchanged, as
    // the expected vector e shows.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        let r = _mm512_shufflelo_epi16::<0b00_01_01_11>(a);
        assert_eq_m512i(r, e);
    }
20361
    // mask: zero mask keeps src (= a); all-ones mask yields the same
    // shufflelo result as the unmasked test above.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(
            a,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        assert_eq_m512i(r, e);
    }
20383
    // maskz: zero mask zeroes the result; all-ones mask yields the full
    // shufflelo result.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r =
            _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        assert_eq_m512i(r, e);
    }
20402
    // mask: zero mask keeps src (= a); all-ones mask applies the shufflelo
    // permutation to the low four lanes of each 128-bit lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_shufflelo_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
        assert_eq_m256i(r, e);
    }
20412
    // maskz: zero mask zeroes the result; all-ones mask yields the shufflelo
    // permutation.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_shufflelo_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
        assert_eq_m256i(r, e);
    }
20422
    // mask: zero mask keeps src (= a); all-ones mask applies the shufflelo
    // permutation to the low four 16-bit lanes.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_shufflelo_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
        assert_eq_m128i(r, e);
    }
20432
    // maskz: zero mask zeroes the result; all-ones mask yields the shufflelo
    // permutation.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_shufflelo_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
        assert_eq_m128i(r, e);
    }
20442
    // shufflehi with IMM8 = 0b00_01_01_11 reorders the high four 16-bit lanes
    // of each 128-bit lane; the low four lanes pass through unchanged, as
    // the expected vector e shows.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        let r = _mm512_shufflehi_epi16::<0b00_01_01_11>(a);
        assert_eq_m512i(r, e);
    }
20458
    // mask: zero mask keeps src (= a); all-ones mask yields the same
    // shufflehi result as the unmasked test above.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(
            a,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        assert_eq_m512i(r, e);
    }
20480
    // maskz: zero mask zeroes the result; all-ones mask yields the full
    // shufflehi result.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r =
            _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        assert_eq_m512i(r, e);
    }
20499
    // mask: zero mask keeps src (= a); all-ones mask applies the shufflehi
    // permutation to the high four lanes of each 128-bit lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_shufflehi_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
        let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
20509
    // maskz: zero mask zeroes the result; all-ones mask yields the shufflehi
    // permutation.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_shufflehi_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
        let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
20519
    // mask: zero mask keeps src (= a); all-ones mask applies the shufflehi
    // permutation to the high four 16-bit lanes.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_shufflehi_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111, a);
        let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
20529
    // maskz: zero mask zeroes the result; all-ones mask yields the shufflehi
    // permutation.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_shufflehi_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111, a);
        let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
20539
    // Byte shuffle with every control byte = 1: each result byte is byte 1 of
    // its own 128-bit lane of a (values 14, 30, 46, 62 per lane in e).
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_shuffle_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }
20556
    // mask: zero mask keeps src (= a); all-ones 64-bit mask yields the same
    // byte-shuffle result as the unmasked test above.
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shuffle_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }
20580
    // Zeromask variant: a zero mask yields an all-zero vector; an all-ones
    // mask yields the same shuffled result as `test_mm512_shuffle_epi8`.
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_shuffle_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shuffle_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }
20603
    // 256-bit writemask variant: index 1 of each 128-bit lane (elements listed
    // high-to-low) holds 14 and 30.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shuffle_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
        assert_eq_m256i(r, e);
    }
20618
    // 256-bit zeromask variant: zero mask zeroes the result; all-ones mask
    // selects index 1 of each 128-bit lane (14 and 30).
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_shuffle_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shuffle_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
        assert_eq_m256i(r, e);
    }
20633
20634    #[simd_test(enable = "avx512bw,avx512vl")]
20635    fn test_mm_mask_shuffle_epi8() {
20636        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
20637        let b = _mm_set1_epi8(1);
20638        let r = _mm_mask_shuffle_epi8(a, 0, a, b);
20639        assert_eq_m128i(r, a);
20640        let r = _mm_mask_shuffle_epi8(a, 0b11111111_11111111, a, b);
20641        let e = _mm_set_epi8(
20642            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
20643        );
20644        assert_eq_m128i(r, e);
20645    }
20646
20647    #[simd_test(enable = "avx512bw,avx512vl")]
20648    fn test_mm_maskz_shuffle_epi8() {
20649        #[rustfmt::skip]
20650        let a = _mm_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15);
20651        let b = _mm_set1_epi8(1);
20652        let r = _mm_maskz_shuffle_epi8(0, a, b);
20653        assert_eq_m128i(r, _mm_setzero_si128());
20654        let r = _mm_maskz_shuffle_epi8(0b11111111_11111111, a, b);
20655        let e = _mm_set_epi8(
20656            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
20657        );
20658        assert_eq_m128i(r, e);
20659    }
20660
20661    #[simd_test(enable = "avx512bw")]
20662    const fn test_mm512_test_epi16_mask() {
20663        let a = _mm512_set1_epi16(1 << 0);
20664        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
20665        let r = _mm512_test_epi16_mask(a, b);
20666        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
20667        assert_eq!(r, e);
20668    }
20669
20670    #[simd_test(enable = "avx512bw")]
20671    const fn test_mm512_mask_test_epi16_mask() {
20672        let a = _mm512_set1_epi16(1 << 0);
20673        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
20674        let r = _mm512_mask_test_epi16_mask(0, a, b);
20675        assert_eq!(r, 0);
20676        let r = _mm512_mask_test_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
20677        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
20678        assert_eq!(r, e);
20679    }
20680
20681    #[simd_test(enable = "avx512bw,avx512vl")]
20682    const fn test_mm256_test_epi16_mask() {
20683        let a = _mm256_set1_epi16(1 << 0);
20684        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
20685        let r = _mm256_test_epi16_mask(a, b);
20686        let e: __mmask16 = 0b11111111_11111111;
20687        assert_eq!(r, e);
20688    }
20689
20690    #[simd_test(enable = "avx512bw,avx512vl")]
20691    const fn test_mm256_mask_test_epi16_mask() {
20692        let a = _mm256_set1_epi16(1 << 0);
20693        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
20694        let r = _mm256_mask_test_epi16_mask(0, a, b);
20695        assert_eq!(r, 0);
20696        let r = _mm256_mask_test_epi16_mask(0b11111111_11111111, a, b);
20697        let e: __mmask16 = 0b11111111_11111111;
20698        assert_eq!(r, e);
20699    }
20700
20701    #[simd_test(enable = "avx512bw,avx512vl")]
20702    const fn test_mm_test_epi16_mask() {
20703        let a = _mm_set1_epi16(1 << 0);
20704        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
20705        let r = _mm_test_epi16_mask(a, b);
20706        let e: __mmask8 = 0b11111111;
20707        assert_eq!(r, e);
20708    }
20709
20710    #[simd_test(enable = "avx512bw,avx512vl")]
20711    const fn test_mm_mask_test_epi16_mask() {
20712        let a = _mm_set1_epi16(1 << 0);
20713        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
20714        let r = _mm_mask_test_epi16_mask(0, a, b);
20715        assert_eq!(r, 0);
20716        let r = _mm_mask_test_epi16_mask(0b11111111, a, b);
20717        let e: __mmask8 = 0b11111111;
20718        assert_eq!(r, e);
20719    }
20720
20721    #[simd_test(enable = "avx512bw")]
20722    const fn test_mm512_test_epi8_mask() {
20723        let a = _mm512_set1_epi8(1 << 0);
20724        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
20725        let r = _mm512_test_epi8_mask(a, b);
20726        let e: __mmask64 =
20727            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
20728        assert_eq!(r, e);
20729    }
20730
20731    #[simd_test(enable = "avx512bw")]
20732    const fn test_mm512_mask_test_epi8_mask() {
20733        let a = _mm512_set1_epi8(1 << 0);
20734        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
20735        let r = _mm512_mask_test_epi8_mask(0, a, b);
20736        assert_eq!(r, 0);
20737        let r = _mm512_mask_test_epi8_mask(
20738            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
20739            a,
20740            b,
20741        );
20742        let e: __mmask64 =
20743            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
20744        assert_eq!(r, e);
20745    }
20746
20747    #[simd_test(enable = "avx512bw,avx512vl")]
20748    const fn test_mm256_test_epi8_mask() {
20749        let a = _mm256_set1_epi8(1 << 0);
20750        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
20751        let r = _mm256_test_epi8_mask(a, b);
20752        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
20753        assert_eq!(r, e);
20754    }
20755
20756    #[simd_test(enable = "avx512bw,avx512vl")]
20757    const fn test_mm256_mask_test_epi8_mask() {
20758        let a = _mm256_set1_epi8(1 << 0);
20759        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
20760        let r = _mm256_mask_test_epi8_mask(0, a, b);
20761        assert_eq!(r, 0);
20762        let r = _mm256_mask_test_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
20763        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
20764        assert_eq!(r, e);
20765    }
20766
20767    #[simd_test(enable = "avx512bw,avx512vl")]
20768    const fn test_mm_test_epi8_mask() {
20769        let a = _mm_set1_epi8(1 << 0);
20770        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
20771        let r = _mm_test_epi8_mask(a, b);
20772        let e: __mmask16 = 0b11111111_11111111;
20773        assert_eq!(r, e);
20774    }
20775
20776    #[simd_test(enable = "avx512bw,avx512vl")]
20777    const fn test_mm_mask_test_epi8_mask() {
20778        let a = _mm_set1_epi8(1 << 0);
20779        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
20780        let r = _mm_mask_test_epi8_mask(0, a, b);
20781        assert_eq!(r, 0);
20782        let r = _mm_mask_test_epi8_mask(0b11111111_11111111, a, b);
20783        let e: __mmask16 = 0b11111111_11111111;
20784        assert_eq!(r, e);
20785    }
20786
20787    #[simd_test(enable = "avx512bw")]
20788    const fn test_mm512_testn_epi16_mask() {
20789        let a = _mm512_set1_epi16(1 << 0);
20790        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
20791        let r = _mm512_testn_epi16_mask(a, b);
20792        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
20793        assert_eq!(r, e);
20794    }
20795
20796    #[simd_test(enable = "avx512bw")]
20797    const fn test_mm512_mask_testn_epi16_mask() {
20798        let a = _mm512_set1_epi16(1 << 0);
20799        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
20800        let r = _mm512_mask_testn_epi16_mask(0, a, b);
20801        assert_eq!(r, 0);
20802        let r = _mm512_mask_testn_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
20803        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
20804        assert_eq!(r, e);
20805    }
20806
20807    #[simd_test(enable = "avx512bw,avx512vl")]
20808    const fn test_mm256_testn_epi16_mask() {
20809        let a = _mm256_set1_epi16(1 << 0);
20810        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
20811        let r = _mm256_testn_epi16_mask(a, b);
20812        let e: __mmask16 = 0b00000000_00000000;
20813        assert_eq!(r, e);
20814    }
20815
20816    #[simd_test(enable = "avx512bw,avx512vl")]
20817    const fn test_mm256_mask_testn_epi16_mask() {
20818        let a = _mm256_set1_epi16(1 << 0);
20819        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
20820        let r = _mm256_mask_testn_epi16_mask(0, a, b);
20821        assert_eq!(r, 0);
20822        let r = _mm256_mask_testn_epi16_mask(0b11111111_11111111, a, b);
20823        let e: __mmask16 = 0b00000000_00000000;
20824        assert_eq!(r, e);
20825    }
20826
20827    #[simd_test(enable = "avx512bw,avx512vl")]
20828    const fn test_mm_testn_epi16_mask() {
20829        let a = _mm_set1_epi16(1 << 0);
20830        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
20831        let r = _mm_testn_epi16_mask(a, b);
20832        let e: __mmask8 = 0b00000000;
20833        assert_eq!(r, e);
20834    }
20835
20836    #[simd_test(enable = "avx512bw,avx512vl")]
20837    const fn test_mm_mask_testn_epi16_mask() {
20838        let a = _mm_set1_epi16(1 << 0);
20839        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
20840        let r = _mm_mask_testn_epi16_mask(0, a, b);
20841        assert_eq!(r, 0);
20842        let r = _mm_mask_testn_epi16_mask(0b11111111, a, b);
20843        let e: __mmask8 = 0b00000000;
20844        assert_eq!(r, e);
20845    }
20846
20847    #[simd_test(enable = "avx512bw")]
20848    const fn test_mm512_testn_epi8_mask() {
20849        let a = _mm512_set1_epi8(1 << 0);
20850        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
20851        let r = _mm512_testn_epi8_mask(a, b);
20852        let e: __mmask64 =
20853            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
20854        assert_eq!(r, e);
20855    }
20856
20857    #[simd_test(enable = "avx512bw")]
20858    const fn test_mm512_mask_testn_epi8_mask() {
20859        let a = _mm512_set1_epi8(1 << 0);
20860        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
20861        let r = _mm512_mask_testn_epi8_mask(0, a, b);
20862        assert_eq!(r, 0);
20863        let r = _mm512_mask_testn_epi8_mask(
20864            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
20865            a,
20866            b,
20867        );
20868        let e: __mmask64 =
20869            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
20870        assert_eq!(r, e);
20871    }
20872
20873    #[simd_test(enable = "avx512bw,avx512vl")]
20874    const fn test_mm256_testn_epi8_mask() {
20875        let a = _mm256_set1_epi8(1 << 0);
20876        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
20877        let r = _mm256_testn_epi8_mask(a, b);
20878        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
20879        assert_eq!(r, e);
20880    }
20881
20882    #[simd_test(enable = "avx512bw,avx512vl")]
20883    const fn test_mm256_mask_testn_epi8_mask() {
20884        let a = _mm256_set1_epi8(1 << 0);
20885        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
20886        let r = _mm256_mask_testn_epi8_mask(0, a, b);
20887        assert_eq!(r, 0);
20888        let r = _mm256_mask_testn_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
20889        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
20890        assert_eq!(r, e);
20891    }
20892
20893    #[simd_test(enable = "avx512bw,avx512vl")]
20894    const fn test_mm_testn_epi8_mask() {
20895        let a = _mm_set1_epi8(1 << 0);
20896        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
20897        let r = _mm_testn_epi8_mask(a, b);
20898        let e: __mmask16 = 0b00000000_00000000;
20899        assert_eq!(r, e);
20900    }
20901
20902    #[simd_test(enable = "avx512bw,avx512vl")]
20903    const fn test_mm_mask_testn_epi8_mask() {
20904        let a = _mm_set1_epi8(1 << 0);
20905        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
20906        let r = _mm_mask_testn_epi8_mask(0, a, b);
20907        assert_eq!(r, 0);
20908        let r = _mm_mask_testn_epi8_mask(0b11111111_11111111, a, b);
20909        let e: __mmask16 = 0b00000000_00000000;
20910        assert_eq!(r, e);
20911    }
20912
20913    #[simd_test(enable = "avx512bw")]
20914    const fn test_store_mask64() {
20915        let a: __mmask64 =
20916            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
20917        let mut r = 0;
20918        unsafe {
20919            _store_mask64(&mut r, a);
20920        }
20921        assert_eq!(r, a);
20922    }
20923
20924    #[simd_test(enable = "avx512bw")]
20925    const fn test_store_mask32() {
20926        let a: __mmask32 = 0b11111111_00000000_11111111_00000000;
20927        let mut r = 0;
20928        unsafe {
20929            _store_mask32(&mut r, a);
20930        }
20931        assert_eq!(r, a);
20932    }
20933
20934    #[simd_test(enable = "avx512bw")]
20935    const fn test_load_mask64() {
20936        let p: __mmask64 =
20937            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
20938        let r = unsafe { _load_mask64(&p) };
20939        let e: __mmask64 =
20940            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
20941        assert_eq!(r, e);
20942    }
20943
20944    #[simd_test(enable = "avx512bw")]
20945    const fn test_load_mask32() {
20946        let p: __mmask32 = 0b11111111_00000000_11111111_00000000;
20947        let r = unsafe { _load_mask32(&p) };
20948        let e: __mmask32 = 0b11111111_00000000_11111111_00000000;
20949        assert_eq!(r, e);
20950    }
20951
    // Each 64-bit result lane is the sum of absolute differences over eight
    // byte pairs: 8 * |2 - 4| = 16.
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_sad_epu8() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_sad_epu8(a, b);
        let e = _mm512_set1_epi64(16);
        assert_eq_m512i(r, e);
    }
20960
    // With every byte of `a` = 2 and `b` = 4, each four-byte SAD is
    // 4 * |2 - 4| = 8, so every 16-bit result lane is 8 regardless of the
    // block selection encoded by the imm8 (here 0).
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_dbsad_epu8() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_dbsad_epu8::<0>(a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }
20969
    // Writemask variant: zero mask copies `src`; all-ones mask gives the
    // uniform SAD value 4 * |2 - 4| = 8 in every word lane.
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_dbsad_epu8() {
        let src = _mm512_set1_epi16(1);
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dbsad_epu8::<0>(src, 0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }
20981
    // Zeromask variant: zero mask zeroes the result; all-ones mask gives the
    // uniform SAD value 4 * |2 - 4| = 8 in every word lane.
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_dbsad_epu8() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dbsad_epu8::<0>(0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }
20992
    // 256-bit variant: each four-byte SAD is 4 * |2 - 4| = 8.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_dbsad_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_dbsad_epu8::<0>(a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }
21001
    // Writemask variant: zero mask copies `src`; all-ones mask gives the
    // uniform SAD value 8 in every word lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_dbsad_epu8() {
        let src = _mm256_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dbsad_epu8::<0>(src, 0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }
21013
    // Zeromask variant: zero mask zeroes the result; all-ones mask gives the
    // uniform SAD value 8 in every word lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_dbsad_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dbsad_epu8::<0>(0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }
21024
    // 128-bit variant: each four-byte SAD is 4 * |2 - 4| = 8.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_dbsad_epu8() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        let r = _mm_dbsad_epu8::<0>(a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }
21033
    // Writemask variant: zero mask copies `src`; all-ones mask gives the
    // uniform SAD value 8 in every word lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_dbsad_epu8() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        let r = _mm_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dbsad_epu8::<0>(src, 0b11111111, a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }
21045
    // Zeromask variant: zero mask zeroes the result; all-ones mask gives the
    // uniform SAD value 8 in every word lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_dbsad_epu8() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        let r = _mm_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dbsad_epu8::<0>(0b11111111, a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }
21056
21057    #[simd_test(enable = "avx512bw")]
21058    const fn test_mm512_movepi16_mask() {
21059        let a = _mm512_set1_epi16(1 << 15);
21060        let r = _mm512_movepi16_mask(a);
21061        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
21062        assert_eq!(r, e);
21063    }
21064
21065    #[simd_test(enable = "avx512bw,avx512vl")]
21066    const fn test_mm256_movepi16_mask() {
21067        let a = _mm256_set1_epi16(1 << 15);
21068        let r = _mm256_movepi16_mask(a);
21069        let e: __mmask16 = 0b11111111_11111111;
21070        assert_eq!(r, e);
21071    }
21072
21073    #[simd_test(enable = "avx512bw,avx512vl")]
21074    const fn test_mm_movepi16_mask() {
21075        let a = _mm_set1_epi16(1 << 15);
21076        let r = _mm_movepi16_mask(a);
21077        let e: __mmask8 = 0b11111111;
21078        assert_eq!(r, e);
21079    }
21080
21081    #[simd_test(enable = "avx512bw")]
21082    const fn test_mm512_movepi8_mask() {
21083        let a = _mm512_set1_epi8(1 << 7);
21084        let r = _mm512_movepi8_mask(a);
21085        let e: __mmask64 =
21086            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
21087        assert_eq!(r, e);
21088    }
21089
21090    #[simd_test(enable = "avx512bw,avx512vl")]
21091    const fn test_mm256_movepi8_mask() {
21092        let a = _mm256_set1_epi8(1 << 7);
21093        let r = _mm256_movepi8_mask(a);
21094        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
21095        assert_eq!(r, e);
21096    }
21097
21098    #[simd_test(enable = "avx512bw,avx512vl")]
21099    const fn test_mm_movepi8_mask() {
21100        let a = _mm_set1_epi8(1 << 7);
21101        let r = _mm_movepi8_mask(a);
21102        let e: __mmask16 = 0b11111111_11111111;
21103        assert_eq!(r, e);
21104    }
21105
21106    #[simd_test(enable = "avx512bw")]
21107    const fn test_mm512_movm_epi16() {
21108        let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
21109        let r = _mm512_movm_epi16(a);
21110        let e = _mm512_set1_epi16(
21111            1 << 15
21112                | 1 << 14
21113                | 1 << 13
21114                | 1 << 12
21115                | 1 << 11
21116                | 1 << 10
21117                | 1 << 9
21118                | 1 << 8
21119                | 1 << 7
21120                | 1 << 6
21121                | 1 << 5
21122                | 1 << 4
21123                | 1 << 3
21124                | 1 << 2
21125                | 1 << 1
21126                | 1 << 0,
21127        );
21128        assert_eq_m512i(r, e);
21129    }
21130
21131    #[simd_test(enable = "avx512bw,avx512vl")]
21132    const fn test_mm256_movm_epi16() {
21133        let a: __mmask16 = 0b11111111_11111111;
21134        let r = _mm256_movm_epi16(a);
21135        let e = _mm256_set1_epi16(
21136            1 << 15
21137                | 1 << 14
21138                | 1 << 13
21139                | 1 << 12
21140                | 1 << 11
21141                | 1 << 10
21142                | 1 << 9
21143                | 1 << 8
21144                | 1 << 7
21145                | 1 << 6
21146                | 1 << 5
21147                | 1 << 4
21148                | 1 << 3
21149                | 1 << 2
21150                | 1 << 1
21151                | 1 << 0,
21152        );
21153        assert_eq_m256i(r, e);
21154    }
21155
21156    #[simd_test(enable = "avx512bw,avx512vl")]
21157    const fn test_mm_movm_epi16() {
21158        let a: __mmask8 = 0b11111111;
21159        let r = _mm_movm_epi16(a);
21160        let e = _mm_set1_epi16(
21161            1 << 15
21162                | 1 << 14
21163                | 1 << 13
21164                | 1 << 12
21165                | 1 << 11
21166                | 1 << 10
21167                | 1 << 9
21168                | 1 << 8
21169                | 1 << 7
21170                | 1 << 6
21171                | 1 << 5
21172                | 1 << 4
21173                | 1 << 3
21174                | 1 << 2
21175                | 1 << 1
21176                | 1 << 0,
21177        );
21178        assert_eq_m128i(r, e);
21179    }
21180
21181    #[simd_test(enable = "avx512bw")]
21182    const fn test_mm512_movm_epi8() {
21183        let a: __mmask64 =
21184            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
21185        let r = _mm512_movm_epi8(a);
21186        let e =
21187            _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
21188        assert_eq_m512i(r, e);
21189    }
21190
21191    #[simd_test(enable = "avx512bw,avx512vl")]
21192    const fn test_mm256_movm_epi8() {
21193        let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
21194        let r = _mm256_movm_epi8(a);
21195        let e =
21196            _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
21197        assert_eq_m256i(r, e);
21198    }
21199
21200    #[simd_test(enable = "avx512bw,avx512vl")]
21201    const fn test_mm_movm_epi8() {
21202        let a: __mmask16 = 0b11111111_11111111;
21203        let r = _mm_movm_epi8(a);
21204        let e =
21205            _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
21206        assert_eq_m128i(r, e);
21207    }
21208
21209    #[simd_test(enable = "avx512bw")]
21210    const fn test_cvtmask32_u32() {
21211        let a: __mmask32 = 0b11001100_00110011_01100110_10011001;
21212        let r = _cvtmask32_u32(a);
21213        let e: u32 = 0b11001100_00110011_01100110_10011001;
21214        assert_eq!(r, e);
21215    }
21216
21217    #[simd_test(enable = "avx512bw")]
21218    const fn test_cvtu32_mask32() {
21219        let a: u32 = 0b11001100_00110011_01100110_10011001;
21220        let r = _cvtu32_mask32(a);
21221        let e: __mmask32 = 0b11001100_00110011_01100110_10011001;
21222        assert_eq!(r, e);
21223    }
21224
21225    #[simd_test(enable = "avx512bw")]
21226    const fn test_kadd_mask32() {
21227        let a: __mmask32 = 11;
21228        let b: __mmask32 = 22;
21229        let r = _kadd_mask32(a, b);
21230        let e: __mmask32 = 33;
21231        assert_eq!(r, e);
21232    }
21233
21234    #[simd_test(enable = "avx512bw")]
21235    const fn test_kadd_mask64() {
21236        let a: __mmask64 = 11;
21237        let b: __mmask64 = 22;
21238        let r = _kadd_mask64(a, b);
21239        let e: __mmask64 = 33;
21240        assert_eq!(r, e);
21241    }
21242
21243    #[simd_test(enable = "avx512bw")]
21244    const fn test_kand_mask32() {
21245        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
21246        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
21247        let r = _kand_mask32(a, b);
21248        let e: __mmask32 = 0b11001100_00110011_11001100_00110011;
21249        assert_eq!(r, e);
21250    }
21251
21252    #[simd_test(enable = "avx512bw")]
21253    const fn test_kand_mask64() {
21254        let a: __mmask64 =
21255            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21256        let b: __mmask64 =
21257            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21258        let r = _kand_mask64(a, b);
21259        let e: __mmask64 =
21260            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21261        assert_eq!(r, e);
21262    }
21263
21264    #[simd_test(enable = "avx512bw")]
21265    const fn test_knot_mask32() {
21266        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
21267        let r = _knot_mask32(a);
21268        let e: __mmask32 = 0b00110011_11001100_00110011_11001100;
21269        assert_eq!(r, e);
21270    }
21271
21272    #[simd_test(enable = "avx512bw")]
21273    const fn test_knot_mask64() {
21274        let a: __mmask64 =
21275            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21276        let r = _knot_mask64(a);
21277        let e: __mmask64 =
21278            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
21279        assert_eq!(r, e);
21280    }
21281
21282    #[simd_test(enable = "avx512bw")]
21283    const fn test_kandn_mask32() {
21284        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
21285        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
21286        let r = _kandn_mask32(a, b);
21287        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
21288        assert_eq!(r, e);
21289    }
21290
21291    #[simd_test(enable = "avx512bw")]
21292    const fn test_kandn_mask64() {
21293        let a: __mmask64 =
21294            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21295        let b: __mmask64 =
21296            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21297        let r = _kandn_mask64(a, b);
21298        let e: __mmask64 =
21299            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
21300        assert_eq!(r, e);
21301    }
21302
21303    #[simd_test(enable = "avx512bw")]
21304    const fn test_kor_mask32() {
21305        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
21306        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
21307        let r = _kor_mask32(a, b);
21308        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
21309        assert_eq!(r, e);
21310    }
21311
21312    #[simd_test(enable = "avx512bw")]
21313    const fn test_kor_mask64() {
21314        let a: __mmask64 =
21315            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
21316        let b: __mmask64 =
21317            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21318        let r = _kor_mask64(a, b);
21319        let e: __mmask64 =
21320            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
21321        assert_eq!(r, e);
21322    }
21323
21324    #[simd_test(enable = "avx512bw")]
21325    const fn test_kxor_mask32() {
21326        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
21327        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
21328        let r = _kxor_mask32(a, b);
21329        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
21330        assert_eq!(r, e);
21331    }
21332
21333    #[simd_test(enable = "avx512bw")]
21334    const fn test_kxor_mask64() {
21335        let a: __mmask64 =
21336            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
21337        let b: __mmask64 =
21338            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21339        let r = _kxor_mask64(a, b);
21340        let e: __mmask64 =
21341            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
21342        assert_eq!(r, e);
21343    }
21344
21345    #[simd_test(enable = "avx512bw")]
21346    const fn test_kxnor_mask32() {
21347        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
21348        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
21349        let r = _kxnor_mask32(a, b);
21350        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
21351        assert_eq!(r, e);
21352    }
21353
21354    #[simd_test(enable = "avx512bw")]
21355    const fn test_kxnor_mask64() {
21356        let a: __mmask64 =
21357            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
21358        let b: __mmask64 =
21359            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21360        let r = _kxnor_mask64(a, b);
21361        let e: __mmask64 =
21362            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
21363        assert_eq!(r, e);
21364    }
21365
21366    #[simd_test(enable = "avx512bw")]
21367    const fn test_kortest_mask32_u8() {
21368        let a: __mmask32 = 0b0110100101101001_0110100101101001;
21369        let b: __mmask32 = 0b1011011010110110_1011011010110110;
21370        let mut all_ones: u8 = 0;
21371        let r = unsafe { _kortest_mask32_u8(a, b, &mut all_ones) };
21372        assert_eq!(r, 0);
21373        assert_eq!(all_ones, 1);
21374    }
21375
21376    #[simd_test(enable = "avx512bw")]
21377    const fn test_kortest_mask64_u8() {
21378        let a: __mmask64 = 0b0110100101101001_0110100101101001;
21379        let b: __mmask64 = 0b1011011010110110_1011011010110110;
21380        let mut all_ones: u8 = 0;
21381        let r = unsafe { _kortest_mask64_u8(a, b, &mut all_ones) };
21382        assert_eq!(r, 0);
21383        assert_eq!(all_ones, 0);
21384    }
21385
21386    #[simd_test(enable = "avx512bw")]
21387    const fn test_kortestc_mask32_u8() {
21388        let a: __mmask32 = 0b0110100101101001_0110100101101001;
21389        let b: __mmask32 = 0b1011011010110110_1011011010110110;
21390        let r = _kortestc_mask32_u8(a, b);
21391        assert_eq!(r, 1);
21392    }
21393
21394    #[simd_test(enable = "avx512bw")]
21395    const fn test_kortestc_mask64_u8() {
21396        let a: __mmask64 = 0b0110100101101001_0110100101101001;
21397        let b: __mmask64 = 0b1011011010110110_1011011010110110;
21398        let r = _kortestc_mask64_u8(a, b);
21399        assert_eq!(r, 0);
21400    }
21401
21402    #[simd_test(enable = "avx512bw")]
21403    const fn test_kortestz_mask32_u8() {
21404        let a: __mmask32 = 0b0110100101101001_0110100101101001;
21405        let b: __mmask32 = 0b1011011010110110_1011011010110110;
21406        let r = _kortestz_mask32_u8(a, b);
21407        assert_eq!(r, 0);
21408    }
21409
21410    #[simd_test(enable = "avx512bw")]
21411    const fn test_kortestz_mask64_u8() {
21412        let a: __mmask64 = 0b0110100101101001_0110100101101001;
21413        let b: __mmask64 = 0b1011011010110110_1011011010110110;
21414        let r = _kortestz_mask64_u8(a, b);
21415        assert_eq!(r, 0);
21416    }
21417
21418    #[simd_test(enable = "avx512bw")]
21419    const fn test_kshiftli_mask32() {
21420        let a: __mmask32 = 0b0110100101101001_0110100101101001;
21421        let r = _kshiftli_mask32::<3>(a);
21422        let e: __mmask32 = 0b0100101101001011_0100101101001000;
21423        assert_eq!(r, e);
21424
21425        let r = _kshiftli_mask32::<31>(a);
21426        let e: __mmask32 = 0b1000000000000000_0000000000000000;
21427        assert_eq!(r, e);
21428
21429        let r = _kshiftli_mask32::<32>(a);
21430        let e: __mmask32 = 0b0000000000000000_0000000000000000;
21431        assert_eq!(r, e);
21432
21433        let r = _kshiftli_mask32::<33>(a);
21434        let e: __mmask32 = 0b0000000000000000_0000000000000000;
21435        assert_eq!(r, e);
21436    }
21437
21438    #[simd_test(enable = "avx512bw")]
21439    const fn test_kshiftli_mask64() {
21440        let a: __mmask64 = 0b0110100101101001_0110100101101001;
21441        let r = _kshiftli_mask64::<3>(a);
21442        let e: __mmask64 = 0b0110100101101001011_0100101101001000;
21443        assert_eq!(r, e);
21444
21445        let r = _kshiftli_mask64::<63>(a);
21446        let e: __mmask64 = 0b1000000000000000_0000000000000000_0000000000000000_0000000000000000;
21447        assert_eq!(r, e);
21448
21449        let r = _kshiftli_mask64::<64>(a);
21450        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
21451        assert_eq!(r, e);
21452
21453        let r = _kshiftli_mask64::<65>(a);
21454        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
21455        assert_eq!(r, e);
21456    }
21457
21458    #[simd_test(enable = "avx512bw")]
21459    const fn test_kshiftri_mask32() {
21460        let a: __mmask32 = 0b1010100101101001_0110100101101001;
21461        let r = _kshiftri_mask32::<3>(a);
21462        let e: __mmask32 = 0b0001010100101101_0010110100101101;
21463        assert_eq!(r, e);
21464
21465        let r = _kshiftri_mask32::<31>(a);
21466        let e: __mmask32 = 0b0000000000000000_0000000000000001;
21467        assert_eq!(r, e);
21468
21469        let r = _kshiftri_mask32::<32>(a);
21470        let e: __mmask32 = 0b0000000000000000_0000000000000000;
21471        assert_eq!(r, e);
21472
21473        let r = _kshiftri_mask32::<33>(a);
21474        let e: __mmask32 = 0b0000000000000000_0000000000000000;
21475        assert_eq!(r, e);
21476    }
21477
21478    #[simd_test(enable = "avx512bw")]
21479    const fn test_kshiftri_mask64() {
21480        let a: __mmask64 = 0b1010100101101001011_0100101101001000;
21481        let r = _kshiftri_mask64::<3>(a);
21482        let e: __mmask64 = 0b1010100101101001_0110100101101001;
21483        assert_eq!(r, e);
21484
21485        let r = _kshiftri_mask64::<34>(a);
21486        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000001;
21487        assert_eq!(r, e);
21488
21489        let r = _kshiftri_mask64::<35>(a);
21490        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
21491        assert_eq!(r, e);
21492
21493        let r = _kshiftri_mask64::<64>(a);
21494        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
21495        assert_eq!(r, e);
21496
21497        let r = _kshiftri_mask64::<65>(a);
21498        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
21499        assert_eq!(r, e);
21500    }
21501
21502    #[simd_test(enable = "avx512bw")]
21503    const fn test_ktest_mask32_u8() {
21504        let a: __mmask32 = 0b0110100100111100_0110100100111100;
21505        let b: __mmask32 = 0b1001011011000011_1001011011000011;
21506        let mut and_not: u8 = 0;
21507        let r = unsafe { _ktest_mask32_u8(a, b, &mut and_not) };
21508        assert_eq!(r, 1);
21509        assert_eq!(and_not, 0);
21510    }
21511
21512    #[simd_test(enable = "avx512bw")]
21513    const fn test_ktestc_mask32_u8() {
21514        let a: __mmask32 = 0b0110100100111100_0110100100111100;
21515        let b: __mmask32 = 0b1001011011000011_1001011011000011;
21516        let r = _ktestc_mask32_u8(a, b);
21517        assert_eq!(r, 0);
21518    }
21519
21520    #[simd_test(enable = "avx512bw")]
21521    const fn test_ktestz_mask32_u8() {
21522        let a: __mmask32 = 0b0110100100111100_0110100100111100;
21523        let b: __mmask32 = 0b1001011011000011_1001011011000011;
21524        let r = _ktestz_mask32_u8(a, b);
21525        assert_eq!(r, 1);
21526    }
21527
21528    #[simd_test(enable = "avx512bw")]
21529    const fn test_ktest_mask64_u8() {
21530        let a: __mmask64 = 0b0110100100111100_0110100100111100;
21531        let b: __mmask64 = 0b1001011011000011_1001011011000011;
21532        let mut and_not: u8 = 0;
21533        let r = unsafe { _ktest_mask64_u8(a, b, &mut and_not) };
21534        assert_eq!(r, 1);
21535        assert_eq!(and_not, 0);
21536    }
21537
21538    #[simd_test(enable = "avx512bw")]
21539    const fn test_ktestc_mask64_u8() {
21540        let a: __mmask64 = 0b0110100100111100_0110100100111100;
21541        let b: __mmask64 = 0b1001011011000011_1001011011000011;
21542        let r = _ktestc_mask64_u8(a, b);
21543        assert_eq!(r, 0);
21544    }
21545
21546    #[simd_test(enable = "avx512bw")]
21547    const fn test_ktestz_mask64_u8() {
21548        let a: __mmask64 = 0b0110100100111100_0110100100111100;
21549        let b: __mmask64 = 0b1001011011000011_1001011011000011;
21550        let r = _ktestz_mask64_u8(a, b);
21551        assert_eq!(r, 1);
21552    }
21553
21554    #[simd_test(enable = "avx512bw")]
21555    const fn test_mm512_kunpackw() {
21556        let a: u32 = 0x00110011;
21557        let b: u32 = 0x00001011;
21558        let r = _mm512_kunpackw(a, b);
21559        let e: u32 = 0x00111011;
21560        assert_eq!(r, e);
21561    }
21562
21563    #[simd_test(enable = "avx512bw")]
21564    const fn test_mm512_kunpackd() {
21565        let a: u64 = 0x11001100_00110011;
21566        let b: u64 = 0x00101110_00001011;
21567        let r = _mm512_kunpackd(a, b);
21568        let e: u64 = 0x00110011_00001011;
21569        assert_eq!(r, e);
21570    }
21571
21572    #[simd_test(enable = "avx512bw")]
21573    const fn test_mm512_cvtepi16_epi8() {
21574        let a = _mm512_set1_epi16(2);
21575        let r = _mm512_cvtepi16_epi8(a);
21576        let e = _mm256_set1_epi8(2);
21577        assert_eq_m256i(r, e);
21578    }
21579
21580    #[simd_test(enable = "avx512bw")]
21581    const fn test_mm512_mask_cvtepi16_epi8() {
21582        let src = _mm256_set1_epi8(1);
21583        let a = _mm512_set1_epi16(2);
21584        let r = _mm512_mask_cvtepi16_epi8(src, 0, a);
21585        assert_eq_m256i(r, src);
21586        let r = _mm512_mask_cvtepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
21587        let e = _mm256_set1_epi8(2);
21588        assert_eq_m256i(r, e);
21589    }
21590
21591    #[simd_test(enable = "avx512bw")]
21592    const fn test_mm512_maskz_cvtepi16_epi8() {
21593        let a = _mm512_set1_epi16(2);
21594        let r = _mm512_maskz_cvtepi16_epi8(0, a);
21595        assert_eq_m256i(r, _mm256_setzero_si256());
21596        let r = _mm512_maskz_cvtepi16_epi8(0b11111111_11111111_11111111_11111111, a);
21597        let e = _mm256_set1_epi8(2);
21598        assert_eq_m256i(r, e);
21599    }
21600
21601    #[simd_test(enable = "avx512bw,avx512vl")]
21602    const fn test_mm256_cvtepi16_epi8() {
21603        let a = _mm256_set1_epi16(2);
21604        let r = _mm256_cvtepi16_epi8(a);
21605        let e = _mm_set1_epi8(2);
21606        assert_eq_m128i(r, e);
21607    }
21608
21609    #[simd_test(enable = "avx512bw,avx512vl")]
21610    const fn test_mm256_mask_cvtepi16_epi8() {
21611        let src = _mm_set1_epi8(1);
21612        let a = _mm256_set1_epi16(2);
21613        let r = _mm256_mask_cvtepi16_epi8(src, 0, a);
21614        assert_eq_m128i(r, src);
21615        let r = _mm256_mask_cvtepi16_epi8(src, 0b11111111_11111111, a);
21616        let e = _mm_set1_epi8(2);
21617        assert_eq_m128i(r, e);
21618    }
21619
21620    #[simd_test(enable = "avx512bw,avx512vl")]
21621    const fn test_mm256_maskz_cvtepi16_epi8() {
21622        let a = _mm256_set1_epi16(2);
21623        let r = _mm256_maskz_cvtepi16_epi8(0, a);
21624        assert_eq_m128i(r, _mm_setzero_si128());
21625        let r = _mm256_maskz_cvtepi16_epi8(0b11111111_11111111, a);
21626        let e = _mm_set1_epi8(2);
21627        assert_eq_m128i(r, e);
21628    }
21629
21630    #[simd_test(enable = "avx512bw,avx512vl")]
21631    const fn test_mm_cvtepi16_epi8() {
21632        let a = _mm_set1_epi16(2);
21633        let r = _mm_cvtepi16_epi8(a);
21634        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
21635        assert_eq_m128i(r, e);
21636    }
21637
21638    #[simd_test(enable = "avx512bw,avx512vl")]
21639    const fn test_mm_mask_cvtepi16_epi8() {
21640        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
21641        let a = _mm_set1_epi16(2);
21642        let r = _mm_mask_cvtepi16_epi8(src, 0, a);
21643        assert_eq_m128i(r, src);
21644        let r = _mm_mask_cvtepi16_epi8(src, 0b11111111, a);
21645        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
21646        assert_eq_m128i(r, e);
21647    }
21648
21649    #[simd_test(enable = "avx512bw,avx512vl")]
21650    const fn test_mm_maskz_cvtepi16_epi8() {
21651        let a = _mm_set1_epi16(2);
21652        let r = _mm_maskz_cvtepi16_epi8(0, a);
21653        assert_eq_m128i(r, _mm_setzero_si128());
21654        let r = _mm_maskz_cvtepi16_epi8(0b11111111, a);
21655        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
21656        assert_eq_m128i(r, e);
21657    }
21658
21659    #[simd_test(enable = "avx512bw")]
21660    fn test_mm512_cvtsepi16_epi8() {
21661        let a = _mm512_set1_epi16(i16::MAX);
21662        let r = _mm512_cvtsepi16_epi8(a);
21663        let e = _mm256_set1_epi8(i8::MAX);
21664        assert_eq_m256i(r, e);
21665    }
21666
21667    #[simd_test(enable = "avx512bw")]
21668    fn test_mm512_mask_cvtsepi16_epi8() {
21669        let src = _mm256_set1_epi8(1);
21670        let a = _mm512_set1_epi16(i16::MAX);
21671        let r = _mm512_mask_cvtsepi16_epi8(src, 0, a);
21672        assert_eq_m256i(r, src);
21673        let r = _mm512_mask_cvtsepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
21674        let e = _mm256_set1_epi8(i8::MAX);
21675        assert_eq_m256i(r, e);
21676    }
21677
21678    #[simd_test(enable = "avx512bw,avx512vl")]
21679    fn test_mm256_cvtsepi16_epi8() {
21680        let a = _mm256_set1_epi16(i16::MAX);
21681        let r = _mm256_cvtsepi16_epi8(a);
21682        let e = _mm_set1_epi8(i8::MAX);
21683        assert_eq_m128i(r, e);
21684    }
21685
21686    #[simd_test(enable = "avx512bw,avx512vl")]
21687    fn test_mm256_mask_cvtsepi16_epi8() {
21688        let src = _mm_set1_epi8(1);
21689        let a = _mm256_set1_epi16(i16::MAX);
21690        let r = _mm256_mask_cvtsepi16_epi8(src, 0, a);
21691        assert_eq_m128i(r, src);
21692        let r = _mm256_mask_cvtsepi16_epi8(src, 0b11111111_11111111, a);
21693        let e = _mm_set1_epi8(i8::MAX);
21694        assert_eq_m128i(r, e);
21695    }
21696
21697    #[simd_test(enable = "avx512bw,avx512vl")]
21698    fn test_mm256_maskz_cvtsepi16_epi8() {
21699        let a = _mm256_set1_epi16(i16::MAX);
21700        let r = _mm256_maskz_cvtsepi16_epi8(0, a);
21701        assert_eq_m128i(r, _mm_setzero_si128());
21702        let r = _mm256_maskz_cvtsepi16_epi8(0b11111111_11111111, a);
21703        let e = _mm_set1_epi8(i8::MAX);
21704        assert_eq_m128i(r, e);
21705    }
21706
21707    #[simd_test(enable = "avx512bw,avx512vl")]
21708    fn test_mm_cvtsepi16_epi8() {
21709        let a = _mm_set1_epi16(i16::MAX);
21710        let r = _mm_cvtsepi16_epi8(a);
21711        #[rustfmt::skip]
21712        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
21713        assert_eq_m128i(r, e);
21714    }
21715
21716    #[simd_test(enable = "avx512bw,avx512vl")]
21717    fn test_mm_mask_cvtsepi16_epi8() {
21718        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
21719        let a = _mm_set1_epi16(i16::MAX);
21720        let r = _mm_mask_cvtsepi16_epi8(src, 0, a);
21721        assert_eq_m128i(r, src);
21722        let r = _mm_mask_cvtsepi16_epi8(src, 0b11111111, a);
21723        #[rustfmt::skip]
21724        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
21725        assert_eq_m128i(r, e);
21726    }
21727
21728    #[simd_test(enable = "avx512bw,avx512vl")]
21729    fn test_mm_maskz_cvtsepi16_epi8() {
21730        let a = _mm_set1_epi16(i16::MAX);
21731        let r = _mm_maskz_cvtsepi16_epi8(0, a);
21732        assert_eq_m128i(r, _mm_setzero_si128());
21733        let r = _mm_maskz_cvtsepi16_epi8(0b11111111, a);
21734        #[rustfmt::skip]
21735        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
21736        assert_eq_m128i(r, e);
21737    }
21738
21739    #[simd_test(enable = "avx512bw")]
21740    fn test_mm512_maskz_cvtsepi16_epi8() {
21741        let a = _mm512_set1_epi16(i16::MAX);
21742        let r = _mm512_maskz_cvtsepi16_epi8(0, a);
21743        assert_eq_m256i(r, _mm256_setzero_si256());
21744        let r = _mm512_maskz_cvtsepi16_epi8(0b11111111_11111111_11111111_11111111, a);
21745        let e = _mm256_set1_epi8(i8::MAX);
21746        assert_eq_m256i(r, e);
21747    }
21748
21749    #[simd_test(enable = "avx512bw")]
21750    fn test_mm512_cvtusepi16_epi8() {
21751        let a = _mm512_set1_epi16(i16::MIN);
21752        let r = _mm512_cvtusepi16_epi8(a);
21753        let e = _mm256_set1_epi8(-1);
21754        assert_eq_m256i(r, e);
21755    }
21756
21757    #[simd_test(enable = "avx512bw")]
21758    fn test_mm512_mask_cvtusepi16_epi8() {
21759        let src = _mm256_set1_epi8(1);
21760        let a = _mm512_set1_epi16(i16::MIN);
21761        let r = _mm512_mask_cvtusepi16_epi8(src, 0, a);
21762        assert_eq_m256i(r, src);
21763        let r = _mm512_mask_cvtusepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
21764        let e = _mm256_set1_epi8(-1);
21765        assert_eq_m256i(r, e);
21766    }
21767
21768    #[simd_test(enable = "avx512bw")]
21769    fn test_mm512_maskz_cvtusepi16_epi8() {
21770        let a = _mm512_set1_epi16(i16::MIN);
21771        let r = _mm512_maskz_cvtusepi16_epi8(0, a);
21772        assert_eq_m256i(r, _mm256_setzero_si256());
21773        let r = _mm512_maskz_cvtusepi16_epi8(0b11111111_11111111_11111111_11111111, a);
21774        let e = _mm256_set1_epi8(-1);
21775        assert_eq_m256i(r, e);
21776    }
21777
21778    #[simd_test(enable = "avx512bw,avx512vl")]
21779    fn test_mm256_cvtusepi16_epi8() {
21780        let a = _mm256_set1_epi16(i16::MIN);
21781        let r = _mm256_cvtusepi16_epi8(a);
21782        let e = _mm_set1_epi8(-1);
21783        assert_eq_m128i(r, e);
21784    }
21785
21786    #[simd_test(enable = "avx512bw,avx512vl")]
21787    fn test_mm256_mask_cvtusepi16_epi8() {
21788        let src = _mm_set1_epi8(1);
21789        let a = _mm256_set1_epi16(i16::MIN);
21790        let r = _mm256_mask_cvtusepi16_epi8(src, 0, a);
21791        assert_eq_m128i(r, src);
21792        let r = _mm256_mask_cvtusepi16_epi8(src, 0b11111111_11111111, a);
21793        let e = _mm_set1_epi8(-1);
21794        assert_eq_m128i(r, e);
21795    }
21796
21797    #[simd_test(enable = "avx512bw,avx512vl")]
21798    fn test_mm256_maskz_cvtusepi16_epi8() {
21799        let a = _mm256_set1_epi16(i16::MIN);
21800        let r = _mm256_maskz_cvtusepi16_epi8(0, a);
21801        assert_eq_m128i(r, _mm_setzero_si128());
21802        let r = _mm256_maskz_cvtusepi16_epi8(0b11111111_11111111, a);
21803        let e = _mm_set1_epi8(-1);
21804        assert_eq_m128i(r, e);
21805    }
21806
21807    #[simd_test(enable = "avx512bw,avx512vl")]
21808    fn test_mm_cvtusepi16_epi8() {
21809        let a = _mm_set1_epi16(i16::MIN);
21810        let r = _mm_cvtusepi16_epi8(a);
21811        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
21812        assert_eq_m128i(r, e);
21813    }
21814
21815    #[simd_test(enable = "avx512bw,avx512vl")]
21816    fn test_mm_mask_cvtusepi16_epi8() {
21817        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
21818        let a = _mm_set1_epi16(i16::MIN);
21819        let r = _mm_mask_cvtusepi16_epi8(src, 0, a);
21820        assert_eq_m128i(r, src);
21821        let r = _mm_mask_cvtusepi16_epi8(src, 0b11111111, a);
21822        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
21823        assert_eq_m128i(r, e);
21824    }
21825
21826    #[simd_test(enable = "avx512bw,avx512vl")]
21827    fn test_mm_maskz_cvtusepi16_epi8() {
21828        let a = _mm_set1_epi16(i16::MIN);
21829        let r = _mm_maskz_cvtusepi16_epi8(0, a);
21830        assert_eq_m128i(r, _mm_setzero_si128());
21831        let r = _mm_maskz_cvtusepi16_epi8(0b11111111, a);
21832        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
21833        assert_eq_m128i(r, e);
21834    }
21835
21836    #[simd_test(enable = "avx512bw")]
21837    const fn test_mm512_cvtepi8_epi16() {
21838        let a = _mm256_set1_epi8(2);
21839        let r = _mm512_cvtepi8_epi16(a);
21840        let e = _mm512_set1_epi16(2);
21841        assert_eq_m512i(r, e);
21842    }
21843
21844    #[simd_test(enable = "avx512bw")]
21845    const fn test_mm512_mask_cvtepi8_epi16() {
21846        let src = _mm512_set1_epi16(1);
21847        let a = _mm256_set1_epi8(2);
21848        let r = _mm512_mask_cvtepi8_epi16(src, 0, a);
21849        assert_eq_m512i(r, src);
21850        let r = _mm512_mask_cvtepi8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
21851        let e = _mm512_set1_epi16(2);
21852        assert_eq_m512i(r, e);
21853    }
21854
21855    #[simd_test(enable = "avx512bw")]
21856    const fn test_mm512_maskz_cvtepi8_epi16() {
21857        let a = _mm256_set1_epi8(2);
21858        let r = _mm512_maskz_cvtepi8_epi16(0, a);
21859        assert_eq_m512i(r, _mm512_setzero_si512());
21860        let r = _mm512_maskz_cvtepi8_epi16(0b11111111_11111111_11111111_11111111, a);
21861        let e = _mm512_set1_epi16(2);
21862        assert_eq_m512i(r, e);
21863    }
21864
21865    #[simd_test(enable = "avx512bw,avx512vl")]
21866    const fn test_mm256_mask_cvtepi8_epi16() {
21867        let src = _mm256_set1_epi16(1);
21868        let a = _mm_set1_epi8(2);
21869        let r = _mm256_mask_cvtepi8_epi16(src, 0, a);
21870        assert_eq_m256i(r, src);
21871        let r = _mm256_mask_cvtepi8_epi16(src, 0b11111111_11111111, a);
21872        let e = _mm256_set1_epi16(2);
21873        assert_eq_m256i(r, e);
21874    }
21875
21876    #[simd_test(enable = "avx512bw,avx512vl")]
21877    const fn test_mm256_maskz_cvtepi8_epi16() {
21878        let a = _mm_set1_epi8(2);
21879        let r = _mm256_maskz_cvtepi8_epi16(0, a);
21880        assert_eq_m256i(r, _mm256_setzero_si256());
21881        let r = _mm256_maskz_cvtepi8_epi16(0b11111111_11111111, a);
21882        let e = _mm256_set1_epi16(2);
21883        assert_eq_m256i(r, e);
21884    }
21885
21886    #[simd_test(enable = "avx512bw,avx512vl")]
21887    const fn test_mm_mask_cvtepi8_epi16() {
21888        let src = _mm_set1_epi16(1);
21889        let a = _mm_set1_epi8(2);
21890        let r = _mm_mask_cvtepi8_epi16(src, 0, a);
21891        assert_eq_m128i(r, src);
21892        let r = _mm_mask_cvtepi8_epi16(src, 0b11111111, a);
21893        let e = _mm_set1_epi16(2);
21894        assert_eq_m128i(r, e);
21895    }
21896
21897    #[simd_test(enable = "avx512bw,avx512vl")]
21898    const fn test_mm_maskz_cvtepi8_epi16() {
21899        let a = _mm_set1_epi8(2);
21900        let r = _mm_maskz_cvtepi8_epi16(0, a);
21901        assert_eq_m128i(r, _mm_setzero_si128());
21902        let r = _mm_maskz_cvtepi8_epi16(0b11111111, a);
21903        let e = _mm_set1_epi16(2);
21904        assert_eq_m128i(r, e);
21905    }
21906
21907    #[simd_test(enable = "avx512bw")]
21908    const fn test_mm512_cvtepu8_epi16() {
21909        let a = _mm256_set1_epi8(2);
21910        let r = _mm512_cvtepu8_epi16(a);
21911        let e = _mm512_set1_epi16(2);
21912        assert_eq_m512i(r, e);
21913    }
21914
21915    #[simd_test(enable = "avx512bw")]
21916    const fn test_mm512_mask_cvtepu8_epi16() {
21917        let src = _mm512_set1_epi16(1);
21918        let a = _mm256_set1_epi8(2);
21919        let r = _mm512_mask_cvtepu8_epi16(src, 0, a);
21920        assert_eq_m512i(r, src);
21921        let r = _mm512_mask_cvtepu8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
21922        let e = _mm512_set1_epi16(2);
21923        assert_eq_m512i(r, e);
21924    }
21925
21926    #[simd_test(enable = "avx512bw")]
21927    const fn test_mm512_maskz_cvtepu8_epi16() {
21928        let a = _mm256_set1_epi8(2);
21929        let r = _mm512_maskz_cvtepu8_epi16(0, a);
21930        assert_eq_m512i(r, _mm512_setzero_si512());
21931        let r = _mm512_maskz_cvtepu8_epi16(0b11111111_11111111_11111111_11111111, a);
21932        let e = _mm512_set1_epi16(2);
21933        assert_eq_m512i(r, e);
21934    }
21935
21936    #[simd_test(enable = "avx512bw,avx512vl")]
21937    const fn test_mm256_mask_cvtepu8_epi16() {
21938        let src = _mm256_set1_epi16(1);
21939        let a = _mm_set1_epi8(2);
21940        let r = _mm256_mask_cvtepu8_epi16(src, 0, a);
21941        assert_eq_m256i(r, src);
21942        let r = _mm256_mask_cvtepu8_epi16(src, 0b11111111_11111111, a);
21943        let e = _mm256_set1_epi16(2);
21944        assert_eq_m256i(r, e);
21945    }
21946
21947    #[simd_test(enable = "avx512bw,avx512vl")]
21948    const fn test_mm256_maskz_cvtepu8_epi16() {
21949        let a = _mm_set1_epi8(2);
21950        let r = _mm256_maskz_cvtepu8_epi16(0, a);
21951        assert_eq_m256i(r, _mm256_setzero_si256());
21952        let r = _mm256_maskz_cvtepu8_epi16(0b11111111_11111111, a);
21953        let e = _mm256_set1_epi16(2);
21954        assert_eq_m256i(r, e);
21955    }
21956
21957    #[simd_test(enable = "avx512bw,avx512vl")]
21958    const fn test_mm_mask_cvtepu8_epi16() {
21959        let src = _mm_set1_epi16(1);
21960        let a = _mm_set1_epi8(2);
21961        let r = _mm_mask_cvtepu8_epi16(src, 0, a);
21962        assert_eq_m128i(r, src);
21963        let r = _mm_mask_cvtepu8_epi16(src, 0b11111111, a);
21964        let e = _mm_set1_epi16(2);
21965        assert_eq_m128i(r, e);
21966    }
21967
21968    #[simd_test(enable = "avx512bw,avx512vl")]
21969    const fn test_mm_maskz_cvtepu8_epi16() {
21970        let a = _mm_set1_epi8(2);
21971        let r = _mm_maskz_cvtepu8_epi16(0, a);
21972        assert_eq_m128i(r, _mm_setzero_si128());
21973        let r = _mm_maskz_cvtepu8_epi16(0b11111111, a);
21974        let e = _mm_set1_epi16(2);
21975        assert_eq_m128i(r, e);
21976    }
21977
21978    #[simd_test(enable = "avx512bw")]
21979    const fn test_mm512_bslli_epi128() {
21980        #[rustfmt::skip]
21981        let a = _mm512_set_epi8(
21982            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
21983            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
21984            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
21985            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
21986        );
21987        let r = _mm512_bslli_epi128::<9>(a);
21988        #[rustfmt::skip]
21989        let e = _mm512_set_epi8(
21990            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
21991            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
21992            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
21993            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
21994        );
21995        assert_eq_m512i(r, e);
21996    }
21997
21998    #[simd_test(enable = "avx512bw")]
21999    const fn test_mm512_bsrli_epi128() {
22000        #[rustfmt::skip]
22001        let a = _mm512_set_epi8(
22002            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
22003            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
22004            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
22005            49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
22006        );
22007        let r = _mm512_bsrli_epi128::<3>(a);
22008        #[rustfmt::skip]
22009        let e = _mm512_set_epi8(
22010            0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
22011            0, 0, 0, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
22012            0, 0, 0, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
22013            0, 0, 0, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
22014        );
22015        assert_eq_m512i(r, e);
22016    }
22017
22018    #[simd_test(enable = "avx512bw")]
22019    const fn test_mm512_alignr_epi8() {
22020        #[rustfmt::skip]
22021        let a = _mm512_set_epi8(
22022            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
22023            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
22024            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
22025            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
22026        );
22027        let b = _mm512_set1_epi8(1);
22028        let r = _mm512_alignr_epi8::<14>(a, b);
22029        #[rustfmt::skip]
22030        let e = _mm512_set_epi8(
22031            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
22032            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
22033            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
22034            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
22035        );
22036        assert_eq_m512i(r, e);
22037    }
22038
22039    #[simd_test(enable = "avx512bw")]
22040    const fn test_mm512_mask_alignr_epi8() {
22041        #[rustfmt::skip]
22042        let a = _mm512_set_epi8(
22043            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
22044            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
22045            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
22046            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
22047        );
22048        let b = _mm512_set1_epi8(1);
22049        let r = _mm512_mask_alignr_epi8::<14>(a, 0, a, b);
22050        assert_eq_m512i(r, a);
22051        let r = _mm512_mask_alignr_epi8::<14>(
22052            a,
22053            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
22054            a,
22055            b,
22056        );
22057        #[rustfmt::skip]
22058        let e = _mm512_set_epi8(
22059            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
22060            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
22061            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
22062            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
22063        );
22064        assert_eq_m512i(r, e);
22065    }
22066
22067    #[simd_test(enable = "avx512bw")]
22068    const fn test_mm512_maskz_alignr_epi8() {
22069        #[rustfmt::skip]
22070        let a = _mm512_set_epi8(
22071            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
22072            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
22073            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
22074            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
22075        );
22076        let b = _mm512_set1_epi8(1);
22077        let r = _mm512_maskz_alignr_epi8::<14>(0, a, b);
22078        assert_eq_m512i(r, _mm512_setzero_si512());
22079        let r = _mm512_maskz_alignr_epi8::<14>(
22080            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
22081            a,
22082            b,
22083        );
22084        #[rustfmt::skip]
22085        let e = _mm512_set_epi8(
22086            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
22087            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
22088            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
22089            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
22090        );
22091        assert_eq_m512i(r, e);
22092    }
22093
22094    #[simd_test(enable = "avx512bw,avx512vl")]
22095    const fn test_mm256_mask_alignr_epi8() {
22096        #[rustfmt::skip]
22097        let a = _mm256_set_epi8(
22098            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
22099            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
22100        );
22101        let b = _mm256_set1_epi8(1);
22102        let r = _mm256_mask_alignr_epi8::<14>(a, 0, a, b);
22103        assert_eq_m256i(r, a);
22104        let r = _mm256_mask_alignr_epi8::<14>(a, 0b11111111_11111111_11111111_11111111, a, b);
22105        #[rustfmt::skip]
22106        let e = _mm256_set_epi8(
22107            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
22108            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
22109        );
22110        assert_eq_m256i(r, e);
22111    }
22112
22113    #[simd_test(enable = "avx512bw,avx512vl")]
22114    const fn test_mm256_maskz_alignr_epi8() {
22115        #[rustfmt::skip]
22116        let a = _mm256_set_epi8(
22117            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
22118            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
22119        );
22120        let b = _mm256_set1_epi8(1);
22121        let r = _mm256_maskz_alignr_epi8::<14>(0, a, b);
22122        assert_eq_m256i(r, _mm256_setzero_si256());
22123        let r = _mm256_maskz_alignr_epi8::<14>(0b11111111_11111111_11111111_11111111, a, b);
22124        #[rustfmt::skip]
22125        let e = _mm256_set_epi8(
22126            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
22127            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
22128        );
22129        assert_eq_m256i(r, e);
22130    }
22131
22132    #[simd_test(enable = "avx512bw,avx512vl")]
22133    const fn test_mm_mask_alignr_epi8() {
22134        let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
22135        let b = _mm_set1_epi8(1);
22136        let r = _mm_mask_alignr_epi8::<14>(a, 0, a, b);
22137        assert_eq_m128i(r, a);
22138        let r = _mm_mask_alignr_epi8::<14>(a, 0b11111111_11111111, a, b);
22139        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
22140        assert_eq_m128i(r, e);
22141    }
22142
22143    #[simd_test(enable = "avx512bw,avx512vl")]
22144    const fn test_mm_maskz_alignr_epi8() {
22145        let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
22146        let b = _mm_set1_epi8(1);
22147        let r = _mm_maskz_alignr_epi8::<14>(0, a, b);
22148        assert_eq_m128i(r, _mm_setzero_si128());
22149        let r = _mm_maskz_alignr_epi8::<14>(0b11111111_11111111, a, b);
22150        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
22151        assert_eq_m128i(r, e);
22152    }
22153
22154    #[simd_test(enable = "avx512bw")]
22155    fn test_mm512_mask_cvtsepi16_storeu_epi8() {
22156        let a = _mm512_set1_epi16(i16::MAX);
22157        let mut r = _mm256_undefined_si256();
22158        unsafe {
22159            _mm512_mask_cvtsepi16_storeu_epi8(
22160                &mut r as *mut _ as *mut i8,
22161                0b11111111_11111111_11111111_11111111,
22162                a,
22163            );
22164        }
22165        let e = _mm256_set1_epi8(i8::MAX);
22166        assert_eq_m256i(r, e);
22167    }
22168
22169    #[simd_test(enable = "avx512bw,avx512vl")]
22170    fn test_mm256_mask_cvtsepi16_storeu_epi8() {
22171        let a = _mm256_set1_epi16(i16::MAX);
22172        let mut r = _mm_undefined_si128();
22173        unsafe {
22174            _mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
22175        }
22176        let e = _mm_set1_epi8(i8::MAX);
22177        assert_eq_m128i(r, e);
22178    }
22179
22180    #[simd_test(enable = "avx512bw,avx512vl")]
22181    fn test_mm_mask_cvtsepi16_storeu_epi8() {
22182        let a = _mm_set1_epi16(i16::MAX);
22183        let mut r = _mm_set1_epi8(0);
22184        unsafe {
22185            _mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
22186        }
22187        #[rustfmt::skip]
22188        let e = _mm_set_epi8(
22189            0, 0, 0, 0, 0, 0, 0, 0,
22190            i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
22191        );
22192        assert_eq_m128i(r, e);
22193    }
22194
22195    #[simd_test(enable = "avx512bw")]
22196    fn test_mm512_mask_cvtepi16_storeu_epi8() {
22197        let a = _mm512_set1_epi16(8);
22198        let mut r = _mm256_undefined_si256();
22199        unsafe {
22200            _mm512_mask_cvtepi16_storeu_epi8(
22201                &mut r as *mut _ as *mut i8,
22202                0b11111111_11111111_11111111_11111111,
22203                a,
22204            );
22205        }
22206        let e = _mm256_set1_epi8(8);
22207        assert_eq_m256i(r, e);
22208    }
22209
22210    #[simd_test(enable = "avx512bw,avx512vl")]
22211    fn test_mm256_mask_cvtepi16_storeu_epi8() {
22212        let a = _mm256_set1_epi16(8);
22213        let mut r = _mm_undefined_si128();
22214        unsafe {
22215            _mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
22216        }
22217        let e = _mm_set1_epi8(8);
22218        assert_eq_m128i(r, e);
22219    }
22220
22221    #[simd_test(enable = "avx512bw,avx512vl")]
22222    fn test_mm_mask_cvtepi16_storeu_epi8() {
22223        let a = _mm_set1_epi16(8);
22224        let mut r = _mm_set1_epi8(0);
22225        unsafe {
22226            _mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
22227        }
22228        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8);
22229        assert_eq_m128i(r, e);
22230    }
22231
22232    #[simd_test(enable = "avx512bw")]
22233    fn test_mm512_mask_cvtusepi16_storeu_epi8() {
22234        let a = _mm512_set1_epi16(i16::MAX);
22235        let mut r = _mm256_undefined_si256();
22236        unsafe {
22237            _mm512_mask_cvtusepi16_storeu_epi8(
22238                &mut r as *mut _ as *mut i8,
22239                0b11111111_11111111_11111111_11111111,
22240                a,
22241            );
22242        }
22243        let e = _mm256_set1_epi8(u8::MAX as i8);
22244        assert_eq_m256i(r, e);
22245    }
22246
22247    #[simd_test(enable = "avx512bw,avx512vl")]
22248    fn test_mm256_mask_cvtusepi16_storeu_epi8() {
22249        let a = _mm256_set1_epi16(i16::MAX);
22250        let mut r = _mm_undefined_si128();
22251        unsafe {
22252            _mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
22253        }
22254        let e = _mm_set1_epi8(u8::MAX as i8);
22255        assert_eq_m128i(r, e);
22256    }
22257
22258    #[simd_test(enable = "avx512bw,avx512vl")]
22259    fn test_mm_mask_cvtusepi16_storeu_epi8() {
22260        let a = _mm_set1_epi16(i16::MAX);
22261        let mut r = _mm_set1_epi8(0);
22262        unsafe {
22263            _mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
22264        }
22265        #[rustfmt::skip]
22266        let e = _mm_set_epi8(
22267            0, 0, 0, 0,
22268            0, 0, 0, 0,
22269            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
22270            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
22271        );
22272        assert_eq_m128i(r, e);
22273    }
22274}