core/stdarch/crates/core_arch/src/x86/avx512bw.rs

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    ptr,
};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi16&expand=30)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm512_abs_epi16(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let cmp: i16x32 = simd_gt(a, i16x32::ZERO);
        transmute(simd_select(cmp, a, simd_neg(a)))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi16&expand=31)
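///
/// # Examples
///
/// A minimal sketch with illustrative values (not part of Intel's docs); it
/// assumes a `std` build so `is_x86_feature_detected!` is available:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") {
///     let r = unsafe {
///         let src = _mm512_set1_epi16(1);
///         let a = _mm512_set1_epi16(-5);
///         // Lower 16 lanes get |-5| = 5; upper 16 lanes are copied from `src`.
///         _mm512_mask_abs_epi16(src, 0x0000_FFFF, a)
///     };
/// }
/// ```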
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm512_mask_abs_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, abs, src.as_i16x32()))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi16&expand=32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm512_maskz_abs_epi16(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, abs, i16x32::ZERO))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi16&expand=28)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm256_mask_abs_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, abs, src.as_i16x16()))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi16&expand=29)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm256_maskz_abs_epi16(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, abs, i16x16::ZERO))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi16&expand=25)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm_mask_abs_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, abs, src.as_i16x8()))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi16&expand=26)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, abs, i16x8::ZERO))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi8&expand=57)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm512_abs_epi8(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let cmp: i8x64 = simd_gt(a, i8x64::ZERO);
        transmute(simd_select(cmp, a, simd_neg(a)))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi8&expand=58)
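///
/// # Examples
///
/// A minimal sketch with illustrative values (not part of Intel's docs),
/// assuming a `std` build for runtime feature detection:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") {
///     let r = unsafe {
///         let src = _mm512_set1_epi8(1);
///         let a = _mm512_set1_epi8(-7);
///         // Lower 32 lanes get |-7| = 7; upper 32 lanes are copied from `src`.
///         _mm512_mask_abs_epi8(src, 0x0000_0000_FFFF_FFFF, a)
///     };
/// }
/// ```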
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm512_mask_abs_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, abs, src.as_i8x64()))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi8&expand=59)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm512_maskz_abs_epi8(k: __mmask64, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, abs, i8x64::ZERO))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi8&expand=55)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm256_mask_abs_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, abs, src.as_i8x32()))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi8&expand=56)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm256_maskz_abs_epi8(k: __mmask32, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, abs, i8x32::ZERO))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi8&expand=52)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm_mask_abs_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, abs, src.as_i8x16()))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi8&expand=53)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm_maskz_abs_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, abs, i8x16::ZERO))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi16&expand=91)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm512_add_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i16x32(), b.as_i16x32())) }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi16&expand=92)
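///
/// # Examples
///
/// A minimal sketch with illustrative values (not part of Intel's docs),
/// assuming a `std` build for runtime feature detection. Note that this
/// non-saturating add wraps on overflow:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") {
///     let r = unsafe {
///         let a = _mm512_set1_epi16(i16::MAX);
///         let b = _mm512_set1_epi16(1);
///         // Selected (upper) lanes wrap to i16::MIN; the rest copy from `src`.
///         _mm512_mask_add_epi16(_mm512_setzero_si512(), 0xFFFF_0000, a, b)
///     };
/// }
/// ```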
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm512_mask_add_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, add, src.as_i16x32()))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi16&expand=93)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm512_maskz_add_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, add, i16x32::ZERO))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi16&expand=89)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm256_mask_add_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, add, src.as_i16x16()))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi16&expand=90)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm256_maskz_add_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, add, i16x16::ZERO))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi16&expand=86)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm_mask_add_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, add, src.as_i16x8()))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi16&expand=87)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm_maskz_add_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, add, i16x8::ZERO))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi8&expand=118)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm512_add_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i8x64(), b.as_i8x64())) }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi8&expand=119)
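///
/// # Examples
///
/// A minimal sketch with illustrative values (not part of Intel's docs),
/// assuming a `std` build for runtime feature detection. As with the 16-bit
/// variant, this add wraps on overflow:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") {
///     let r = unsafe {
///         let a = _mm512_set1_epi8(i8::MAX);
///         let b = _mm512_set1_epi8(1);
///         // Selected (upper) lanes wrap to i8::MIN; the rest copy from `src`.
///         _mm512_mask_add_epi8(_mm512_setzero_si512(), 0xFFFF_FFFF_0000_0000, a, b)
///     };
/// }
/// ```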
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm512_mask_add_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, add, src.as_i8x64()))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi8&expand=120)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm512_maskz_add_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, add, i8x64::ZERO))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi8&expand=116)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm256_mask_add_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, add, src.as_i8x32()))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi8&expand=117)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm256_maskz_add_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, add, i8x32::ZERO))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi8&expand=113)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm_mask_add_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, add, src.as_i8x16()))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi8&expand=114)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm_maskz_add_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, add, i8x16::ZERO))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu16&expand=197)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm512_adds_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_add(a.as_u16x32(), b.as_u16x32())) }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu16&expand=198)
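///
/// # Examples
///
/// A minimal sketch with illustrative values (not part of Intel's docs),
/// assuming a `std` build for runtime feature detection:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") {
///     let r = unsafe {
///         // All-ones bit pattern, i.e. u16::MAX in every unsigned lane.
///         let a = _mm512_set1_epi16(-1);
///         let b = _mm512_set1_epi16(1);
///         // Unsigned saturation: lanes stay at u16::MAX instead of wrapping.
///         _mm512_mask_adds_epu16(_mm512_setzero_si512(), 0xFFFF_FFFF, a, b)
///     };
/// }
/// ```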
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm512_mask_adds_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, add, src.as_u16x32()))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu16&expand=199)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, add, u16x32::ZERO))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu16&expand=195)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm256_mask_adds_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, add, src.as_u16x16()))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu16&expand=196)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, add, u16x16::ZERO))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu16&expand=192)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, add, src.as_u16x8()))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu16&expand=193)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, add, u16x8::ZERO))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu8&expand=206)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_add(a.as_u8x64(), b.as_u8x64())) }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu8&expand=207)
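///
/// # Examples
///
/// A minimal sketch with illustrative values (not part of Intel's docs),
/// assuming a `std` build for runtime feature detection:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") {
///     let r = unsafe {
///         // All-ones bit pattern, i.e. u8::MAX in every unsigned lane.
///         let a = _mm512_set1_epi8(-1);
///         let b = _mm512_set1_epi8(1);
///         // Unsigned saturation: lanes stay at u8::MAX instead of wrapping.
///         _mm512_mask_adds_epu8(_mm512_setzero_si512(), 0xFFFF_FFFF_FFFF_FFFF, a, b)
///     };
/// }
/// ```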
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, add, src.as_u8x64()))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu8&expand=208)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, add, u8x64::ZERO))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu8&expand=204)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, add, src.as_u8x32()))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu8&expand=205)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, add, u8x32::ZERO))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu8&expand=201)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, add, src.as_u8x16()))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu8&expand=202)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, add, u8x16::ZERO))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi16&expand=179)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm512_adds_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_add(a.as_i16x32(), b.as_i16x32())) }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi16&expand=180)
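///
/// # Examples
///
/// A minimal sketch with illustrative values (not part of Intel's docs),
/// assuming a `std` build for runtime feature detection:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") {
///     let r = unsafe {
///         let a = _mm512_set1_epi16(i16::MAX);
///         let b = _mm512_set1_epi16(1);
///         // Signed saturation: lanes clamp at i16::MAX instead of wrapping.
///         _mm512_mask_adds_epi16(_mm512_setzero_si512(), 0xFFFF_FFFF, a, b)
///     };
/// }
/// ```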
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm512_mask_adds_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, add, src.as_i16x32()))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi16&expand=181)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, add, i16x32::ZERO))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi16&expand=177)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm256_mask_adds_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, add, src.as_i16x16()))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi16&expand=178)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, add, i16x16::ZERO))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi16&expand=174)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, add, src.as_i16x8()))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi16&expand=175)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, add, i16x8::ZERO))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi8&expand=188)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_add(a.as_i8x64(), b.as_i8x64())) }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi8&expand=189)
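///
/// # Examples
///
/// A minimal sketch with illustrative values (not part of Intel's docs),
/// assuming a `std` build for runtime feature detection:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") {
///     let r = unsafe {
///         let a = _mm512_set1_epi8(i8::MAX);
///         let b = _mm512_set1_epi8(1);
///         // Signed saturation: lanes clamp at i8::MAX instead of wrapping.
///         _mm512_mask_adds_epi8(_mm512_setzero_si512(), 0xFFFF_FFFF_FFFF_FFFF, a, b)
///     };
/// }
/// ```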
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, add, src.as_i8x64()))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi8&expand=190)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, add, i8x64::ZERO))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi8&expand=186)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, add, src.as_i8x32()))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi8&expand=187)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, add, i8x32::ZERO))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi8&expand=183)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, add, src.as_i8x16()))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi8&expand=184)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub fn _mm_maskz_adds_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, add, i8x16::ZERO))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi16&expand=5685)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub fn _mm512_sub_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_sub(a.as_i16x32(), b.as_i16x32())) }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi16&expand=5683)
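///
/// # Examples
///
/// A minimal sketch with illustrative values (not part of Intel's docs),
/// assuming a `std` build for runtime feature detection. This non-saturating
/// subtraction wraps on overflow:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") {
///     let r = unsafe {
///         let a = _mm512_set1_epi16(i16::MIN);
///         let b = _mm512_set1_epi16(1);
///         // Selected lanes wrap to i16::MAX; the rest are copied from `src`.
///         _mm512_mask_sub_epi16(_mm512_setzero_si512(), 0xFFFF_FFFF, a, b)
///     };
/// }
/// ```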
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub fn _mm512_mask_sub_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi16&expand=5684)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub fn _mm512_maskz_sub_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, sub, i16x32::ZERO))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi16&expand=5680)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub fn _mm256_mask_sub_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi16&expand=5681)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub fn _mm256_maskz_sub_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, sub, i16x16::ZERO))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi16&expand=5677)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub fn _mm_mask_sub_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi16&expand=5678)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub fn _mm_maskz_sub_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, sub, i16x8::ZERO))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi8&expand=5712)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub fn _mm512_sub_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_sub(a.as_i8x64(), b.as_i8x64())) }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi8&expand=5710)
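///
/// # Examples
///
/// A minimal sketch with illustrative values (not part of Intel's docs),
/// assuming a `std` build for runtime feature detection. As with the 16-bit
/// variant, this subtraction wraps on overflow:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") {
///     let r = unsafe {
///         let a = _mm512_set1_epi8(i8::MIN);
///         let b = _mm512_set1_epi8(1);
///         // Selected lanes wrap to i8::MAX; the rest are copied from `src`.
///         _mm512_mask_sub_epi8(_mm512_setzero_si512(), 0xFFFF_FFFF_FFFF_FFFF, a, b)
///     };
/// }
/// ```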
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub fn _mm512_mask_sub_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi8&expand=5711)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub fn _mm512_maskz_sub_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, sub, i8x64::ZERO))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi8&expand=5707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub fn _mm256_mask_sub_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi8&expand=5708)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub fn _mm256_maskz_sub_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, sub, i8x32::ZERO))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi8&expand=5704)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub fn _mm_mask_sub_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi8&expand=5705)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub fn _mm_maskz_sub_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, sub, i8x16::ZERO))
    }
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu16&expand=5793)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub fn _mm512_subs_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_sub(a.as_u16x32(), b.as_u16x32())) }
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu16&expand=5791)
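///
/// # Examples
///
/// A minimal sketch with illustrative values (not part of Intel's docs),
/// assuming a `std` build for runtime feature detection:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") {
///     let r = unsafe {
///         let a = _mm512_set1_epi16(1);
///         let b = _mm512_set1_epi16(2);
///         // Unsigned saturation: 1 - 2 clamps to 0 instead of wrapping to u16::MAX.
///         _mm512_mask_subs_epu16(_mm512_setzero_si512(), 0xFFFF_FFFF, a, b)
///     };
/// }
/// ```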
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub fn _mm512_mask_subs_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, sub, src.as_u16x32()))
    }
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu16&expand=5792)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, sub, u16x32::ZERO))
    }
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu16&expand=5788)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub fn _mm256_mask_subs_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, sub, src.as_u16x16()))
    }
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu16&expand=5789)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, sub, u16x16::ZERO))
    }
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu16&expand=5785)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, sub, src.as_u16x8()))
    }
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu16&expand=5786)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, sub, u16x8::ZERO))
    }
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu8&expand=5802)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_sub(a.as_u8x64(), b.as_u8x64())) }
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu8&expand=5800)
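///
/// # Examples
///
/// A minimal sketch with illustrative values (not part of Intel's docs),
/// assuming a `std` build for runtime feature detection:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") {
///     let r = unsafe {
///         let a = _mm512_set1_epi8(1);
///         let b = _mm512_set1_epi8(2);
///         // Unsigned saturation: 1 - 2 clamps to 0 instead of wrapping to u8::MAX.
///         _mm512_mask_subs_epu8(_mm512_setzero_si512(), 0xFFFF_FFFF_FFFF_FFFF, a, b)
///     };
/// }
/// ```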
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, sub, src.as_u8x64()))
    }
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu8&expand=5801)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, sub, u8x64::ZERO))
    }
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu8&expand=5797)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, sub, src.as_u8x32()))
    }
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu8&expand=5798)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, sub, u8x32::ZERO))
    }
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu8&expand=5794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, sub, src.as_u8x16()))
    }
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1145///
1146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu8&expand=5795)
1147#[inline]
1148#[target_feature(enable = "avx512bw,avx512vl")]
1149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1150#[cfg_attr(test, assert_instr(vpsubusb))]
1151pub fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1152    unsafe {
1153        let sub = _mm_subs_epu8(a, b).as_u8x16();
1154        transmute(simd_select_bitmask(k, sub, u8x16::ZERO))
1155    }
1156}
1157
1158/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst.
1159///
1160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi16&expand=5775)
1161#[inline]
1162#[target_feature(enable = "avx512bw")]
1163#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1164#[cfg_attr(test, assert_instr(vpsubsw))]
1165pub fn _mm512_subs_epi16(a: __m512i, b: __m512i) -> __m512i {
1166    unsafe { transmute(simd_saturating_sub(a.as_i16x32(), b.as_i16x32())) }
1167}
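
// A minimal scalar model of signed saturation (illustration only): results
// are clamped to the i16 range rather than wrapping. Checked at compile time:
const _: () = {
    assert!(i16::MIN.saturating_sub(1) == i16::MIN);
    assert!(i16::MAX.saturating_sub(-1) == i16::MAX);
};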

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi16&expand=5773)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub fn _mm512_mask_subs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
    }
}

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi16&expand=5774)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, sub, i16x32::ZERO))
    }
}

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi16&expand=5770)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub fn _mm256_mask_subs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
    }
}

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi16&expand=5771)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, sub, i16x16::ZERO))
    }
}

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi16&expand=5767)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
    }
}

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi16&expand=5768)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, sub, i16x8::ZERO))
    }
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi8&expand=5784)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_sub(a.as_i8x64(), b.as_i8x64())) }
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi8&expand=5782)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
    }
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi8&expand=5783)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, sub, i8x64::ZERO))
    }
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi8&expand=5779)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
    }
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi8&expand=5780)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, sub, i8x32::ZERO))
    }
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi8&expand=5776)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
    }
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi8&expand=5777)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, sub, i8x16::ZERO))
    }
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epu16&expand=3973)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub fn _mm512_mulhi_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = simd_cast::<_, u32x32>(a.as_u16x32());
        let b = simd_cast::<_, u32x32>(b.as_u16x32());
        let r = simd_shr(simd_mul(a, b), u32x32::splat(16));
        transmute(simd_cast::<u32x32, u16x32>(r))
    }
}
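
// The unsigned high-half multiply above widens each lane to 32 bits,
// multiplies, and keeps bits [31:16]. A compile-time scalar check of that
// formula (illustration only):
const _: () = {
    let a = 50_000u32; // fits in u16
    let b = 40_000u32;
    assert!((a * b) >> 16 == 30_517); // high 16 bits of 2_000_000_000
};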

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epu16&expand=3971)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub fn _mm512_mask_mulhi_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, mul, src.as_u16x32()))
    }
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epu16&expand=3972)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub fn _mm512_maskz_mulhi_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, mul, u16x32::ZERO))
    }
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epu16&expand=3968)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub fn _mm256_mask_mulhi_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, mul, src.as_u16x16()))
    }
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epu16&expand=3969)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub fn _mm256_maskz_mulhi_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, mul, u16x16::ZERO))
    }
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epu16&expand=3965)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub fn _mm_mask_mulhi_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhi_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, mul, src.as_u16x8()))
    }
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epu16&expand=3966)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub fn _mm_maskz_mulhi_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhi_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, mul, u16x8::ZERO))
    }
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epi16&expand=3962)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub fn _mm512_mulhi_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = simd_cast::<_, i32x32>(a.as_i16x32());
        let b = simd_cast::<_, i32x32>(b.as_i16x32());
        let r = simd_shr(simd_mul(a, b), i32x32::splat(16));
        transmute(simd_cast::<i32x32, i16x32>(r))
    }
}
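
// For the signed variant the 16-bit shift is arithmetic, so a small negative
// product has an all-ones high half, i.e. -1. Compile-time scalar check
// (illustration only):
const _: () = {
    let a = -30_000i32; // fits in i16
    let b = 2i32;
    assert!((a * b) >> 16 == -1);
};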

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epi16&expand=3960)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub fn _mm512_mask_mulhi_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
    }
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epi16&expand=3961)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub fn _mm512_maskz_mulhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
    }
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epi16&expand=3957)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub fn _mm256_mask_mulhi_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
    }
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epi16&expand=3958)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub fn _mm256_maskz_mulhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
    }
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epi16&expand=3954)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub fn _mm_mask_mulhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhi_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
    }
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epi16&expand=3955)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub fn _mm_maskz_mulhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhi_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhrs_epi16&expand=3986)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm512_mulhrs_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpmulhrsw(a.as_i16x32(), b.as_i16x32())) }
}
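
// Per lane, vpmulhrsw is equivalent to (((a * b) >> 14) + 1) >> 1 with 32-bit
// intermediates, i.e. a rounded Q15 fixed-point multiply. A compile-time
// scalar check of that formula (illustration only):
const _: () = {
    let a = 0x4000i32; // 0.5 in Q15
    let b = 0x2000i32; // 0.25 in Q15
    assert!((((a * b) >> 14) + 1) >> 1 == 0x1000); // 0.125 in Q15
};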

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhrs_epi16&expand=3984)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm512_mask_mulhrs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhrs_epi16&expand=3985)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm512_maskz_mulhrs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhrs_epi16&expand=3981)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm256_mask_mulhrs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhrs_epi16&expand=3982)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm256_maskz_mulhrs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhrs_epi16&expand=3978)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm_mask_mulhrs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhrs_epi16&expand=3979)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm_maskz_mulhrs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
    }
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi16&expand=3996)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub fn _mm512_mullo_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_mul(a.as_i16x32(), b.as_i16x32())) }
}
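
// The low-half multiply simply wraps: only bits [15:0] of the 32-bit product
// are kept, which is ordinary wrapping multiplication. Compile-time scalar
// check (illustration only):
const _: () = {
    assert!(300i16.wrapping_mul(200) == -5_536); // 60_000 wraps past i16::MAX
};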

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi16&expand=3994)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub fn _mm512_mask_mullo_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mullo_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
    }
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi16&expand=3995)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub fn _mm512_maskz_mullo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mullo_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
    }
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi16&expand=3991)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub fn _mm256_mask_mullo_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mullo_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
    }
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi16&expand=3992)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub fn _mm256_maskz_mullo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mullo_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
    }
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi16&expand=3988)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub fn _mm_mask_mullo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mullo_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
    }
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi16&expand=3989)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mullo_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu16&expand=3609)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_u16x32();
        let b = b.as_u16x32();
        transmute(simd_select::<i16x32, _>(simd_gt(a, b), a, b))
    }
}
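
// Unsigned and signed max differ only in how the comparison reads the same
// bits; e.g. 0xFFFF is u16::MAX but -1 as i16. Compile-time scalar check
// (illustration only):
const _: () = {
    let x = 0xFFFFu16;
    assert!(x > 1); // unsigned compare: 65_535 wins
    assert!((x as i16) < 1); // signed compare: the same bits read as -1
};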

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu16&expand=3607)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub fn _mm512_mask_max_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, max, src.as_u16x32()))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu16&expand=3608)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub fn _mm512_maskz_max_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, max, u16x32::ZERO))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu16&expand=3604)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub fn _mm256_mask_max_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, max, src.as_u16x16()))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu16&expand=3605)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub fn _mm256_maskz_max_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, max, u16x16::ZERO))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu16&expand=3601)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub fn _mm_mask_max_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, max, src.as_u16x8()))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu16&expand=3602)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, max, u16x8::ZERO))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu8&expand=3636)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        transmute(simd_select::<i8x64, _>(simd_gt(a, b), a, b))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu8&expand=3634)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub fn _mm512_mask_max_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, max, src.as_u8x64()))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu8&expand=3635)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub fn _mm512_maskz_max_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, max, u8x64::ZERO))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu8&expand=3631)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub fn _mm256_mask_max_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, max, src.as_u8x32()))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu8&expand=3632)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub fn _mm256_maskz_max_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, max, u8x32::ZERO))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu8&expand=3628)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub fn _mm_mask_max_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, max, src.as_u8x16()))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu8&expand=3629)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, max, u8x16::ZERO))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi16&expand=3573)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
pub fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        transmute(simd_select::<i16x32, _>(simd_gt(a, b), a, b))
    }
}
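
// The scalar analogue of the simd_select(simd_gt(a, b), a, b) idiom used
// above (illustration only): pick a when a > b, otherwise b.
const _: () = {
    let (a, b) = (-5i16, 3i16);
    let max = if a > b { a } else { b };
    assert!(max == 3);
};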
1950
1951/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1952///
1953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi16&expand=3571)
1954#[inline]
1955#[target_feature(enable = "avx512bw")]
1956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1957#[cfg_attr(test, assert_instr(vpmaxsw))]
1958pub fn _mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1959    unsafe {
1960        let max = _mm512_max_epi16(a, b).as_i16x32();
1961        transmute(simd_select_bitmask(k, max, src.as_i16x32()))
1962    }
1963}
1964
1965/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1966///
1967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi16&expand=3572)
1968#[inline]
1969#[target_feature(enable = "avx512bw")]
1970#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1971#[cfg_attr(test, assert_instr(vpmaxsw))]
1972pub fn _mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1973    unsafe {
1974        let max = _mm512_max_epi16(a, b).as_i16x32();
1975        transmute(simd_select_bitmask(k, max, i16x32::ZERO))
1976    }
1977}
1978
1979/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1980///
1981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi16&expand=3568)
1982#[inline]
1983#[target_feature(enable = "avx512bw,avx512vl")]
1984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1985#[cfg_attr(test, assert_instr(vpmaxsw))]
1986pub fn _mm256_mask_max_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1987    unsafe {
1988        let max = _mm256_max_epi16(a, b).as_i16x16();
1989        transmute(simd_select_bitmask(k, max, src.as_i16x16()))
1990    }
1991}
1992
1993/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1994///
1995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi16&expand=3569)
1996#[inline]
1997#[target_feature(enable = "avx512bw,avx512vl")]
1998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1999#[cfg_attr(test, assert_instr(vpmaxsw))]
2000pub fn _mm256_maskz_max_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2001    unsafe {
2002        let max = _mm256_max_epi16(a, b).as_i16x16();
2003        transmute(simd_select_bitmask(k, max, i16x16::ZERO))
2004    }
2005}
2006
2007/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2008///
2009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi16&expand=3565)
2010#[inline]
2011#[target_feature(enable = "avx512bw,avx512vl")]
2012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2013#[cfg_attr(test, assert_instr(vpmaxsw))]
2014pub fn _mm_mask_max_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2015    unsafe {
2016        let max = _mm_max_epi16(a, b).as_i16x8();
2017        transmute(simd_select_bitmask(k, max, src.as_i16x8()))
2018    }
2019}
2020
2021/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2022///
2023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi16&expand=3566)
2024#[inline]
2025#[target_feature(enable = "avx512bw,avx512vl")]
2026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2027#[cfg_attr(test, assert_instr(vpmaxsw))]
2028pub fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2029    unsafe {
2030        let max = _mm_max_epi16(a, b).as_i16x8();
2031        transmute(simd_select_bitmask(k, max, i16x8::ZERO))
2032    }
2033}
2034
2035/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst.
2036///
2037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi8&expand=3600)
2038#[inline]
2039#[target_feature(enable = "avx512bw")]
2040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2041#[cfg_attr(test, assert_instr(vpmaxsb))]
2042pub fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i {
2043    unsafe {
2044        let a = a.as_i8x64();
2045        let b = b.as_i8x64();
2046        transmute(simd_select::<i8x64, _>(simd_gt(a, b), a, b))
2047    }
2048}
2049
2050/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2051///
2052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi8&expand=3598)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub fn _mm512_mask_max_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, max, src.as_i8x64()))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi8&expand=3599)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub fn _mm512_maskz_max_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, max, i8x64::ZERO))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi8&expand=3595)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub fn _mm256_mask_max_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, max, src.as_i8x32()))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi8&expand=3596)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub fn _mm256_maskz_max_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, max, i8x32::ZERO))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi8&expand=3592)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub fn _mm_mask_max_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, max, src.as_i8x16()))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi8&expand=3593)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, max, i8x16::ZERO))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu16&expand=3723)
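///
/// # Examples
///
/// A sketch highlighting the unsigned interpretation (illustrative values;
/// assumes runtime `avx512bw` support, so the snippet is marked `ignore`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // All lanes of a are 0xFFFF, i.e. 65535 when read as unsigned.
///     let a = _mm512_set1_epi16(-1);
///     let b = _mm512_set1_epi16(1);
///     // Unsigned comparison: every lane of r is 1.
///     let r = _mm512_min_epu16(a, b);
/// }
/// ```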
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_u16x32();
        let b = b.as_u16x32();
        transmute(simd_select::<i16x32, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu16&expand=3721)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub fn _mm512_mask_min_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, min, src.as_u16x32()))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu16&expand=3722)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub fn _mm512_maskz_min_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, min, u16x32::ZERO))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu16&expand=3718)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub fn _mm256_mask_min_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, min, src.as_u16x16()))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu16&expand=3719)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub fn _mm256_maskz_min_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, min, u16x16::ZERO))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu16&expand=3715)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub fn _mm_mask_min_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, min, src.as_u16x8()))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu16&expand=3716)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, min, u16x8::ZERO))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu8&expand=3750)
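///
/// # Examples
///
/// A sketch contrasting unsigned with signed minima (illustrative values;
/// assumes runtime `avx512bw` support, so the snippet is marked `ignore`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi8(127);
///     let b = _mm512_set1_epi8(-128); // 0x80 == 128 when read as unsigned
///     // Unsigned comparison: 127 < 128, so every lane of r is 127.
///     // _mm512_min_epi8 would instead pick -128 in every lane.
///     let r = _mm512_min_epu8(a, b);
/// }
/// ```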
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminub))]
pub fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        transmute(simd_select::<i8x64, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu8&expand=3748)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminub))]
pub fn _mm512_mask_min_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, min, src.as_u8x64()))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu8&expand=3749)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminub))]
pub fn _mm512_maskz_min_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, min, u8x64::ZERO))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu8&expand=3745)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminub))]
pub fn _mm256_mask_min_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, min, src.as_u8x32()))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu8&expand=3746)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminub))]
pub fn _mm256_maskz_min_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, min, u8x32::ZERO))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu8&expand=3742)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminub))]
pub fn _mm_mask_min_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, min, src.as_u8x16()))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu8&expand=3743)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminub))]
pub fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, min, u8x16::ZERO))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi16&expand=3687)
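///
/// # Examples
///
/// A brief signed-minimum sketch (illustrative values; assumes runtime
/// `avx512bw` support, so the snippet is marked `ignore`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(-5);
///     let b = _mm512_set1_epi16(3);
///     // Signed comparison: every lane of r is -5.
///     let r = _mm512_min_epi16(a, b);
/// }
/// ```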
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        transmute(simd_select::<i16x32, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi16&expand=3685)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm512_mask_min_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, min, src.as_i16x32()))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi16&expand=3686)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm512_maskz_min_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, min, i16x32::ZERO))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi16&expand=3682)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm256_mask_min_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, min, src.as_i16x16()))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi16&expand=3683)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm256_maskz_min_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, min, i16x16::ZERO))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi16&expand=3679)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm_mask_min_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, min, src.as_i16x8()))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi16&expand=3680)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, min, i16x8::ZERO))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi8&expand=3714)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        transmute(simd_select::<i8x64, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi8&expand=3712)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm512_mask_min_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, min, src.as_i8x64()))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi8&expand=3713)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm512_maskz_min_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, min, i8x64::ZERO))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi8&expand=3709)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm256_mask_min_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, min, src.as_i8x32()))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi8&expand=3710)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm256_maskz_min_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, min, i8x32::ZERO))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi8&expand=3706)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm_mask_min_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, min, src.as_i8x16()))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi8&expand=3707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm_maskz_min_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, min, i8x16::ZERO))
    }
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu16_mask&expand=1050)
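///
/// # Examples
///
/// A sketch of how the comparison result maps to mask bits (illustrative
/// values; assumes runtime `avx512bw` support, so the snippet is marked
/// `ignore`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(1);
///     let b = _mm512_set1_epi16(2);
///     // 1 < 2 holds in each of the 32 lanes, so all 32 mask bits are set.
///     let k: __mmask32 = _mm512_cmplt_epu16_mask(a, b);
///     assert_eq!(k, u32::MAX);
/// }
/// ```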
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmplt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<u16x32, _>(simd_lt(a.as_u16x32(), b.as_u16x32())) }
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu16_mask&expand=1051)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmplt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu16_mask&expand=1050)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmplt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<u16x16, _>(simd_lt(a.as_u16x16(), b.as_u16x16())) }
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu16_mask&expand=1049)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmplt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu16_mask&expand=1018)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmplt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u16x8, _>(simd_lt(a.as_u16x8(), b.as_u16x8())) }
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu16_mask&expand=1019)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmplt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu8_mask&expand=1068)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmplt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<u8x64, _>(simd_lt(a.as_u8x64(), b.as_u8x64())) }
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu8_mask&expand=1069)
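///
/// # Examples
///
/// A sketch of the zeromask behaviour (illustrative values; assumes runtime
/// `avx512bw` support, so the snippet is marked `ignore`): only bits that are
/// set in `k1` can appear in the result.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi8(1);
///     let b = _mm512_set1_epi8(2);
///     // The comparison is true in every lane, but bits cleared in k1
///     // are also cleared in the result.
///     let k1: __mmask64 = 0x0000_0000_FFFF_FFFF;
///     let k = _mm512_mask_cmplt_epu8_mask(k1, a, b);
///     assert_eq!(k, k1);
/// }
/// ```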
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmplt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu8_mask&expand=1066)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmplt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<u8x32, _>(simd_lt(a.as_u8x32(), b.as_u8x32())) }
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu8_mask&expand=1067)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmplt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu8_mask&expand=1064)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmplt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<u8x16, _>(simd_lt(a.as_u8x16(), b.as_u8x16())) }
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu8_mask&expand=1065)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmplt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi16_mask&expand=1022)
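///
/// # Examples
///
/// A sketch contrasting the signed compare with its unsigned counterpart
/// (illustrative values; assumes runtime `avx512bw` support, so the snippet
/// is marked `ignore`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(-1);
///     let b = _mm512_set1_epi16(0);
///     // Signed: -1 < 0 in every lane, so all bits are set. The unsigned
///     // variant (_mm512_cmplt_epu16_mask) would return 0 here, because
///     // -1 reinterprets as 0xFFFF.
///     let k = _mm512_cmplt_epi16_mask(a, b);
///     assert_eq!(k, u32::MAX);
/// }
/// ```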
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmplt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<i16x32, _>(simd_lt(a.as_i16x32(), b.as_i16x32())) }
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi16_mask&expand=1023)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmplt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi16_mask&expand=1020)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmplt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<i16x16, _>(simd_lt(a.as_i16x16(), b.as_i16x16())) }
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi16_mask&expand=1021)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmplt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16_mask&expand=1018)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmplt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi16_mask&expand=1019)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmplt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi8_mask&expand=1044)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmplt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<i8x64, _>(simd_lt(a.as_i8x64(), b.as_i8x64())) }
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi8_mask&expand=1045)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmplt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi8_mask&expand=1042)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmplt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<i8x32, _>(simd_lt(a.as_i8x32(), b.as_i8x32())) }
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi8_mask&expand=1043)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmplt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8_mask&expand=1040)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmplt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi8_mask&expand=1041)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmplt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu16_mask&expand=927)
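///
/// # Examples
///
/// A short sketch (illustrative values; assumes runtime `avx512bw` support,
/// so the snippet is marked `ignore`). The masked variants below implement
/// greater-than as `_MM_CMPINT_NLE` ("not less-or-equal"), which is the same
/// predicate:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(3);
///     let b = _mm512_set1_epi16(2);
///     // 3 > 2 in every lane, so all 32 mask bits are set.
///     let k = _mm512_cmpgt_epu16_mask(a, b);
///     assert_eq!(k, u32::MAX);
/// }
/// ```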
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpgt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<u16x32, _>(simd_gt(a.as_u16x32(), b.as_u16x32())) }
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu16_mask&expand=928)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpgt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu16_mask&expand=925)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpgt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<u16x16, _>(simd_gt(a.as_u16x16(), b.as_u16x16())) }
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu16_mask&expand=926)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpgt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu16_mask&expand=923)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpgt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u16x8, _>(simd_gt(a.as_u16x8(), b.as_u16x8())) }
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu16_mask&expand=924)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpgt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu8_mask&expand=945)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpgt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<u8x64, _>(simd_gt(a.as_u8x64(), b.as_u8x64())) }
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu8_mask&expand=946)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpgt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu8_mask&expand=943)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpgt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<u8x32, _>(simd_gt(a.as_u8x32(), b.as_u8x32())) }
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu8_mask&expand=944)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpgt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu8_mask&expand=941)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpgt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<u8x16, _>(simd_gt(a.as_u8x16(), b.as_u8x16())) }
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu8_mask&expand=942)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpgt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi16_mask&expand=897)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpgt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<i16x32, _>(simd_gt(a.as_i16x32(), b.as_i16x32())) }
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi16_mask&expand=898)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpgt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi16_mask&expand=895)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpgt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi16_mask&expand=896)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpgt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16_mask&expand=893)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpgt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi16_mask&expand=894)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpgt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi8_mask&expand=921)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpgt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<i8x64, _>(simd_gt(a.as_i8x64(), b.as_i8x64())) }
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi8_mask&expand=922)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpgt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi8_mask&expand=919)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpgt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi8_mask&expand=920)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpgt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8_mask&expand=917)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpgt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi8_mask&expand=918)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpgt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu16_mask&expand=989)
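///
/// # Examples
///
/// A short sketch (illustrative values; assumes runtime `avx512bw` support,
/// so the snippet is marked `ignore`). Equal lanes satisfy the
/// less-than-or-equal predicate:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(2);
///     let b = _mm512_set1_epi16(2);
///     let k = _mm512_cmple_epu16_mask(a, b);
///     assert_eq!(k, u32::MAX);
/// }
/// ```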
3061#[inline]
3062#[target_feature(enable = "avx512bw")]
3063#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3064#[cfg_attr(test, assert_instr(vpcmp))]
3065pub fn _mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3066    unsafe { simd_bitmask::<u16x32, _>(simd_le(a.as_u16x32(), b.as_u16x32())) }
3067}
3068
3069/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3070///
3071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu16_mask&expand=990)
3072#[inline]
3073#[target_feature(enable = "avx512bw")]
3074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3075#[cfg_attr(test, assert_instr(vpcmp))]
3076pub fn _mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
3077    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
3078}
3079
3080/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3081///
3082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu16_mask&expand=987)
3083#[inline]
3084#[target_feature(enable = "avx512bw,avx512vl")]
3085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3086#[cfg_attr(test, assert_instr(vpcmp))]
3087pub fn _mm256_cmple_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3088    unsafe { simd_bitmask::<u16x16, _>(simd_le(a.as_u16x16(), b.as_u16x16())) }
3089}
3090
3091/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3092///
3093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu16_mask&expand=988)
3094#[inline]
3095#[target_feature(enable = "avx512bw,avx512vl")]
3096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3097#[cfg_attr(test, assert_instr(vpcmp))]
3098pub fn _mm256_mask_cmple_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
3099    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
3100}
3101
3102/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3103///
3104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu16_mask&expand=985)
3105#[inline]
3106#[target_feature(enable = "avx512bw,avx512vl")]
3107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3108#[cfg_attr(test, assert_instr(vpcmp))]
3109pub fn _mm_cmple_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3110    unsafe { simd_bitmask::<u16x8, _>(simd_le(a.as_u16x8(), b.as_u16x8())) }
3111}
3112
3113/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3114///
3115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu16_mask&expand=986)
3116#[inline]
3117#[target_feature(enable = "avx512bw,avx512vl")]
3118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3119#[cfg_attr(test, assert_instr(vpcmp))]
3120pub fn _mm_mask_cmple_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
3121    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
3122}
3123
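// Zeromask semantics sketch (illustrative): the masked form is exactly the
// unmasked compare ANDed with `k1`, so for any `a`, `b`, and `k1`:
//
//     let full = _mm512_cmple_epu16_mask(a, b);
//     assert_eq!(_mm512_mask_cmple_epu16_mask(k1, a, b), full & k1);
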
/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu8_mask&expand=1007)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmple_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<u8x64, _>(simd_le(a.as_u8x64(), b.as_u8x64())) }
}

/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu8_mask&expand=1008)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmple_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu8_mask&expand=1005)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmple_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<u8x32, _>(simd_le(a.as_u8x32(), b.as_u8x32())) }
}

/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu8_mask&expand=1006)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmple_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu8_mask&expand=1003)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmple_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<u8x16, _>(simd_le(a.as_u8x16(), b.as_u8x16())) }
}

/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu8_mask&expand=1004)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmple_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}

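// Signedness sketch (illustrative): the `epu8` compares treat each byte as
// 0..=255, so a byte of 0x80 is 128 here but -128 to the `epi8` variants
// defined below:
//
//     let a = _mm_set1_epi8(-128); // every lane holds 0x80
//     let b = _mm_set1_epi8(1);
//     assert_eq!(_mm_cmple_epu8_mask(a, b), 0);      // 128 <= 1 is false
//     assert_eq!(_mm_cmple_epi8_mask(a, b), 0xFFFF); // -128 <= 1 is true
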
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi16_mask&expand=965)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmple_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<i16x32, _>(simd_le(a.as_i16x32(), b.as_i16x32())) }
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi16_mask&expand=966)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmple_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi16_mask&expand=963)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmple_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<i16x16, _>(simd_le(a.as_i16x16(), b.as_i16x16())) }
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi16_mask&expand=964)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmple_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi16_mask&expand=961)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmple_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i16x8, _>(simd_le(a.as_i16x8(), b.as_i16x8())) }
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi16_mask&expand=962)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmple_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}

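// Scalar model (illustrative): `_mm_cmple_epi16_mask` computes the same u8
// mask as this per-lane loop:
//
//     fn cmple_epi16_scalar(a: [i16; 8], b: [i16; 8]) -> u8 {
//         let mut k = 0u8;
//         for i in 0..8 {
//             if a[i] <= b[i] {
//                 k |= 1 << i;
//             }
//         }
//         k
//     }
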
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi8_mask&expand=983)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmple_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<i8x64, _>(simd_le(a.as_i8x64(), b.as_i8x64())) }
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi8_mask&expand=984)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmple_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi8_mask&expand=981)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmple_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<i8x32, _>(simd_le(a.as_i8x32(), b.as_i8x32())) }
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi8_mask&expand=982)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmple_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi8_mask&expand=979)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmple_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<i8x16, _>(simd_le(a.as_i8x16(), b.as_i8x16())) }
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi8_mask&expand=980)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmple_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}

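// Duality sketch (illustrative): less-than-or-equal with swapped operands is
// greater-than-or-equal, so for any `a` and `b`:
//
//     assert_eq!(_mm_cmple_epi8_mask(a, b), _mm_cmpge_epi8_mask(b, a));
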
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu16_mask&expand=867)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpge_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<u16x32, _>(simd_ge(a.as_u16x32(), b.as_u16x32())) }
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu16_mask&expand=868)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpge_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu16_mask&expand=865)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpge_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<u16x16, _>(simd_ge(a.as_u16x16(), b.as_u16x16())) }
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu16_mask&expand=866)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpge_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu16_mask&expand=863)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpge_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u16x8, _>(simd_ge(a.as_u16x8(), b.as_u16x8())) }
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu16_mask&expand=864)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpge_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

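// Complement sketch (illustrative): greater-than-or-equal is the bitwise
// complement of less-than, which is why the masked forms above dispatch with
// `_MM_CMPINT_NLT` ("not less than"):
//
//     assert_eq!(_mm512_cmpge_epu16_mask(a, b), !_mm512_cmplt_epu16_mask(a, b));
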
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu8_mask&expand=885)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpge_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<u8x64, _>(simd_ge(a.as_u8x64(), b.as_u8x64())) }
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu8_mask&expand=886)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpge_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu8_mask&expand=883)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpge_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<u8x32, _>(simd_ge(a.as_u8x32(), b.as_u8x32())) }
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu8_mask&expand=884)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpge_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu8_mask&expand=881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpge_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<u8x16, _>(simd_ge(a.as_u8x16(), b.as_u8x16())) }
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu8_mask&expand=882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpge_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi16_mask&expand=843)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpge_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<i16x32, _>(simd_ge(a.as_i16x32(), b.as_i16x32())) }
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi16_mask&expand=844)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpge_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi16_mask&expand=841)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpge_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<i16x16, _>(simd_ge(a.as_i16x16(), b.as_i16x16())) }
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi16_mask&expand=842)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpge_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi16_mask&expand=839)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpge_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i16x8, _>(simd_ge(a.as_i16x8(), b.as_i16x8())) }
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi16_mask&expand=840)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpge_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

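// Call-site sketch (illustrative): these intrinsics carry `#[target_feature]`,
// so outside a similarly-attributed function the call needs a runtime check
// plus an `unsafe` block:
//
//     if is_x86_feature_detected!("avx512bw") && is_x86_feature_detected!("avx512vl") {
//         // Sound: the required CPU features were just verified.
//         let k = unsafe { _mm_cmpge_epi16_mask(a, b) };
//     }
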
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi8_mask&expand=861)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpge_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<i8x64, _>(simd_ge(a.as_i8x64(), b.as_i8x64())) }
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi8_mask&expand=862)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpge_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi8_mask&expand=859)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpge_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<i8x32, _>(simd_ge(a.as_i8x32(), b.as_i8x32())) }
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi8_mask&expand=860)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpge_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi8_mask&expand=857)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpge_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<i8x16, _>(simd_ge(a.as_i8x16(), b.as_i8x16())) }
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi8_mask&expand=858)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpge_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu16_mask&expand=801)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpeq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<u16x32, _>(simd_eq(a.as_u16x32(), b.as_u16x32())) }
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu16_mask&expand=802)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpeq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu16_mask&expand=799)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpeq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<u16x16, _>(simd_eq(a.as_u16x16(), b.as_u16x16())) }
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu16_mask&expand=800)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpeq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu16_mask&expand=797)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpeq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u16x8, _>(simd_eq(a.as_u16x8(), b.as_u16x8())) }
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu16_mask&expand=798)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpeq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

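// Equality sketch (illustrative): bit-for-bit equality does not depend on
// signedness, so the `epu16` and `epi16` equality compares always agree:
//
//     assert_eq!(_mm512_cmpeq_epu16_mask(a, b), _mm512_cmpeq_epi16_mask(a, b));
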
/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu8_mask&expand=819)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpeq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<u8x64, _>(simd_eq(a.as_u8x64(), b.as_u8x64())) }
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu8_mask&expand=820)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpeq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu8_mask&expand=817)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpeq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<u8x32, _>(simd_eq(a.as_u8x32(), b.as_u8x32())) }
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu8_mask&expand=818)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpeq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu8_mask&expand=815)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpeq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<u8x16, _>(simd_eq(a.as_u8x16(), b.as_u8x16())) }
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu8_mask&expand=816)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpeq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi16_mask&expand=771)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpeq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<i16x32, _>(simd_eq(a.as_i16x32(), b.as_i16x32())) }
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi16_mask&expand=772)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpeq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi16_mask&expand=769)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpeq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi16_mask&expand=770)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpeq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16_mask&expand=767)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpeq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi16_mask&expand=768)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpeq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

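// Counting sketch (illustrative): because the result is an ordinary integer
// mask, a popcount gives the number of matching lanes directly:
//
//     let equal_lanes = _mm_cmpeq_epi16_mask(a, b).count_ones(); // 0..=8
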
/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi8_mask&expand=795)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpeq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<i8x64, _>(simd_eq(a.as_i8x64(), b.as_i8x64())) }
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi8_mask&expand=796)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpeq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi8_mask&expand=793)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpeq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi8_mask&expand=794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpeq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8_mask&expand=791)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpeq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi8_mask&expand=792)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpeq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu16_mask&expand=1106)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpneq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<u16x32, _>(simd_ne(a.as_u16x32(), b.as_u16x32())) }
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu16_mask&expand=1107)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpneq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu16_mask&expand=1104)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpneq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<u16x16, _>(simd_ne(a.as_u16x16(), b.as_u16x16())) }
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu16_mask&expand=1105)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpneq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu16_mask&expand=1102)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpneq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u16x8, _>(simd_ne(a.as_u16x8(), b.as_u16x8())) }
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu16_mask&expand=1103)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpneq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

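// Complement sketch (illustrative): not-equal is the bitwise complement of
// equal over all 32 lanes:
//
//     assert_eq!(_mm512_cmpneq_epu16_mask(a, b), !_mm512_cmpeq_epu16_mask(a, b));
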
/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu8_mask&expand=1124)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpneq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<u8x64, _>(simd_ne(a.as_u8x64(), b.as_u8x64())) }
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu8_mask&expand=1125)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpneq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu8_mask&expand=1122)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpneq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<u8x32, _>(simd_ne(a.as_u8x32(), b.as_u8x32())) }
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu8_mask&expand=1123)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpneq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu8_mask&expand=1120)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpneq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<u8x16, _>(simd_ne(a.as_u8x16(), b.as_u8x16())) }
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu8_mask&expand=1121)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpneq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi16_mask&expand=1082)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpneq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<i16x32, _>(simd_ne(a.as_i16x32(), b.as_i16x32())) }
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi16_mask&expand=1083)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpneq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi16_mask&expand=1080)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpneq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<i16x16, _>(simd_ne(a.as_i16x16(), b.as_i16x16())) }
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi16_mask&expand=1081)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpneq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi16_mask&expand=1078)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpneq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i16x8, _>(simd_ne(a.as_i16x8(), b.as_i16x8())) }
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi16_mask&expand=1079)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpneq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi8_mask&expand=1100)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpneq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<i8x64, _>(simd_ne(a.as_i8x64(), b.as_i8x64())) }
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi8_mask&expand=1101)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi8_mask&expand=1098)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpneq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<i8x32, _>(simd_ne(a.as_i8x32(), b.as_i8x32())) }
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi8_mask&expand=1099)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpneq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi8_mask&expand=1096)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpneq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<i8x16, _>(simd_ne(a.as_i8x16(), b.as_i8x16())) }
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi8_mask&expand=1097)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpneq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}

4114/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4115///
4116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu16_mask&expand=715)
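///
/// # Examples
///
/// A sketch of the predicate encoding, using the `_MM_CMPINT_*` constants
/// (0 = EQ, 1 = LT, 2 = LE, 3 = FALSE, 4 = NE, 5 = NLT, 6 = NLE, 7 = TRUE):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") {
///     // SAFETY: `avx512bw` was verified above.
///     unsafe {
///         let a = _mm512_set1_epi16(-1); // every lane is 0xFFFF = 65535 unsigned
///         let b = _mm512_set1_epi16(1);
///         // Unsigned "not less than or equal", i.e. greater-than.
///         assert_eq!(_mm512_cmp_epu16_mask::<_MM_CMPINT_NLE>(a, b), u32::MAX);
///         // FALSE and TRUE ignore the operands entirely.
///         assert_eq!(_mm512_cmp_epu16_mask::<_MM_CMPINT_FALSE>(a, b), 0);
///     }
/// }
/// ```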
4117#[inline]
4118#[target_feature(enable = "avx512bw")]
4119#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4120#[rustc_legacy_const_generics(2)]
4121#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4122pub fn _mm512_cmp_epu16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
4123    unsafe {
4124        static_assert_uimm_bits!(IMM8, 3);
4125        let a = a.as_u16x32();
4126        let b = b.as_u16x32();
4127        let r = match IMM8 {
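            // Predicate encoding: 0 = EQ, 1 = LT, 2 = LE, 3 = FALSE, 4 = NE, 5 = NLT (>=), 6 = NLE (>), 7 = TRUE.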
4128            0 => simd_eq(a, b),
4129            1 => simd_lt(a, b),
4130            2 => simd_le(a, b),
4131            3 => i16x32::ZERO,
4132            4 => simd_ne(a, b),
4133            5 => simd_ge(a, b),
4134            6 => simd_gt(a, b),
4135            _ => i16x32::splat(-1),
4136        };
4137        simd_bitmask(r)
4138    }
4139}
4140
4141/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4142///
4143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu16_mask&expand=716)
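///
/// # Examples
///
/// A sketch showing that the comparison result is ANDed with `k1`:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") {
///     // SAFETY: `avx512bw` was verified above.
///     unsafe {
///         let a = _mm512_set1_epi16(5);
///         // Every lane compares equal, but only the low four mask bits are active.
///         assert_eq!(_mm512_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(0b1111, a, a), 0b1111);
///     }
/// }
/// ```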
4144#[inline]
4145#[target_feature(enable = "avx512bw")]
4146#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4147#[rustc_legacy_const_generics(3)]
4148#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4149pub fn _mm512_mask_cmp_epu16_mask<const IMM8: i32>(
4150    k1: __mmask32,
4151    a: __m512i,
4152    b: __m512i,
4153) -> __mmask32 {
4154    unsafe {
4155        static_assert_uimm_bits!(IMM8, 3);
4156        let a = a.as_u16x32();
4157        let b = b.as_u16x32();
4158        let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO);
4159        let r = match IMM8 {
4160            0 => simd_and(k1, simd_eq(a, b)),
4161            1 => simd_and(k1, simd_lt(a, b)),
4162            2 => simd_and(k1, simd_le(a, b)),
4163            3 => i16x32::ZERO,
4164            4 => simd_and(k1, simd_ne(a, b)),
4165            5 => simd_and(k1, simd_ge(a, b)),
4166            6 => simd_and(k1, simd_gt(a, b)),
4167            _ => k1,
4168        };
4169        simd_bitmask(r)
4170    }
4171}
4172
4173/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4174///
4175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu16_mask&expand=713)
4176#[inline]
4177#[target_feature(enable = "avx512bw,avx512vl")]
4178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4179#[rustc_legacy_const_generics(2)]
4180#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4181pub fn _mm256_cmp_epu16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
4182    unsafe {
4183        static_assert_uimm_bits!(IMM8, 3);
4184        let a = a.as_u16x16();
4185        let b = b.as_u16x16();
4186        let r = match IMM8 {
4187            0 => simd_eq(a, b),
4188            1 => simd_lt(a, b),
4189            2 => simd_le(a, b),
4190            3 => i16x16::ZERO,
4191            4 => simd_ne(a, b),
4192            5 => simd_ge(a, b),
4193            6 => simd_gt(a, b),
4194            _ => i16x16::splat(-1),
4195        };
4196        simd_bitmask(r)
4197    }
4198}
4199
4200/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4201///
4202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu16_mask&expand=714)
4203#[inline]
4204#[target_feature(enable = "avx512bw,avx512vl")]
4205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4206#[rustc_legacy_const_generics(3)]
4207#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4208pub fn _mm256_mask_cmp_epu16_mask<const IMM8: i32>(
4209    k1: __mmask16,
4210    a: __m256i,
4211    b: __m256i,
4212) -> __mmask16 {
4213    unsafe {
4214        static_assert_uimm_bits!(IMM8, 3);
4215        let a = a.as_u16x16();
4216        let b = b.as_u16x16();
4217        let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO);
4218        let r = match IMM8 {
4219            0 => simd_and(k1, simd_eq(a, b)),
4220            1 => simd_and(k1, simd_lt(a, b)),
4221            2 => simd_and(k1, simd_le(a, b)),
4222            3 => i16x16::ZERO,
4223            4 => simd_and(k1, simd_ne(a, b)),
4224            5 => simd_and(k1, simd_ge(a, b)),
4225            6 => simd_and(k1, simd_gt(a, b)),
4226            _ => k1,
4227        };
4228        simd_bitmask(r)
4229    }
4230}
4231
4232/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4233///
4234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu16_mask&expand=711)
4235#[inline]
4236#[target_feature(enable = "avx512bw,avx512vl")]
4237#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4238#[rustc_legacy_const_generics(2)]
4239#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4240pub fn _mm_cmp_epu16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
4241    unsafe {
4242        static_assert_uimm_bits!(IMM8, 3);
4243        let a = a.as_u16x8();
4244        let b = b.as_u16x8();
4245        let r = match IMM8 {
4246            0 => simd_eq(a, b),
4247            1 => simd_lt(a, b),
4248            2 => simd_le(a, b),
4249            3 => i16x8::ZERO,
4250            4 => simd_ne(a, b),
4251            5 => simd_ge(a, b),
4252            6 => simd_gt(a, b),
4253            _ => i16x8::splat(-1),
4254        };
4255        simd_bitmask(r)
4256    }
4257}
4258
4259/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4260///
4261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu16_mask&expand=712)
4262#[inline]
4263#[target_feature(enable = "avx512bw,avx512vl")]
4264#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4265#[rustc_legacy_const_generics(3)]
4266#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4267pub fn _mm_mask_cmp_epu16_mask<const IMM8: i32>(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
4268    unsafe {
4269        static_assert_uimm_bits!(IMM8, 3);
4270        let a = a.as_u16x8();
4271        let b = b.as_u16x8();
4272        let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO);
4273        let r = match IMM8 {
4274            0 => simd_and(k1, simd_eq(a, b)),
4275            1 => simd_and(k1, simd_lt(a, b)),
4276            2 => simd_and(k1, simd_le(a, b)),
4277            3 => i16x8::ZERO,
4278            4 => simd_and(k1, simd_ne(a, b)),
4279            5 => simd_and(k1, simd_ge(a, b)),
4280            6 => simd_and(k1, simd_gt(a, b)),
4281            _ => k1,
4282        };
4283        simd_bitmask(r)
4284    }
4285}
4286
4287/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4288///
4289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu8_mask&expand=733)
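///
/// # Examples
///
/// A sketch highlighting the unsigned interpretation of the byte lanes:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") {
///     // SAFETY: `avx512bw` was verified above.
///     unsafe {
///         let a = _mm512_set1_epi8(-128); // 0x80, i.e. 128 when viewed unsigned
///         let b = _mm512_set1_epi8(1);
///         // Unsigned "not less than": 128 >= 1 holds in every lane.
///         assert_eq!(_mm512_cmp_epu8_mask::<_MM_CMPINT_NLT>(a, b), u64::MAX);
///     }
/// }
/// ```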
4290#[inline]
4291#[target_feature(enable = "avx512bw")]
4292#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4293#[rustc_legacy_const_generics(2)]
4294#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4295pub fn _mm512_cmp_epu8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
4296    unsafe {
4297        static_assert_uimm_bits!(IMM8, 3);
4298        let a = a.as_u8x64();
4299        let b = b.as_u8x64();
4300        let r = match IMM8 {
4301            0 => simd_eq(a, b),
4302            1 => simd_lt(a, b),
4303            2 => simd_le(a, b),
4304            3 => i8x64::ZERO,
4305            4 => simd_ne(a, b),
4306            5 => simd_ge(a, b),
4307            6 => simd_gt(a, b),
4308            _ => i8x64::splat(-1),
4309        };
4310        simd_bitmask(r)
4311    }
4312}
4313
4314/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4315///
4316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu8_mask&expand=734)
4317#[inline]
4318#[target_feature(enable = "avx512bw")]
4319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4320#[rustc_legacy_const_generics(3)]
4321#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4322pub fn _mm512_mask_cmp_epu8_mask<const IMM8: i32>(
4323    k1: __mmask64,
4324    a: __m512i,
4325    b: __m512i,
4326) -> __mmask64 {
4327    unsafe {
4328        static_assert_uimm_bits!(IMM8, 3);
4329        let a = a.as_u8x64();
4330        let b = b.as_u8x64();
4331        let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO);
4332        let r = match IMM8 {
4333            0 => simd_and(k1, simd_eq(a, b)),
4334            1 => simd_and(k1, simd_lt(a, b)),
4335            2 => simd_and(k1, simd_le(a, b)),
4336            3 => i8x64::ZERO,
4337            4 => simd_and(k1, simd_ne(a, b)),
4338            5 => simd_and(k1, simd_ge(a, b)),
4339            6 => simd_and(k1, simd_gt(a, b)),
4340            _ => k1,
4341        };
4342        simd_bitmask(r)
4343    }
4344}
4345
4346/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4347///
4348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu8_mask&expand=731)
4349#[inline]
4350#[target_feature(enable = "avx512bw,avx512vl")]
4351#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4352#[rustc_legacy_const_generics(2)]
4353#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4354pub fn _mm256_cmp_epu8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
4355    unsafe {
4356        static_assert_uimm_bits!(IMM8, 3);
4357        let a = a.as_u8x32();
4358        let b = b.as_u8x32();
4359        let r = match IMM8 {
4360            0 => simd_eq(a, b),
4361            1 => simd_lt(a, b),
4362            2 => simd_le(a, b),
4363            3 => i8x32::ZERO,
4364            4 => simd_ne(a, b),
4365            5 => simd_ge(a, b),
4366            6 => simd_gt(a, b),
4367            _ => i8x32::splat(-1),
4368        };
4369        simd_bitmask(r)
4370    }
4371}
4372
4373/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4374///
4375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu8_mask&expand=732)
4376#[inline]
4377#[target_feature(enable = "avx512bw,avx512vl")]
4378#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4379#[rustc_legacy_const_generics(3)]
4380#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4381pub fn _mm256_mask_cmp_epu8_mask<const IMM8: i32>(
4382    k1: __mmask32,
4383    a: __m256i,
4384    b: __m256i,
4385) -> __mmask32 {
4386    unsafe {
4387        static_assert_uimm_bits!(IMM8, 3);
4388        let a = a.as_u8x32();
4389        let b = b.as_u8x32();
4390        let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO);
4391        let r = match IMM8 {
4392            0 => simd_and(k1, simd_eq(a, b)),
4393            1 => simd_and(k1, simd_lt(a, b)),
4394            2 => simd_and(k1, simd_le(a, b)),
4395            3 => i8x32::ZERO,
4396            4 => simd_and(k1, simd_ne(a, b)),
4397            5 => simd_and(k1, simd_ge(a, b)),
4398            6 => simd_and(k1, simd_gt(a, b)),
4399            _ => k1,
4400        };
4401        simd_bitmask(r)
4402    }
4403}
4404
4405/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4406///
4407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu8_mask&expand=729)
4408#[inline]
4409#[target_feature(enable = "avx512bw,avx512vl")]
4410#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4411#[rustc_legacy_const_generics(2)]
4412#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4413pub fn _mm_cmp_epu8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
4414    unsafe {
4415        static_assert_uimm_bits!(IMM8, 3);
4416        let a = a.as_u8x16();
4417        let b = b.as_u8x16();
4418        let r = match IMM8 {
4419            0 => simd_eq(a, b),
4420            1 => simd_lt(a, b),
4421            2 => simd_le(a, b),
4422            3 => i8x16::ZERO,
4423            4 => simd_ne(a, b),
4424            5 => simd_ge(a, b),
4425            6 => simd_gt(a, b),
4426            _ => i8x16::splat(-1),
4427        };
4428        simd_bitmask(r)
4429    }
4430}
4431
4432/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4433///
4434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu8_mask&expand=730)
4435#[inline]
4436#[target_feature(enable = "avx512bw,avx512vl")]
4437#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4438#[rustc_legacy_const_generics(3)]
4439#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4440pub fn _mm_mask_cmp_epu8_mask<const IMM8: i32>(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
4441    unsafe {
4442        static_assert_uimm_bits!(IMM8, 3);
4443        let a = a.as_u8x16();
4444        let b = b.as_u8x16();
4445        let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO);
4446        let r = match IMM8 {
4447            0 => simd_and(k1, simd_eq(a, b)),
4448            1 => simd_and(k1, simd_lt(a, b)),
4449            2 => simd_and(k1, simd_le(a, b)),
4450            3 => i8x16::ZERO,
4451            4 => simd_and(k1, simd_ne(a, b)),
4452            5 => simd_and(k1, simd_ge(a, b)),
4453            6 => simd_and(k1, simd_gt(a, b)),
4454            _ => k1,
4455        };
4456        simd_bitmask(r)
4457    }
4458}
4459
4460/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4461///
4462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi16_mask&expand=691)
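///
/// # Examples
///
/// A sketch contrasting the signed interpretation with `_mm512_cmp_epu16_mask`:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") {
///     // SAFETY: `avx512bw` was verified above.
///     unsafe {
///         let a = _mm512_set1_epi16(-1);
///         let b = _mm512_set1_epi16(1);
///         // Signed: -1 < 1 in every lane (as unsigned, 0xFFFF would compare greater).
///         assert_eq!(_mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b), u32::MAX);
///     }
/// }
/// ```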
4463#[inline]
4464#[target_feature(enable = "avx512bw")]
4465#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4466#[rustc_legacy_const_generics(2)]
4467#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4468pub fn _mm512_cmp_epi16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
4469    unsafe {
4470        static_assert_uimm_bits!(IMM8, 3);
4471        let a = a.as_i16x32();
4472        let b = b.as_i16x32();
4473        let r = match IMM8 {
4474            0 => simd_eq(a, b),
4475            1 => simd_lt(a, b),
4476            2 => simd_le(a, b),
4477            3 => i16x32::ZERO,
4478            4 => simd_ne(a, b),
4479            5 => simd_ge(a, b),
4480            6 => simd_gt(a, b),
4481            _ => i16x32::splat(-1),
4482        };
4483        simd_bitmask(r)
4484    }
4485}
4486
4487/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4488///
4489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi16_mask&expand=692)
4490#[inline]
4491#[target_feature(enable = "avx512bw")]
4492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4493#[rustc_legacy_const_generics(3)]
4494#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4495pub fn _mm512_mask_cmp_epi16_mask<const IMM8: i32>(
4496    k1: __mmask32,
4497    a: __m512i,
4498    b: __m512i,
4499) -> __mmask32 {
4500    unsafe {
4501        static_assert_uimm_bits!(IMM8, 3);
4502        let a = a.as_i16x32();
4503        let b = b.as_i16x32();
4504        let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO);
4505        let r = match IMM8 {
4506            0 => simd_and(k1, simd_eq(a, b)),
4507            1 => simd_and(k1, simd_lt(a, b)),
4508            2 => simd_and(k1, simd_le(a, b)),
4509            3 => i16x32::ZERO,
4510            4 => simd_and(k1, simd_ne(a, b)),
4511            5 => simd_and(k1, simd_ge(a, b)),
4512            6 => simd_and(k1, simd_gt(a, b)),
4513            _ => k1,
4514        };
4515        simd_bitmask(r)
4516    }
4517}
4518
4519/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4520///
4521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi16_mask&expand=689)
4522#[inline]
4523#[target_feature(enable = "avx512bw,avx512vl")]
4524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4525#[rustc_legacy_const_generics(2)]
4526#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4527pub fn _mm256_cmp_epi16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
4528    unsafe {
4529        static_assert_uimm_bits!(IMM8, 3);
4530        let a = a.as_i16x16();
4531        let b = b.as_i16x16();
4532        let r = match IMM8 {
4533            0 => simd_eq(a, b),
4534            1 => simd_lt(a, b),
4535            2 => simd_le(a, b),
4536            3 => i16x16::ZERO,
4537            4 => simd_ne(a, b),
4538            5 => simd_ge(a, b),
4539            6 => simd_gt(a, b),
4540            _ => i16x16::splat(-1),
4541        };
4542        simd_bitmask(r)
4543    }
4544}
4545
4546/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4547///
4548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi16_mask&expand=690)
4549#[inline]
4550#[target_feature(enable = "avx512bw,avx512vl")]
4551#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4552#[rustc_legacy_const_generics(3)]
4553#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4554pub fn _mm256_mask_cmp_epi16_mask<const IMM8: i32>(
4555    k1: __mmask16,
4556    a: __m256i,
4557    b: __m256i,
4558) -> __mmask16 {
4559    unsafe {
4560        static_assert_uimm_bits!(IMM8, 3);
4561        let a = a.as_i16x16();
4562        let b = b.as_i16x16();
4563        let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO);
4564        let r = match IMM8 {
4565            0 => simd_and(k1, simd_eq(a, b)),
4566            1 => simd_and(k1, simd_lt(a, b)),
4567            2 => simd_and(k1, simd_le(a, b)),
4568            3 => i16x16::ZERO,
4569            4 => simd_and(k1, simd_ne(a, b)),
4570            5 => simd_and(k1, simd_ge(a, b)),
4571            6 => simd_and(k1, simd_gt(a, b)),
4572            _ => k1,
4573        };
4574        simd_bitmask(r)
4575    }
4576}
4577
4578/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4579///
4580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi16_mask&expand=687)
4581#[inline]
4582#[target_feature(enable = "avx512bw,avx512vl")]
4583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4584#[rustc_legacy_const_generics(2)]
4585#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4586pub fn _mm_cmp_epi16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
4587    unsafe {
4588        static_assert_uimm_bits!(IMM8, 3);
4589        let a = a.as_i16x8();
4590        let b = b.as_i16x8();
4591        let r = match IMM8 {
4592            0 => simd_eq(a, b),
4593            1 => simd_lt(a, b),
4594            2 => simd_le(a, b),
4595            3 => i16x8::ZERO,
4596            4 => simd_ne(a, b),
4597            5 => simd_ge(a, b),
4598            6 => simd_gt(a, b),
4599            _ => i16x8::splat(-1),
4600        };
4601        simd_bitmask(r)
4602    }
4603}
4604
4605/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4606///
4607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi16_mask&expand=688)
4608#[inline]
4609#[target_feature(enable = "avx512bw,avx512vl")]
4610#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4611#[rustc_legacy_const_generics(3)]
4612#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4613pub fn _mm_mask_cmp_epi16_mask<const IMM8: i32>(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
4614    unsafe {
4615        static_assert_uimm_bits!(IMM8, 3);
4616        let a = a.as_i16x8();
4617        let b = b.as_i16x8();
4618        let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO);
4619        let r = match IMM8 {
4620            0 => simd_and(k1, simd_eq(a, b)),
4621            1 => simd_and(k1, simd_lt(a, b)),
4622            2 => simd_and(k1, simd_le(a, b)),
4623            3 => i16x8::ZERO,
4624            4 => simd_and(k1, simd_ne(a, b)),
4625            5 => simd_and(k1, simd_ge(a, b)),
4626            6 => simd_and(k1, simd_gt(a, b)),
4627            _ => k1,
4628        };
4629        simd_bitmask(r)
4630    }
4631}
4632
4633/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4634///
4635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi8_mask&expand=709)
4636#[inline]
4637#[target_feature(enable = "avx512bw")]
4638#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4639#[rustc_legacy_const_generics(2)]
4640#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4641pub fn _mm512_cmp_epi8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
4642    unsafe {
4643        static_assert_uimm_bits!(IMM8, 3);
4644        let a = a.as_i8x64();
4645        let b = b.as_i8x64();
4646        let r = match IMM8 {
4647            0 => simd_eq(a, b),
4648            1 => simd_lt(a, b),
4649            2 => simd_le(a, b),
4650            3 => i8x64::ZERO,
4651            4 => simd_ne(a, b),
4652            5 => simd_ge(a, b),
4653            6 => simd_gt(a, b),
4654            _ => i8x64::splat(-1),
4655        };
4656        simd_bitmask(r)
4657    }
4658}
4659
4660/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4661///
4662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi8_mask&expand=710)
4663#[inline]
4664#[target_feature(enable = "avx512bw")]
4665#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4666#[rustc_legacy_const_generics(3)]
4667#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4668pub fn _mm512_mask_cmp_epi8_mask<const IMM8: i32>(
4669    k1: __mmask64,
4670    a: __m512i,
4671    b: __m512i,
4672) -> __mmask64 {
4673    unsafe {
4674        static_assert_uimm_bits!(IMM8, 3);
4675        let a = a.as_i8x64();
4676        let b = b.as_i8x64();
4677        let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO);
4678        let r = match IMM8 {
4679            0 => simd_and(k1, simd_eq(a, b)),
4680            1 => simd_and(k1, simd_lt(a, b)),
4681            2 => simd_and(k1, simd_le(a, b)),
4682            3 => i8x64::ZERO,
4683            4 => simd_and(k1, simd_ne(a, b)),
4684            5 => simd_and(k1, simd_ge(a, b)),
4685            6 => simd_and(k1, simd_gt(a, b)),
4686            _ => k1,
4687        };
4688        simd_bitmask(r)
4689    }
4690}
4691
4692/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4693///
4694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi8_mask&expand=707)
4695#[inline]
4696#[target_feature(enable = "avx512bw,avx512vl")]
4697#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4698#[rustc_legacy_const_generics(2)]
4699#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4700pub fn _mm256_cmp_epi8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
4701    unsafe {
4702        static_assert_uimm_bits!(IMM8, 3);
4703        let a = a.as_i8x32();
4704        let b = b.as_i8x32();
4705        let r = match IMM8 {
4706            0 => simd_eq(a, b),
4707            1 => simd_lt(a, b),
4708            2 => simd_le(a, b),
4709            3 => i8x32::ZERO,
4710            4 => simd_ne(a, b),
4711            5 => simd_ge(a, b),
4712            6 => simd_gt(a, b),
4713            _ => i8x32::splat(-1),
4714        };
4715        simd_bitmask(r)
4716    }
4717}
4718
4719/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4720///
4721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi8_mask&expand=708)
4722#[inline]
4723#[target_feature(enable = "avx512bw,avx512vl")]
4724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4725#[rustc_legacy_const_generics(3)]
4726#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4727pub fn _mm256_mask_cmp_epi8_mask<const IMM8: i32>(
4728    k1: __mmask32,
4729    a: __m256i,
4730    b: __m256i,
4731) -> __mmask32 {
4732    unsafe {
4733        static_assert_uimm_bits!(IMM8, 3);
4734        let a = a.as_i8x32();
4735        let b = b.as_i8x32();
4736        let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO);
4737        let r = match IMM8 {
4738            0 => simd_and(k1, simd_eq(a, b)),
4739            1 => simd_and(k1, simd_lt(a, b)),
4740            2 => simd_and(k1, simd_le(a, b)),
4741            3 => i8x32::ZERO,
4742            4 => simd_and(k1, simd_ne(a, b)),
4743            5 => simd_and(k1, simd_ge(a, b)),
4744            6 => simd_and(k1, simd_gt(a, b)),
4745            _ => k1,
4746        };
4747        simd_bitmask(r)
4748    }
4749}
4750
4751/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4752///
4753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi8_mask&expand=705)
4754#[inline]
4755#[target_feature(enable = "avx512bw,avx512vl")]
4756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4757#[rustc_legacy_const_generics(2)]
4758#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4759pub fn _mm_cmp_epi8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
4760    unsafe {
4761        static_assert_uimm_bits!(IMM8, 3);
4762        let a = a.as_i8x16();
4763        let b = b.as_i8x16();
4764        let r = match IMM8 {
4765            0 => simd_eq(a, b),
4766            1 => simd_lt(a, b),
4767            2 => simd_le(a, b),
4768            3 => i8x16::ZERO,
4769            4 => simd_ne(a, b),
4770            5 => simd_ge(a, b),
4771            6 => simd_gt(a, b),
4772            _ => i8x16::splat(-1),
4773        };
4774        simd_bitmask(r)
4775    }
4776}
4777
4778/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4779///
4780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi8_mask&expand=706)
4781#[inline]
4782#[target_feature(enable = "avx512bw,avx512vl")]
4783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4784#[rustc_legacy_const_generics(3)]
4785#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4786pub fn _mm_mask_cmp_epi8_mask<const IMM8: i32>(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
4787    unsafe {
4788        static_assert_uimm_bits!(IMM8, 3);
4789        let a = a.as_i8x16();
4790        let b = b.as_i8x16();
4791        let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO);
4792        let r = match IMM8 {
4793            0 => simd_and(k1, simd_eq(a, b)),
4794            1 => simd_and(k1, simd_lt(a, b)),
4795            2 => simd_and(k1, simd_le(a, b)),
4796            3 => i8x16::ZERO,
4797            4 => simd_and(k1, simd_ne(a, b)),
4798            5 => simd_and(k1, simd_ge(a, b)),
4799            6 => simd_and(k1, simd_gt(a, b)),
4800            _ => k1,
4801        };
4802        simd_bitmask(r)
4803    }
4804}
4805
4806/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
4807///
4808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi16)
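///
/// # Examples
///
/// A sketch; note the accumulation is performed at 16-bit width and wraps on
/// overflow:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") && is_x86_feature_detected!("avx512vl") {
///     // SAFETY: the required target features were verified above.
///     unsafe {
///         // 16 lanes of 2 sum to 32.
///         assert_eq!(_mm256_reduce_add_epi16(_mm256_set1_epi16(2)), 32);
///     }
/// }
/// ```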
4809#[inline]
4810#[target_feature(enable = "avx512bw,avx512vl")]
4811#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4812pub fn _mm256_reduce_add_epi16(a: __m256i) -> i16 {
4813    unsafe { simd_reduce_add_unordered(a.as_i16x16()) }
4814}
4815
4816/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
4817///
4818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi16)
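///
/// # Examples
///
/// A sketch of the masking behavior: inactive lanes contribute the additive
/// identity 0, so an all-zero mask returns 0.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") && is_x86_feature_detected!("avx512vl") {
///     // SAFETY: the required target features were verified above.
///     unsafe {
///         let a = _mm256_set1_epi16(3);
///         // Only the low four lanes are active: 4 * 3 = 12.
///         assert_eq!(_mm256_mask_reduce_add_epi16(0b1111, a), 12);
///         assert_eq!(_mm256_mask_reduce_add_epi16(0, a), 0);
///     }
/// }
/// ```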
4819#[inline]
4820#[target_feature(enable = "avx512bw,avx512vl")]
4821#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4822pub fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 {
4823    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) }
4824}
4825
4826/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
4827///
4828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi16)
4829#[inline]
4830#[target_feature(enable = "avx512bw,avx512vl")]
4831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4832pub fn _mm_reduce_add_epi16(a: __m128i) -> i16 {
4833    unsafe { simd_reduce_add_unordered(a.as_i16x8()) }
4834}
4835
4836/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
4837///
4838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi16)
4839#[inline]
4840#[target_feature(enable = "avx512bw,avx512vl")]
4841#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4842pub fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 {
4843    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) }
4844}
4845
4846/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
4847///
4848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi8)
4849#[inline]
4850#[target_feature(enable = "avx512bw,avx512vl")]
4851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4852pub fn _mm256_reduce_add_epi8(a: __m256i) -> i8 {
4853    unsafe { simd_reduce_add_unordered(a.as_i8x32()) }
4854}
4855
4856/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
4857///
4858/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi8)
4859#[inline]
4860#[target_feature(enable = "avx512bw,avx512vl")]
4861#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4862pub fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 {
4863    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) }
4864}
4865
4866/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
4867///
4868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi8)
4869#[inline]
4870#[target_feature(enable = "avx512bw,avx512vl")]
4871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4872pub fn _mm_reduce_add_epi8(a: __m128i) -> i8 {
4873    unsafe { simd_reduce_add_unordered(a.as_i8x16()) }
4874}
4875
4876/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
4877///
4878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi8)
4879#[inline]
4880#[target_feature(enable = "avx512bw,avx512vl")]
4881#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4882pub fn _mm_mask_reduce_add_epi8(k: __mmask16, a: __m128i) -> i8 {
4883    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) }
4884}
4885
4886/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
4887///
4888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi16)
4889#[inline]
4890#[target_feature(enable = "avx512bw,avx512vl")]
4891#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4892pub fn _mm256_reduce_and_epi16(a: __m256i) -> i16 {
4893    unsafe { simd_reduce_and(a.as_i16x16()) }
4894}
4895
4896/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
4897///
4898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi16)
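///
/// # Examples
///
/// A sketch of why inactive lanes are replaced with all-ones (the AND
/// identity) rather than zero, which would clear every bit of the result:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") && is_x86_feature_detected!("avx512vl") {
///     // SAFETY: the required target features were verified above.
///     unsafe {
///         let a = _mm256_set1_epi16(0b1010);
///         assert_eq!(_mm256_mask_reduce_and_epi16(0b11, a), 0b1010);
///         // With no active lanes, the identity itself is returned.
///         assert_eq!(_mm256_mask_reduce_and_epi16(0, a), -1);
///     }
/// }
/// ```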
4899#[inline]
4900#[target_feature(enable = "avx512bw,avx512vl")]
4901#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4902pub fn _mm256_mask_reduce_and_epi16(k: __mmask16, a: __m256i) -> i16 {
4903    unsafe {
4904        simd_reduce_and(simd_select_bitmask(
4905            k,
4906            a.as_i16x16(),
4907            _mm256_set1_epi64x(-1).as_i16x16(),
4908        ))
4909    }
4910}
4911
4912/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
4913///
4914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi16)
4915#[inline]
4916#[target_feature(enable = "avx512bw,avx512vl")]
4917#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4918pub fn _mm_reduce_and_epi16(a: __m128i) -> i16 {
4919    unsafe { simd_reduce_and(a.as_i16x8()) }
4920}
4921
4922/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
4923///
4924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi16)
4925#[inline]
4926#[target_feature(enable = "avx512bw,avx512vl")]
4927#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4928pub fn _mm_mask_reduce_and_epi16(k: __mmask8, a: __m128i) -> i16 {
4929    unsafe {
4930        simd_reduce_and(simd_select_bitmask(
4931            k,
4932            a.as_i16x8(),
4933            _mm_set1_epi64x(-1).as_i16x8(),
4934        ))
4935    }
4936}
4937
4938/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
4939///
4940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi8)
4941#[inline]
4942#[target_feature(enable = "avx512bw,avx512vl")]
4943#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4944pub fn _mm256_reduce_and_epi8(a: __m256i) -> i8 {
4945    unsafe { simd_reduce_and(a.as_i8x32()) }
4946}
4947
4948/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
4949///
4950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi8)
4951#[inline]
4952#[target_feature(enable = "avx512bw,avx512vl")]
4953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4954pub fn _mm256_mask_reduce_and_epi8(k: __mmask32, a: __m256i) -> i8 {
4955    unsafe {
4956        simd_reduce_and(simd_select_bitmask(
4957            k,
4958            a.as_i8x32(),
4959            _mm256_set1_epi64x(-1).as_i8x32(),
4960        ))
4961    }
4962}
4963
4964/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
4965///
4966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi8)
4967#[inline]
4968#[target_feature(enable = "avx512bw,avx512vl")]
4969#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4970pub fn _mm_reduce_and_epi8(a: __m128i) -> i8 {
4971    unsafe { simd_reduce_and(a.as_i8x16()) }
4972}
4973
4974/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
4975///
4976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi8)
4977#[inline]
4978#[target_feature(enable = "avx512bw,avx512vl")]
4979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4980pub fn _mm_mask_reduce_and_epi8(k: __mmask16, a: __m128i) -> i8 {
4981    unsafe {
4982        simd_reduce_and(simd_select_bitmask(
4983            k,
4984            a.as_i8x16(),
4985            _mm_set1_epi64x(-1).as_i8x16(),
4986        ))
4987    }
4988}
4989
4990/// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a.
4991///
4992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi16)
4993#[inline]
4994#[target_feature(enable = "avx512bw,avx512vl")]
4995#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4996pub fn _mm256_reduce_max_epi16(a: __m256i) -> i16 {
4997    unsafe { simd_reduce_max(a.as_i16x16()) }
4998}
4999
5000/// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5001///
5002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi16)
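///
/// # Examples
///
/// A sketch: inactive lanes contribute `i16::MIN`, the identity for a signed
/// maximum, so they can never win.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") && is_x86_feature_detected!("avx512vl") {
///     // SAFETY: the required target features were verified above.
///     unsafe {
///         let a = _mm256_set1_epi16(-5);
///         assert_eq!(_mm256_mask_reduce_max_epi16(0b1, a), -5);
///         assert_eq!(_mm256_mask_reduce_max_epi16(0, a), i16::MIN);
///     }
/// }
/// ```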
5003#[inline]
5004#[target_feature(enable = "avx512bw,avx512vl")]
5005#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5006pub fn _mm256_mask_reduce_max_epi16(k: __mmask16, a: __m256i) -> i16 {
5007    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(-32768))) }
5008}
5009
5010/// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a.
5011///
5012/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi16)
5013#[inline]
5014#[target_feature(enable = "avx512bw,avx512vl")]
5015#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5016pub fn _mm_reduce_max_epi16(a: __m128i) -> i16 {
5017    unsafe { simd_reduce_max(a.as_i16x8()) }
5018}
5019
5020/// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5021///
5022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi16)
5023#[inline]
5024#[target_feature(enable = "avx512bw,avx512vl")]
5025#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5026pub fn _mm_mask_reduce_max_epi16(k: __mmask8, a: __m128i) -> i16 {
5027    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(-32768))) }
5028}
5029
5030/// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a.
5031///
5032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi8)
5033#[inline]
5034#[target_feature(enable = "avx512bw,avx512vl")]
5035#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5036pub fn _mm256_reduce_max_epi8(a: __m256i) -> i8 {
5037    unsafe { simd_reduce_max(a.as_i8x32()) }
5038}
5039
5040/// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5041///
5042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi8)
5043#[inline]
5044#[target_feature(enable = "avx512bw,avx512vl")]
5045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5046pub fn _mm256_mask_reduce_max_epi8(k: __mmask32, a: __m256i) -> i8 {
5047    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(-128))) }
5048}
5049
5050/// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a.
5051///
5052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi8)
5053#[inline]
5054#[target_feature(enable = "avx512bw,avx512vl")]
5055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5056pub fn _mm_reduce_max_epi8(a: __m128i) -> i8 {
5057    unsafe { simd_reduce_max(a.as_i8x16()) }
5058}
5059
5060/// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5061///
5062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi8)
5063#[inline]
5064#[target_feature(enable = "avx512bw,avx512vl")]
5065#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5066pub fn _mm_mask_reduce_max_epi8(k: __mmask16, a: __m128i) -> i8 {
5067    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(-128))) }
5068}
5069
5070/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a.
5071///
5072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu16)
5073#[inline]
5074#[target_feature(enable = "avx512bw,avx512vl")]
5075#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5076pub fn _mm256_reduce_max_epu16(a: __m256i) -> u16 {
5077    unsafe { simd_reduce_max(a.as_u16x16()) }
5078}
5079
5080/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5081///
5082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu16)
5083#[inline]
5084#[target_feature(enable = "avx512bw,avx512vl")]
5085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5086pub fn _mm256_mask_reduce_max_epu16(k: __mmask16, a: __m256i) -> u16 {
5087    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x16(), u16x16::ZERO)) }
5088}
5089
5090/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a.
5091///
5092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu16)
5093#[inline]
5094#[target_feature(enable = "avx512bw,avx512vl")]
5095#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5096pub fn _mm_reduce_max_epu16(a: __m128i) -> u16 {
5097    unsafe { simd_reduce_max(a.as_u16x8()) }
5098}
5099
5100/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5101///
5102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu16)
5103#[inline]
5104#[target_feature(enable = "avx512bw,avx512vl")]
5105#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5106pub fn _mm_mask_reduce_max_epu16(k: __mmask8, a: __m128i) -> u16 {
5107    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x8(), u16x8::ZERO)) }
5108}
5109
5110/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a.
5111///
5112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu8)
5113#[inline]
5114#[target_feature(enable = "avx512bw,avx512vl")]
5115#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5116pub fn _mm256_reduce_max_epu8(a: __m256i) -> u8 {
5117    unsafe { simd_reduce_max(a.as_u8x32()) }
5118}
5119
5120/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5121///
5122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu8)
5123#[inline]
5124#[target_feature(enable = "avx512bw,avx512vl")]
5125#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5126pub fn _mm256_mask_reduce_max_epu8(k: __mmask32, a: __m256i) -> u8 {
5127    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x32(), u8x32::ZERO)) }
5128}
5129
5130/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a.
5131///
5132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu8)
5133#[inline]
5134#[target_feature(enable = "avx512bw,avx512vl")]
5135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5136pub fn _mm_reduce_max_epu8(a: __m128i) -> u8 {
5137    unsafe { simd_reduce_max(a.as_u8x16()) }
5138}
5139
5140/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5141///
5142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu8)
5143#[inline]
5144#[target_feature(enable = "avx512bw,avx512vl")]
5145#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5146pub fn _mm_mask_reduce_max_epu8(k: __mmask16, a: __m128i) -> u8 {
5147    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x16(), u8x16::ZERO)) }
5148}
5149
5150/// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a.
5151///
5152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi16)
5153#[inline]
5154#[target_feature(enable = "avx512bw,avx512vl")]
5155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5156pub fn _mm256_reduce_min_epi16(a: __m256i) -> i16 {
5157    unsafe { simd_reduce_min(a.as_i16x16()) }
5158}
5159
5160/// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5161///
5162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi16)
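///
/// # Examples
///
/// A sketch: inactive lanes contribute `i16::MAX`, the identity for a signed
/// minimum.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") && is_x86_feature_detected!("avx512vl") {
///     // SAFETY: the required target features were verified above.
///     unsafe {
///         let a = _mm256_set1_epi16(9);
///         assert_eq!(_mm256_mask_reduce_min_epi16(0b11, a), 9);
///         assert_eq!(_mm256_mask_reduce_min_epi16(0, a), i16::MAX);
///     }
/// }
/// ```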
5163#[inline]
5164#[target_feature(enable = "avx512bw,avx512vl")]
5165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5166pub fn _mm256_mask_reduce_min_epi16(k: __mmask16, a: __m256i) -> i16 {
5167    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(0x7fff))) }
5168}
5169
5170/// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a.
5171///
5172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi16)
5173#[inline]
5174#[target_feature(enable = "avx512bw,avx512vl")]
5175#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5176pub fn _mm_reduce_min_epi16(a: __m128i) -> i16 {
5177    unsafe { simd_reduce_min(a.as_i16x8()) }
5178}
5179
5180/// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5181///
5182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi16)
5183#[inline]
5184#[target_feature(enable = "avx512bw,avx512vl")]
5185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5186pub fn _mm_mask_reduce_min_epi16(k: __mmask8, a: __m128i) -> i16 {
5187    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(0x7fff))) }
5188}
5189
5190/// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a.
5191///
5192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi8)
5193#[inline]
5194#[target_feature(enable = "avx512bw,avx512vl")]
5195#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5196pub fn _mm256_reduce_min_epi8(a: __m256i) -> i8 {
5197    unsafe { simd_reduce_min(a.as_i8x32()) }
5198}
5199
5200/// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5201///
5202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi8)
5203#[inline]
5204#[target_feature(enable = "avx512bw,avx512vl")]
5205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5206pub fn _mm256_mask_reduce_min_epi8(k: __mmask32, a: __m256i) -> i8 {
5207    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(0x7f))) }
5208}
5209
5210/// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a.
5211///
5212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi8)
5213#[inline]
5214#[target_feature(enable = "avx512bw,avx512vl")]
5215#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5216pub fn _mm_reduce_min_epi8(a: __m128i) -> i8 {
5217    unsafe { simd_reduce_min(a.as_i8x16()) }
5218}
5219
5220/// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5221///
5222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_min_epi8(k: __mmask16, a: __m128i) -> i8 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(0x7f))) }
}

/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_min_epu16(a: __m256i) -> u16 {
    unsafe { simd_reduce_min(a.as_u16x16()) }
}

/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_min_epu16(k: __mmask16, a: __m256i) -> u16 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x16(), u16x16::splat(0xffff))) }
}

/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_min_epu16(a: __m128i) -> u16 {
    unsafe { simd_reduce_min(a.as_u16x8()) }
}

/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_min_epu16(k: __mmask8, a: __m128i) -> u16 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x8(), u16x8::splat(0xffff))) }
}

/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_min_epu8(a: __m256i) -> u8 {
    unsafe { simd_reduce_min(a.as_u8x32()) }
}

/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_min_epu8(k: __mmask32, a: __m256i) -> u8 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x32(), u8x32::splat(0xff))) }
}

/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_min_epu8(a: __m128i) -> u8 {
    unsafe { simd_reduce_min(a.as_u8x16()) }
}

/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_min_epu8(k: __mmask16, a: __m128i) -> u8 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x16(), u8x16::splat(0xff))) }
}

/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 {
    unsafe { simd_reduce_mul_unordered(a.as_i16x16()) }
}

/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi16)
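///
/// # Examples
///
/// A minimal illustrative sketch (not from Intel's documentation), assuming
/// AVX-512BW and AVX-512VL support has already been verified:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm256_set1_epi16(2);
///     // Only the low four lanes are active; inactive lanes contribute
///     // the multiplicative identity 1, so the result is 2 * 2 * 2 * 2.
///     assert_eq!(_mm256_mask_reduce_mul_epi16(0b1111, a), 16);
/// }
/// ```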
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 {
    unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(1))) }
}

/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_mul_epi16(a: __m128i) -> i16 {
    unsafe { simd_reduce_mul_unordered(a.as_i16x8()) }
}

/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 {
    unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(1))) }
}

/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 {
    unsafe { simd_reduce_mul_unordered(a.as_i8x32()) }
}

/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 {
    unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(1))) }
}

/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_mul_epi8(a: __m128i) -> i8 {
    unsafe { simd_reduce_mul_unordered(a.as_i8x16()) }
}

/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_mul_epi8(k: __mmask16, a: __m128i) -> i8 {
    unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(1))) }
}

/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_or_epi16(a: __m256i) -> i16 {
    unsafe { simd_reduce_or(a.as_i16x16()) }
}

/// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi16)
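///
/// # Examples
///
/// A minimal illustrative sketch (not from Intel's documentation), assuming
/// AVX-512BW and AVX-512VL support has already been verified:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm256_set1_epi16(0b0101);
///     // Inactive lanes contribute 0, the identity for bitwise OR, so a
///     // single active lane already determines the result.
///     assert_eq!(_mm256_mask_reduce_or_epi16(0b1, a), 0b0101);
///     assert_eq!(_mm256_mask_reduce_or_epi16(0, a), 0);
/// }
/// ```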
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_or_epi16(k: __mmask16, a: __m256i) -> i16 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) }
}

/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_or_epi16(a: __m128i) -> i16 {
    unsafe { simd_reduce_or(a.as_i16x8()) }
}

/// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_or_epi16(k: __mmask8, a: __m128i) -> i16 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) }
}

/// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_or_epi8(a: __m256i) -> i8 {
    unsafe { simd_reduce_or(a.as_i8x32()) }
}

/// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_or_epi8(k: __mmask32, a: __m256i) -> i8 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) }
}

/// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_or_epi8(a: __m128i) -> i8 {
    unsafe { simd_reduce_or(a.as_i8x16()) }
}

/// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_or_epi8(k: __mmask16, a: __m128i) -> i8 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) }
}

/// Load 512-bits (composed of 32 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi16&expand=3368)
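///
/// # Examples
///
/// A minimal illustrative sketch (not from Intel's documentation), assuming
/// AVX-512BW support has already been verified, e.g. with
/// `is_x86_feature_detected!`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let data: [i16; 32] = core::array::from_fn(|i| i as i16);
/// // The pointer may be unaligned; all 32 lanes are read in memory order.
/// let v = unsafe { _mm512_loadu_epi16(data.as_ptr()) };
/// ```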
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu16
pub unsafe fn _mm512_loadu_epi16(mem_addr: *const i16) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 16 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi16&expand=3365)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu16
pub unsafe fn _mm256_loadu_epi16(mem_addr: *const i16) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 8 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi16&expand=3362)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu16
pub unsafe fn _mm_loadu_epi16(mem_addr: *const i16) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
}

/// Load 512-bits (composed of 64 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi8&expand=3395)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu8
pub unsafe fn _mm512_loadu_epi8(mem_addr: *const i8) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 32 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi8&expand=3392)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu8
pub unsafe fn _mm256_loadu_epi8(mem_addr: *const i8) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 16 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi8&expand=3389)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu8
pub unsafe fn _mm_loadu_epi8(mem_addr: *const i8) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
}

/// Store 512-bits (composed of 32 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi16&expand=5622)
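///
/// # Examples
///
/// A minimal illustrative sketch (not from Intel's documentation), assuming
/// AVX-512BW support has already been verified:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let mut out = [0i16; 32];
/// unsafe {
///     let v = _mm512_set1_epi16(3);
///     // The destination may be unaligned; all 32 lanes are written.
///     _mm512_storeu_epi16(out.as_mut_ptr(), v);
/// }
/// assert_eq!(out, [3i16; 32]);
/// ```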
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu16
pub unsafe fn _mm512_storeu_epi16(mem_addr: *mut i16, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 16 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi16&expand=5620)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu16
pub unsafe fn _mm256_storeu_epi16(mem_addr: *mut i16, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 8 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi16&expand=5618)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu16
pub unsafe fn _mm_storeu_epi16(mem_addr: *mut i16, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}

/// Store 512-bits (composed of 64 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi8&expand=5640)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu8
pub unsafe fn _mm512_storeu_epi8(mem_addr: *mut i8, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 32 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi8&expand=5638)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu8
pub unsafe fn _mm256_storeu_epi8(mem_addr: *mut i8, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 16 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi8&expand=5636)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu8
pub unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}

/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi16)
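///
/// # Examples
///
/// A minimal illustrative sketch (not from Intel's documentation), assuming
/// AVX-512BW support has already been verified:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let data = [5i16; 32];
/// unsafe {
///     let src = _mm512_set1_epi16(-1);
///     // Even-numbered lanes are loaded from memory; odd-numbered lanes
///     // keep the corresponding value from `src`.
///     let v = _mm512_mask_loadu_epi16(src, 0x5555_5555, data.as_ptr());
/// }
/// ```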
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *const i16) -> __m512i {
    transmute(loaddqu16_512(mem_addr, src.as_i16x32(), k))
}

/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
    _mm512_mask_loadu_epi16(_mm512_setzero_si512(), k, mem_addr)
}

/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *const i8) -> __m512i {
    transmute(loaddqu8_512(mem_addr, src.as_i8x64(), k))
}

/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
    _mm512_mask_loadu_epi8(_mm512_setzero_si512(), k, mem_addr)
}

/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *const i16) -> __m256i {
    transmute(loaddqu16_256(mem_addr, src.as_i16x16(), k))
}

/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
    _mm256_mask_loadu_epi16(_mm256_setzero_si256(), k, mem_addr)
}

/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *const i8) -> __m256i {
    transmute(loaddqu8_256(mem_addr, src.as_i8x32(), k))
}

/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
    _mm256_mask_loadu_epi8(_mm256_setzero_si256(), k, mem_addr)
}

/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i16) -> __m128i {
    transmute(loaddqu16_128(mem_addr, src.as_i16x8(), k))
}

/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
    _mm_mask_loadu_epi16(_mm_setzero_si128(), k, mem_addr)
}

/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i8) -> __m128i {
    transmute(loaddqu8_128(mem_addr, src.as_i8x16(), k))
}

/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
    _mm_mask_loadu_epi8(_mm_setzero_si128(), k, mem_addr)
}

/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi16)
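///
/// # Examples
///
/// A minimal illustrative sketch (not from Intel's documentation), assuming
/// AVX-512BW support has already been verified:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let mut out = [0i16; 32];
/// unsafe {
///     let v = _mm512_set1_epi16(9);
///     // Only the low eight lanes are written; the rest of `out` is
///     // left untouched.
///     _mm512_mask_storeu_epi16(out.as_mut_ptr(), 0xff, v);
/// }
/// ```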
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) {
    storedqu16_512(mem_addr, a.as_i16x32(), mask)
}

/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) {
    storedqu8_512(mem_addr, a.as_i8x64(), mask)
}

/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) {
    storedqu16_256(mem_addr, a.as_i16x16(), mask)
}

/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) {
    storedqu8_256(mem_addr, a.as_i8x32(), mask)
}

/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) {
    storedqu16_128(mem_addr, a.as_i16x8(), mask)
}

/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) {
    storedqu8_128(mem_addr, a.as_i8x16(), mask)
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_madd_epi16&expand=3511)
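///
/// # Examples
///
/// A minimal illustrative sketch (not from Intel's documentation), assuming
/// AVX-512BW support has already been verified. Each 32-bit result lane is
/// `a[2i] * b[2i] + a[2i + 1] * b[2i + 1]`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(3);
///     let b = _mm512_set1_epi16(4);
///     // Every adjacent pair contributes 3 * 4 + 3 * 4 = 24 per i32 lane.
///     let r = _mm512_madd_epi16(a, b);
/// }
/// ```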
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let r: i32x32 = simd_mul(simd_cast(a.as_i16x32()), simd_cast(b.as_i16x32()));
        let even: i32x16 = simd_shuffle!(
            r,
            r,
            [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
        );
        let odd: i32x16 = simd_shuffle!(
            r,
            r,
            [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
        );
        simd_add(even, odd).as_m512i()
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_madd_epi16&expand=3512)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm512_mask_madd_epi16(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let madd = _mm512_madd_epi16(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, madd, src.as_i32x16()))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_madd_epi16&expand=3513)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm512_maskz_madd_epi16(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let madd = _mm512_madd_epi16(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, madd, i32x16::ZERO))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_madd_epi16&expand=3509)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm256_mask_madd_epi16(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let madd = _mm256_madd_epi16(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, madd, src.as_i32x8()))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_madd_epi16&expand=3510)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm256_maskz_madd_epi16(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let madd = _mm256_madd_epi16(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, madd, i32x8::ZERO))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_madd_epi16&expand=3506)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm_mask_madd_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let madd = _mm_madd_epi16(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, madd, src.as_i32x4()))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_madd_epi16&expand=3507)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let madd = _mm_madd_epi16(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, madd, i32x4::ZERO))
    }
}

/// Vertically multiply each unsigned 8-bit integer from a with the corresponding signed 8-bit integer from b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maddubs_epi16&expand=3539)
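///
/// # Examples
///
/// A minimal illustrative sketch (not from Intel's documentation), assuming
/// AVX-512BW support has already been verified:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Bytes of `a` are read as unsigned (here 200); bytes of `b` are signed.
///     let a = _mm512_set1_epi8(200u8 as i8);
///     let b = _mm512_set1_epi8(-1);
///     // Each 16-bit lane holds the saturated sum 200 * -1 + 200 * -1 = -400.
///     let r = _mm512_maddubs_epi16(a, b);
/// }
/// ```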
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpmaddubsw(a.as_i8x64(), b.as_i8x64())) }
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_maddubs_epi16&expand=3540)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm512_mask_maddubs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, madd, src.as_i16x32()))
    }
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_maddubs_epi16&expand=3541)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm512_maskz_maddubs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, madd, i16x32::ZERO))
    }
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_maddubs_epi16&expand=3537)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm256_mask_maddubs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, madd, src.as_i16x16()))
    }
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_maddubs_epi16&expand=3538)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm256_maskz_maddubs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, madd, i16x16::ZERO))
    }
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_maddubs_epi16&expand=3534)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm_mask_maddubs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let madd = _mm_maddubs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, madd, src.as_i16x8()))
    }
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_maddubs_epi16&expand=3535)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm_maskz_maddubs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let madd = _mm_maddubs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, madd, i16x8::ZERO))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi32&expand=4091)
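///
/// # Examples
///
/// A minimal illustrative sketch (not from Intel's documentation), assuming
/// AVX-512BW support has already been verified:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(70_000);  // above i16::MAX
///     let b = _mm512_set1_epi32(-70_000); // below i16::MIN
///     // Out-of-range values saturate to i16::MAX and i16::MIN respectively.
///     let r = _mm512_packs_epi32(a, b);
/// }
/// ```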
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpackssdw(a.as_i32x16(), b.as_i32x16())) }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi32&expand=4089)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm512_mask_packs_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packs_epi32(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi32&expand=4090)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packs_epi32(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi32&expand=4086)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm256_mask_packs_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packs_epi32(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packs_epi32&expand=4087)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm256_maskz_packs_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packs_epi32(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, pack, i16x16::ZERO))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi32&expand=4083)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packs_epi32(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi32&expand=4084)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packs_epi32(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi16&expand=4082)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpacksswb(a.as_i16x32(), b.as_i16x32())) }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi16&expand=4080)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm512_mask_packs_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packs_epi16(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi16&expand=4081)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packs_epi16(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi16&expand=4077)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm256_mask_packs_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packs_epi16(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packs_epi16&expand=4078)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packs_epi16(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi16&expand=4074)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packs_epi16(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi16&expand=4075)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packs_epi16(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi32&expand=4130)
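///
/// # Examples
///
/// A minimal illustrative sketch (not from Intel's documentation), assuming
/// AVX-512BW support has already been verified:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(-5);
///     let b = _mm512_set1_epi32(70_000);
///     // Unsigned saturation: negative inputs clamp to 0 and values
///     // above u16::MAX clamp to 0xFFFF.
///     let r = _mm512_packus_epi32(a, b);
/// }
/// ```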
6226#[inline]
6227#[target_feature(enable = "avx512bw")]
6228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6229#[cfg_attr(test, assert_instr(vpackusdw))]
6230pub fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i {
6231    unsafe { transmute(vpackusdw(a.as_i32x16(), b.as_i32x16())) }
6232}
6233
6234/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6235///
6236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi32&expand=4128)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm512_mask_packus_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packus_epi32(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi32&expand=4129)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packus_epi32(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi32&expand=4125)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm256_mask_packus_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packus_epi32(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi32&expand=4126)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packus_epi32(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, pack, i16x16::ZERO))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi32&expand=4122)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packus_epi32(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi32&expand=4123)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packus_epi32(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi16&expand=4121)
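///
/// # Example
///
/// An illustrative sketch with hypothetical values, assuming `avx512bw` is
/// available at runtime:
///
/// ```ignore
/// // i16 -> u8 with unsigned saturation: -5 clamps to 0, 300 clamps to 255.
/// let a = _mm512_set1_epi16(-5);
/// let b = _mm512_set1_epi16(300);
/// let r = _mm512_packus_epi16(a, b);
/// ```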
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpackuswb(a.as_i16x32(), b.as_i16x32())) }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi16&expand=4119)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm512_mask_packus_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packus_epi16(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi16&expand=4120)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packus_epi16(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi16&expand=4116)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm256_mask_packus_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packus_epi16(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi16&expand=4117)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packus_epi16(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi16&expand=4113)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packus_epi16(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi16&expand=4114)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packus_epi16(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu16&expand=388)
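///
/// # Example
///
/// A minimal sketch of the rounding behaviour (not from the original
/// documentation), assuming `avx512bw` has been detected:
///
/// ```ignore
/// // The average rounds up: (10 + 13 + 1) >> 1 == 12 in every lane.
/// let a = _mm512_set1_epi16(10);
/// let b = _mm512_set1_epi16(13);
/// let r = _mm512_avg_epu16(a, b);
/// ```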
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub fn _mm512_avg_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = simd_cast::<_, u32x32>(a.as_u16x32());
        let b = simd_cast::<_, u32x32>(b.as_u16x32());
        let r = simd_shr(simd_add(simd_add(a, b), u32x32::splat(1)), u32x32::splat(1));
        transmute(simd_cast::<_, u16x32>(r))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu16&expand=389)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub fn _mm512_mask_avg_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let avg = _mm512_avg_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, avg, src.as_u16x32()))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu16&expand=390)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub fn _mm512_maskz_avg_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let avg = _mm512_avg_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, avg, u16x32::ZERO))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu16&expand=386)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub fn _mm256_mask_avg_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let avg = _mm256_avg_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, avg, src.as_u16x16()))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu16&expand=387)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub fn _mm256_maskz_avg_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let avg = _mm256_avg_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, avg, u16x16::ZERO))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu16&expand=383)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub fn _mm_mask_avg_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let avg = _mm_avg_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, avg, src.as_u16x8()))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu16&expand=384)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub fn _mm_maskz_avg_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let avg = _mm_avg_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, avg, u16x8::ZERO))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu8&expand=397)
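///
/// # Example
///
/// An illustrative sketch (added for clarity), assuming `avx512bw` is
/// available:
///
/// ```ignore
/// // (7 + 8 + 1) >> 1 == 8: the sum is computed in a wider type, so the
/// // intermediate addition cannot overflow.
/// let a = _mm512_set1_epi8(7);
/// let b = _mm512_set1_epi8(8);
/// let r = _mm512_avg_epu8(a, b);
/// ```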
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub fn _mm512_avg_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = simd_cast::<_, u16x64>(a.as_u8x64());
        let b = simd_cast::<_, u16x64>(b.as_u8x64());
        let r = simd_shr(simd_add(simd_add(a, b), u16x64::splat(1)), u16x64::splat(1));
        transmute(simd_cast::<_, u8x64>(r))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu8&expand=398)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub fn _mm512_mask_avg_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let avg = _mm512_avg_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, avg, src.as_u8x64()))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu8&expand=399)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub fn _mm512_maskz_avg_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let avg = _mm512_avg_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, avg, u8x64::ZERO))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu8&expand=395)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub fn _mm256_mask_avg_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let avg = _mm256_avg_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, avg, src.as_u8x32()))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu8&expand=396)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub fn _mm256_maskz_avg_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let avg = _mm256_avg_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, avg, u8x32::ZERO))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu8&expand=392)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub fn _mm_mask_avg_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let avg = _mm_avg_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, avg, src.as_u8x16()))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu8&expand=393)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let avg = _mm_avg_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, avg, u8x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi16&expand=5271)
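///
/// # Example
///
/// A hedged sketch, not from the original documentation, assuming `avx512bw`
/// support:
///
/// ```ignore
/// // The shift amount comes from the low 64 bits of `count`;
/// // counts of 16 or more zero every element.
/// let a = _mm512_set1_epi16(3);
/// let count = _mm_set_epi64x(0, 2);
/// let r = _mm512_sll_epi16(a, count); // every lane is 3 << 2 == 12
/// ```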
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsllw(a.as_i16x32(), count.as_i16x8())) }
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi16&expand=5269)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm512_mask_sll_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sll_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi16&expand=5270)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sll_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi16&expand=5266)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm256_mask_sll_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sll_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi16&expand=5267)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sll_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi16&expand=5263)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sll_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi16&expand=5264)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sll_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi16&expand=5301)
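///
/// # Example
///
/// An illustrative sketch (added; not part of the original source), assuming
/// `avx512bw` support:
///
/// ```ignore
/// let a = _mm512_set1_epi16(3);
/// let r = _mm512_slli_epi16::<2>(a);  // every lane is 3 << 2 == 12
/// let z = _mm512_slli_epi16::<16>(a); // shift counts of 16..=255 yield zeros
/// ```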
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_slli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            transmute(simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
        }
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi16&expand=5299)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_slli_epi16<const IMM8: u32>(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = if IMM8 >= 16 {
            u16x32::ZERO
        } else {
            simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi16&expand=5300)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_slli_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            let shf = simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16));
            transmute(simd_select_bitmask(k, shf, u16x32::ZERO))
        }
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi16&expand=5296)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_slli_epi16<const IMM8: u32>(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = if IMM8 >= 16 {
            u16x16::ZERO
        } else {
            simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u16x16()))
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi16&expand=5297)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_slli_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 16 {
            _mm256_setzero_si256()
        } else {
            let shf = simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16));
            transmute(simd_select_bitmask(k, shf, u16x16::ZERO))
        }
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi16&expand=5293)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_slli_epi16<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = if IMM8 >= 16 {
            u16x8::ZERO
        } else {
            simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u16x8()))
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi16&expand=5294)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_slli_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 16 {
            _mm_setzero_si128()
        } else {
            let shf = simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16));
            transmute(simd_select_bitmask(k, shf, u16x8::ZERO))
        }
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi16&expand=5333)
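///
/// # Example
///
/// A minimal sketch of the per-element behaviour (an illustrative addition),
/// assuming `avx512bw` support:
///
/// ```ignore
/// // Each 16-bit element is shifted by the count in the matching element of
/// // `count`; an element with a count of 16 or more becomes zero.
/// let a = _mm512_set1_epi16(1);
/// let count = _mm512_set1_epi16(3);
/// let r = _mm512_sllv_epi16(a, count); // every lane is 1 << 3 == 8
/// ```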
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsllvw(a.as_i16x32(), count.as_i16x32())) }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi16&expand=5331)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm512_mask_sllv_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_sllv_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi16&expand=5332)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_sllv_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi16&expand=5330)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe { transmute(vpsllvw256(a.as_i16x16(), count.as_i16x16())) }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi16&expand=5328)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm256_mask_sllv_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_sllv_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi16&expand=5329)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_sllv_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi16&expand=5327)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(vpsllvw128(a.as_i16x8(), count.as_i16x8())) }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi16&expand=5325)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm_mask_sllv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sllv_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi16&expand=5326)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sllv_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi16&expand=5483)
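///
/// # Example
///
/// An illustrative sketch (not from the original documentation), assuming
/// `avx512bw` support:
///
/// ```ignore
/// // Logical shift: zeros are shifted in regardless of the sign bit.
/// let a = _mm512_set1_epi16(-1); // 0xFFFF in every lane
/// let count = _mm_set_epi64x(0, 8);
/// let r = _mm512_srl_epi16(a, count); // every lane is 0x00FF
/// ```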
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsrlw(a.as_i16x32(), count.as_i16x8())) }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi16&expand=5481)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm512_mask_srl_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_srl_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi16&expand=5482)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_srl_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi16&expand=5478)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm256_mask_srl_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_srl_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi16&expand=5479)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_srl_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi16&expand=5475)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srl_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi16&expand=5476)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srl_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi16&expand=5513)
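///
/// # Example
///
/// A minimal sketch (an illustrative addition), assuming `avx512bw` support:
///
/// ```ignore
/// let a = _mm512_set1_epi16(-1); // 0xFFFF in every lane
/// let r = _mm512_srli_epi16::<8>(a); // every lane is 0x00FF
/// ```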
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_srli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            transmute(simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
        }
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi16&expand=5511)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_srli_epi16<const IMM8: u32>(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = if IMM8 >= 16 {
            u16x32::ZERO
        } else {
            simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi16&expand=5512)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_srli_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // IMM8 is declared as i32 here although it should logically be u32;
        // the Intel documentation this signature is verified against appears
        // to be incorrect.
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            let shf = simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16));
            transmute(simd_select_bitmask(k, shf, u16x32::ZERO))
        }
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi16&expand=5508)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_srli_epi16<const IMM8: i32>(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm256_srli_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shf.as_i16x16(), src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi16&expand=5509)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_srli_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm256_srli_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shf.as_i16x16(), i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi16&expand=5505)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_srli_epi16<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm_srli_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shf.as_i16x8(), src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi16&expand=5506)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_srli_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm_srli_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shf.as_i16x8(), i16x8::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi16&expand=5545)
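///
/// # Example
///
/// An illustrative sketch (not part of the original documentation), assuming
/// `avx512bw` support:
///
/// ```ignore
/// // Per-element logical shifts; zeros are shifted in.
/// let a = _mm512_set1_epi16(-1); // 0xFFFF in every lane
/// let count = _mm512_set1_epi16(4);
/// let r = _mm512_srlv_epi16(a, count); // every lane is 0x0FFF
/// ```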
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsrlvw(a.as_i16x32(), count.as_i16x32())) }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi16&expand=5543)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm512_mask_srlv_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srlv_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi16&expand=5544)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srlv_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi16&expand=5542)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe { transmute(vpsrlvw256(a.as_i16x16(), count.as_i16x16())) }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi16&expand=5540)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm256_mask_srlv_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srlv_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi16&expand=5541)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srlv_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi16&expand=5539)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(vpsrlvw128(a.as_i16x8(), count.as_i16x8())) }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi16&expand=5537)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm_mask_srlv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srlv_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi16&expand=5538)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srlv_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi16&expand=5398)
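///
/// # Example
///
/// A minimal sketch of the sign-extending behaviour (an illustrative
/// addition), assuming `avx512bw` support:
///
/// ```ignore
/// // Arithmetic shift: the sign bit is replicated into the vacated bits.
/// let a = _mm512_set1_epi16(-32);
/// let count = _mm_set_epi64x(0, 2);
/// let r = _mm512_sra_epi16(a, count); // every lane is -32 >> 2 == -8
/// ```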
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsraw(a.as_i16x32(), count.as_i16x8())) }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi16&expand=5396)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm512_mask_sra_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sra_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi16&expand=5397)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sra_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi16&expand=5393)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm256_mask_sra_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sra_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi16&expand=5394)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sra_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi16&expand=5390)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sra_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi16&expand=5391)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sra_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi16&expand=5427)
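///
/// A minimal illustrative sketch (hypothetical values, not part of the
/// original docs; assumes `avx512bw` support has been confirmed at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(-32); // every lane: -32
///     let r = _mm512_srai_epi16::<3>(a); // every lane: -4; an IMM8 above 15 fills each lane with its sign bit
/// }
/// ```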
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_srai_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16)))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi16&expand=5425)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_srai_epi16<const IMM8: u32>(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi16&expand=5426)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_srai_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi16&expand=5422)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_srai_epi16<const IMM8: u32>(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, r, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi16&expand=5423)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_srai_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, r, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi16&expand=5419)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_srai_epi16<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, r, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi16&expand=5420)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_srai_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, r, i16x8::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi16&expand=5456)
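///
/// A minimal illustrative sketch (hypothetical values, not part of the
/// original docs; assumes `avx512bw` support has been confirmed at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(-8); // every lane: -8
///     let count = _mm512_set1_epi16(1); // per-lane shift amounts
///     let r = _mm512_srav_epi16(a, count); // every lane: -4
/// }
/// ```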
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsravw(a.as_i16x32(), count.as_i16x32())) }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi16&expand=5454)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm512_mask_srav_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srav_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi16&expand=5455)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srav_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi16&expand=5453)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe { transmute(vpsravw256(a.as_i16x16(), count.as_i16x16())) }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi16&expand=5451)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm256_mask_srav_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srav_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi16&expand=5452)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srav_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi16&expand=5450)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(vpsravw128(a.as_i16x8(), count.as_i16x8())) }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi16&expand=5448)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm_mask_srav_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srav_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi16&expand=5449)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm_maskz_srav_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srav_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi16&expand=4226)
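///
/// A minimal illustrative sketch (hypothetical values, not part of the
/// original docs; assumes `avx512bw` support has been confirmed at runtime).
/// With 32 lanes, bits 4:0 of each index select an element and bit 5 selects
/// between a and b:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(1);
///     let b = _mm512_set1_epi16(2);
///     let idx = _mm512_set1_epi16(32); // bit 5 set: element 0 of b
///     let r = _mm512_permutex2var_epi16(a, idx, b); // every lane: 2
/// }
/// ```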
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm512_permutex2var_epi16(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpermi2w(a.as_i16x32(), idx.as_i16x32(), b.as_i16x32())) }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi16&expand=4223)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2w))]
pub fn _mm512_mask_permutex2var_epi16(
    a: __m512i,
    k: __mmask32,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
        transmute(simd_select_bitmask(k, permute, a.as_i16x32()))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi16&expand=4225)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm512_maskz_permutex2var_epi16(
    k: __mmask32,
    a: __m512i,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
        transmute(simd_select_bitmask(k, permute, i16x32::ZERO))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi16&expand=4224)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermi2w))]
pub fn _mm512_mask2_permutex2var_epi16(
    a: __m512i,
    idx: __m512i,
    k: __mmask32,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
        transmute(simd_select_bitmask(k, permute, idx.as_i16x32()))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi16&expand=4222)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm256_permutex2var_epi16(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpermi2w256(a.as_i16x16(), idx.as_i16x16(), b.as_i16x16())) }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi16&expand=4219)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2w))]
pub fn _mm256_mask_permutex2var_epi16(
    a: __m256i,
    k: __mmask16,
    idx: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
        transmute(simd_select_bitmask(k, permute, a.as_i16x16()))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi16&expand=4221)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm256_maskz_permutex2var_epi16(
    k: __mmask16,
    a: __m256i,
    idx: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
        transmute(simd_select_bitmask(k, permute, i16x16::ZERO))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi16&expand=4220)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermi2w))]
pub fn _mm256_mask2_permutex2var_epi16(
    a: __m256i,
    idx: __m256i,
    k: __mmask16,
    b: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
        transmute(simd_select_bitmask(k, permute, idx.as_i16x16()))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi16&expand=4218)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm_permutex2var_epi16(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpermi2w128(a.as_i16x8(), idx.as_i16x8(), b.as_i16x8())) }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi16&expand=4215)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2w))]
pub fn _mm_mask_permutex2var_epi16(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
        transmute(simd_select_bitmask(k, permute, a.as_i16x8()))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi16&expand=4217)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm_maskz_permutex2var_epi16(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
        transmute(simd_select_bitmask(k, permute, i16x8::ZERO))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi16&expand=4216)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermi2w))]
pub fn _mm_mask2_permutex2var_epi16(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
        transmute(simd_select_bitmask(k, permute, idx.as_i16x8()))
    }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi16&expand=4295)
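///
/// A minimal illustrative sketch (hypothetical values, not part of the
/// original docs; assumes `avx512bw` support has been confirmed at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(7);
///     let idx = _mm512_set1_epi16(3); // every output lane reads input lane 3
///     let r = _mm512_permutexvar_epi16(idx, a); // every lane: 7
/// }
/// ```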
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm512_permutexvar_epi16(idx: __m512i, a: __m512i) -> __m512i {
    unsafe { transmute(vpermw(a.as_i16x32(), idx.as_i16x32())) }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi16&expand=4293)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm512_mask_permutexvar_epi16(
    src: __m512i,
    k: __mmask32,
    idx: __m512i,
    a: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
        transmute(simd_select_bitmask(k, permute, src.as_i16x32()))
    }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi16&expand=4294)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm512_maskz_permutexvar_epi16(k: __mmask32, idx: __m512i, a: __m512i) -> __m512i {
    unsafe {
        let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
        transmute(simd_select_bitmask(k, permute, i16x32::ZERO))
    }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi16&expand=4292)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm256_permutexvar_epi16(idx: __m256i, a: __m256i) -> __m256i {
    unsafe { transmute(vpermw256(a.as_i16x16(), idx.as_i16x16())) }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi16&expand=4290)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm256_mask_permutexvar_epi16(
    src: __m256i,
    k: __mmask16,
    idx: __m256i,
    a: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
        transmute(simd_select_bitmask(k, permute, src.as_i16x16()))
    }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi16&expand=4291)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm256_maskz_permutexvar_epi16(k: __mmask16, idx: __m256i, a: __m256i) -> __m256i {
    unsafe {
        let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
        transmute(simd_select_bitmask(k, permute, i16x16::ZERO))
    }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutexvar_epi16&expand=4289)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm_permutexvar_epi16(idx: __m128i, a: __m128i) -> __m128i {
    unsafe { transmute(vpermw128(a.as_i16x8(), idx.as_i16x8())) }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutexvar_epi16&expand=4287)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm_mask_permutexvar_epi16(src: __m128i, k: __mmask8, idx: __m128i, a: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
        transmute(simd_select_bitmask(k, permute, src.as_i16x8()))
    }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutexvar_epi16&expand=4288)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm_maskz_permutexvar_epi16(k: __mmask8, idx: __m128i, a: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
        transmute(simd_select_bitmask(k, permute, i16x8::ZERO))
    }
}

/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi16&expand=430)
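///
/// A minimal illustrative sketch (hypothetical values, not part of the
/// original docs; assumes `avx512bw` support has been confirmed at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(1);
///     let b = _mm512_set1_epi16(2);
///     // A set mask bit takes the lane from b, a clear bit from a.
///     let r = _mm512_mask_blend_epi16(0x0000_FFFF, a, b); // low 16 lanes: 2, high 16 lanes: 1
/// }
/// ```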
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
pub fn _mm512_mask_blend_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i16x32(), a.as_i16x32())) }
}

/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi16&expand=429)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
pub fn _mm256_mask_blend_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i16x16(), a.as_i16x16())) }
}

/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi16&expand=427)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
pub fn _mm_mask_blend_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i16x8(), a.as_i16x8())) }
}

/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi8&expand=441)
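///
/// A minimal illustrative sketch (hypothetical values, not part of the
/// original docs; assumes `avx512bw` support has been confirmed at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi8(1);
///     let b = _mm512_set1_epi8(2);
///     // Low 32 mask bits set: those lanes come from b, the rest from a.
///     let r = _mm512_mask_blend_epi8(u64::MAX >> 32, a, b);
/// }
/// ```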
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
pub fn _mm512_mask_blend_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i8x64(), a.as_i8x64())) }
}

/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi8&expand=440)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
pub fn _mm256_mask_blend_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i8x32(), a.as_i8x32())) }
}

/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi8&expand=439)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
pub fn _mm_mask_blend_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i8x16(), a.as_i8x16())) }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastw_epi16&expand=587)
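///
/// A minimal illustrative sketch (hypothetical values, not part of the
/// original docs; assumes `avx512bw` support has been confirmed at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 9); // low word: 9
///     let r = _mm512_broadcastw_epi16(a); // all 32 lanes: 9
/// }
/// ```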
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i {
    unsafe {
        let a = _mm512_castsi128_si512(a).as_i16x32();
        let ret: i16x32 = simd_shuffle!(
            a,
            a,
            [
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0,
            ],
        );
        transmute(ret)
    }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastw_epi16&expand=588)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm512_mask_broadcastw_epi16(src: __m512i, k: __mmask32, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, broadcast, src.as_i16x32()))
    }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastw_epi16&expand=589)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm512_maskz_broadcastw_epi16(k: __mmask32, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, broadcast, i16x32::ZERO))
    }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastw_epi16&expand=585)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm256_mask_broadcastw_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_i16x16()))
    }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastw_epi16&expand=586)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm256_maskz_broadcastw_epi16(k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, broadcast, i16x16::ZERO))
    }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastw_epi16&expand=582)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm_mask_broadcastw_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i16x8()))
    }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastw_epi16&expand=583)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, broadcast, i16x8::ZERO))
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastb_epi8&expand=536)
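///
/// A minimal illustrative sketch (hypothetical values, not part of the
/// original docs; assumes `avx512bw` support has been confirmed at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set1_epi8(5); // low byte: 5
///     let r = _mm512_broadcastb_epi8(a); // all 64 lanes: 5
/// }
/// ```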
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i {
    unsafe {
        let a = _mm512_castsi128_si512(a).as_i8x64();
        let ret: i8x64 = simd_shuffle!(
            a,
            a,
            [
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );
        transmute(ret)
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastb_epi8&expand=537)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm512_mask_broadcastb_epi8(src: __m512i, k: __mmask64, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, broadcast, src.as_i8x64()))
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastb_epi8&expand=538)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm512_maskz_broadcastb_epi8(k: __mmask64, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, broadcast, i8x64::ZERO))
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastb_epi8&expand=534)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm256_mask_broadcastb_epi8(src: __m256i, k: __mmask32, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, broadcast, src.as_i8x32()))
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastb_epi8&expand=535)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm256_maskz_broadcastb_epi8(k: __mmask32, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, broadcast, i8x32::ZERO))
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastb_epi8&expand=531)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm_mask_broadcastb_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_i8x16()))
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastb_epi8&expand=532)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm_maskz_broadcastb_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, broadcast, i8x16::ZERO))
    }
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi16&expand=6012)
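///
/// A minimal illustrative sketch (hypothetical values, not part of the
/// original docs; assumes `avx512bw` support has been confirmed at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(1);
///     let b = _mm512_set1_epi16(2);
///     // The four high words of each 128-bit lane of a and b interleave.
///     let r = _mm512_unpackhi_epi16(a, b); // lanes alternate: 1, 2, 1, 2, ...
/// }
/// ```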
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        #[rustfmt::skip]
        let r: i16x32 = simd_shuffle!(
            a,
            b,
            [
                4, 32 + 4, 5, 32 + 5,
                6, 32 + 6, 7, 32 + 7,
                12, 32 + 12, 13, 32 + 13,
                14, 32 + 14, 15, 32 + 15,
                20, 32 + 20, 21, 32 + 21,
                22, 32 + 22, 23, 32 + 23,
                28, 32 + 28, 29, 32 + 29,
                30, 32 + 30, 31, 32 + 31,
            ],
        );
        transmute(r)
    }
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi16&expand=6010)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub fn _mm512_mask_unpackhi_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x32()))
    }
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi16&expand=6011)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub fn _mm512_maskz_unpackhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, unpackhi, i16x32::ZERO))
    }
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi16&expand=6007)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub fn _mm256_mask_unpackhi_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x16()))
    }
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi16&expand=6008)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub fn _mm256_maskz_unpackhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, unpackhi, i16x16::ZERO))
    }
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi16&expand=6004)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub fn _mm_mask_unpackhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x8()))
    }
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi16&expand=6005)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub fn _mm_maskz_unpackhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, unpackhi, i16x8::ZERO))
    }
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi8&expand=6039)
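///
/// A minimal illustrative sketch (hypothetical values, not part of the
/// original docs; assumes `avx512bw` support has been confirmed at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi8(1);
///     let b = _mm512_set1_epi8(2);
///     // The eight high bytes of each 128-bit lane of a and b interleave.
///     let r = _mm512_unpackhi_epi8(a, b); // lanes alternate: 1, 2, 1, 2, ...
/// }
/// ```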
8335#[inline]
8336#[target_feature(enable = "avx512bw")]
8337#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8338#[cfg_attr(test, assert_instr(vpunpckhbw))]
8339pub fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i {
8340    unsafe {
8341        let a = a.as_i8x64();
8342        let b = b.as_i8x64();
8343        #[rustfmt::skip]
8344        let r: i8x64 = simd_shuffle!(
8345            a,
8346            b,
8347            [
8348                8, 64 + 8, 9, 64 + 9,
8349                10, 64 + 10, 11, 64 + 11,
8350                12, 64 + 12, 13, 64 + 13,
8351                14, 64 + 14, 15, 64 + 15,
8352                24, 64 + 24, 25, 64 + 25,
8353                26, 64 + 26, 27, 64 + 27,
8354                28, 64 + 28, 29, 64 + 29,
8355                30, 64 + 30, 31, 64 + 31,
8356                40, 64 + 40, 41, 64 + 41,
8357                42, 64 + 42, 43, 64 + 43,
8358                44, 64 + 44, 45, 64 + 45,
8359                46, 64 + 46, 47, 64 + 47,
8360                56, 64 + 56, 57, 64 + 57,
8361                58, 64 + 58, 59, 64 + 59,
8362                60, 64 + 60, 61, 64 + 61,
8363                62, 64 + 62, 63, 64 + 63,
8364            ],
8365        );
8366        transmute(r)
8367    }
8368}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi8&expand=6037)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub fn _mm512_mask_unpackhi_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x64()))
    }
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi8&expand=6038)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub fn _mm512_maskz_unpackhi_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, unpackhi, i8x64::ZERO))
    }
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi8&expand=6034)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub fn _mm256_mask_unpackhi_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x32()))
    }
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi8&expand=6035)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub fn _mm256_maskz_unpackhi_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, unpackhi, i8x32::ZERO))
    }
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi8&expand=6031)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub fn _mm_mask_unpackhi_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x16()))
    }
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi8&expand=6032)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub fn _mm_maskz_unpackhi_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, unpackhi, i8x16::ZERO))
    }
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi16&expand=6069)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        #[rustfmt::skip]
        let r: i16x32 = simd_shuffle!(
            a,
            b,
            [
               0,  32+0,   1, 32+1,
               2,  32+2,   3, 32+3,
               8,  32+8,   9, 32+9,
               10, 32+10, 11, 32+11,
               16, 32+16, 17, 32+17,
               18, 32+18, 19, 32+19,
               24, 32+24, 25, 32+25,
               26, 32+26, 27, 32+27
            ],
        );
        transmute(r)
    }
}
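
// --- Editor's sketch (not part of the upstream source) ---
// Same lane-local pattern as the byte unpack: each 128-bit lane interleaves
// the low four words of `a` and `b`. Hypothetical module and test names;
// requires AVX-512BW at runtime.
#[cfg(test)]
mod unpacklo_epi16_sketch {
    use crate::core_arch::x86::*;
    use core::mem::transmute;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn interleaves_low_words_per_lane() {
        let mut av = [0i16; 32];
        let mut bv = [0i16; 32];
        for i in 0..32 {
            av[i] = i as i16;
            bv[i] = i as i16 + 100;
        }
        let a: __m512i = transmute(av);
        let b: __m512i = transmute(bv);
        let r: [i16; 32] = transmute(_mm512_unpacklo_epi16(a, b));
        assert_eq!(&r[0..4], &[0, 100, 1, 101]); // lane 0
        assert_eq!(&r[8..12], &[8, 108, 9, 109]); // lane 1
    }
}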

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi16&expand=6067)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub fn _mm512_mask_unpacklo_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x32()))
    }
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi16&expand=6068)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub fn _mm512_maskz_unpacklo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, unpacklo, i16x32::ZERO))
    }
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi16&expand=6064)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub fn _mm256_mask_unpacklo_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x16()))
    }
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi16&expand=6065)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub fn _mm256_maskz_unpacklo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, unpacklo, i16x16::ZERO))
    }
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi16&expand=6061)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub fn _mm_mask_unpacklo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x8()))
    }
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi16&expand=6062)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub fn _mm_maskz_unpacklo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, unpacklo, i16x8::ZERO))
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi8&expand=6096)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        #[rustfmt::skip]
        let r: i8x64 = simd_shuffle!(
            a,
            b,
            [
                0,  64+0,   1, 64+1,
                2,  64+2,   3, 64+3,
                4,  64+4,   5, 64+5,
                6,  64+6,   7, 64+7,
                16, 64+16, 17, 64+17,
                18, 64+18, 19, 64+19,
                20, 64+20, 21, 64+21,
                22, 64+22, 23, 64+23,
                32, 64+32, 33, 64+33,
                34, 64+34, 35, 64+35,
                36, 64+36, 37, 64+37,
                38, 64+38, 39, 64+39,
                48, 64+48, 49, 64+49,
                50, 64+50, 51, 64+51,
                52, 64+52, 53, 64+53,
                54, 64+54, 55, 64+55,
            ],
        );
        transmute(r)
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi8&expand=6094)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub fn _mm512_mask_unpacklo_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x64()))
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi8&expand=6095)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub fn _mm512_maskz_unpacklo_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, unpacklo, i8x64::ZERO))
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi8&expand=6091)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub fn _mm256_mask_unpacklo_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x32()))
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi8&expand=6092)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub fn _mm256_maskz_unpacklo_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, unpacklo, i8x32::ZERO))
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi8&expand=6088)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub fn _mm_mask_unpacklo_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x16()))
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi8&expand=6089)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub fn _mm_maskz_unpacklo_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, unpacklo, i8x16::ZERO))
    }
}
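
// --- Editor's sketch (not part of the upstream source) ---
// Zero-masking illustrated on the smallest vector width: result elements whose
// mask bit is clear come out as zero rather than being merged from a source
// vector. Hypothetical module and test names; requires AVX-512BW and AVX-512VL.
#[cfg(test)]
mod unpacklo_epi8_maskz_sketch {
    use crate::core_arch::x86::*;
    use core::mem::transmute;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn zeromask_zeroes_unselected_elements() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        // The plain unpack alternates [1, 2, 1, 2, ...]; mask bit i controls
        // result element i, so 0x5555 keeps only the even positions.
        let r: [i8; 16] = transmute(_mm_maskz_unpacklo_epi8(0x5555, a, b));
        assert_eq!(r, [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]);
    }
}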

/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi16&expand=3795)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub fn _mm512_mask_mov_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i16x32();
        transmute(simd_select_bitmask(k, mov, src.as_i16x32()))
    }
}

/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi16&expand=3796)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub fn _mm512_maskz_mov_epi16(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i16x32();
        transmute(simd_select_bitmask(k, mov, i16x32::ZERO))
    }
}
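
// --- Editor's sketch (not part of the upstream source) ---
// `mov` is the identity operation, so the mask/maskz pair isolates exactly
// what write- and zero-masking do: merge from `src` versus force to zero.
// Hypothetical module and test names; requires AVX-512BW at runtime.
#[cfg(test)]
mod mov_epi16_mask_sketch {
    use crate::core_arch::x86::*;
    use core::mem::transmute;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn writemask_merges_zeromask_zeroes() {
        let src = _mm512_set1_epi16(7);
        let a = _mm512_set1_epi16(9);
        let k: __mmask32 = 0x0000_FFFF; // select the low 16 elements
        let r: [i16; 32] = transmute(_mm512_mask_mov_epi16(src, k, a));
        assert_eq!((r[0], r[31]), (9, 7)); // selected from `a`, merged from `src`
        let z: [i16; 32] = transmute(_mm512_maskz_mov_epi16(k, a));
        assert_eq!((z[0], z[31]), (9, 0)); // selected from `a`, zeroed
    }
}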

/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi16&expand=3793)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub fn _mm256_mask_mov_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i16x16();
        transmute(simd_select_bitmask(k, mov, src.as_i16x16()))
    }
}

/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi16&expand=3794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub fn _mm256_maskz_mov_epi16(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i16x16();
        transmute(simd_select_bitmask(k, mov, i16x16::ZERO))
    }
}

/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi16&expand=3791)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub fn _mm_mask_mov_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i16x8();
        transmute(simd_select_bitmask(k, mov, src.as_i16x8()))
    }
}

/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi16&expand=3792)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub fn _mm_maskz_mov_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i16x8();
        transmute(simd_select_bitmask(k, mov, i16x8::ZERO))
    }
}

/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi8&expand=3813)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub fn _mm512_mask_mov_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i8x64();
        transmute(simd_select_bitmask(k, mov, src.as_i8x64()))
    }
}

/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi8&expand=3814)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub fn _mm512_maskz_mov_epi8(k: __mmask64, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i8x64();
        transmute(simd_select_bitmask(k, mov, i8x64::ZERO))
    }
}

/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi8&expand=3811)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub fn _mm256_mask_mov_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i8x32();
        transmute(simd_select_bitmask(k, mov, src.as_i8x32()))
    }
}

/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi8&expand=3812)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub fn _mm256_maskz_mov_epi8(k: __mmask32, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i8x32();
        transmute(simd_select_bitmask(k, mov, i8x32::ZERO))
    }
}

/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi8&expand=3809)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub fn _mm_mask_mov_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i8x16();
        transmute(simd_select_bitmask(k, mov, src.as_i8x16()))
    }
}

/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi8&expand=3810)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub fn _mm_maskz_mov_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i8x16();
        transmute(simd_select_bitmask(k, mov, i8x16::ZERO))
    }
}

/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi16&expand=4942)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm512_mask_set1_epi16(src: __m512i, k: __mmask32, a: i16) -> __m512i {
    unsafe {
        let r = _mm512_set1_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, r, src.as_i16x32()))
    }
}

/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi16&expand=4943)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm512_maskz_set1_epi16(k: __mmask32, a: i16) -> __m512i {
    unsafe {
        let r = _mm512_set1_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, r, i16x32::ZERO))
    }
}
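
// --- Editor's sketch (not part of the upstream source) ---
// Masked broadcast: every selected element receives the scalar, every
// unselected element is merged from `src` (writemask) or zeroed (zeromask).
// Hypothetical module and test names; requires AVX-512BW at runtime.
#[cfg(test)]
mod set1_epi16_mask_sketch {
    use crate::core_arch::x86::*;
    use core::mem::transmute;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn broadcast_respects_the_mask() {
        let src = _mm512_set1_epi16(-1);
        let r: [i16; 32] = transmute(_mm512_mask_set1_epi16(src, 0b11, 42));
        assert_eq!(&r[0..3], &[42, 42, -1]);
        let z: [i16; 32] = transmute(_mm512_maskz_set1_epi16(0b11, 42));
        assert_eq!(&z[0..3], &[42, 42, 0]);
    }
}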

/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi16&expand=4939)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm256_mask_set1_epi16(src: __m256i, k: __mmask16, a: i16) -> __m256i {
    unsafe {
        let r = _mm256_set1_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, r, src.as_i16x16()))
    }
}

/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi16&expand=4940)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm256_maskz_set1_epi16(k: __mmask16, a: i16) -> __m256i {
    unsafe {
        let r = _mm256_set1_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, r, i16x16::ZERO))
    }
}

/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi16&expand=4936)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm_mask_set1_epi16(src: __m128i, k: __mmask8, a: i16) -> __m128i {
    unsafe {
        let r = _mm_set1_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, r, src.as_i16x8()))
    }
}

/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi16&expand=4937)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm_maskz_set1_epi16(k: __mmask8, a: i16) -> __m128i {
    unsafe {
        let r = _mm_set1_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, r, i16x8::ZERO))
    }
}

/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi8&expand=4970)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm512_mask_set1_epi8(src: __m512i, k: __mmask64, a: i8) -> __m512i {
    unsafe {
        let r = _mm512_set1_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, r, src.as_i8x64()))
    }
}

/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi8&expand=4971)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm512_maskz_set1_epi8(k: __mmask64, a: i8) -> __m512i {
    unsafe {
        let r = _mm512_set1_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, r, i8x64::ZERO))
    }
}

/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi8&expand=4967)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm256_mask_set1_epi8(src: __m256i, k: __mmask32, a: i8) -> __m256i {
    unsafe {
        let r = _mm256_set1_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, r, src.as_i8x32()))
    }
}

/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi8&expand=4968)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm256_maskz_set1_epi8(k: __mmask32, a: i8) -> __m256i {
    unsafe {
        let r = _mm256_set1_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, r, i8x32::ZERO))
    }
}

/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi8&expand=4964)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm_mask_set1_epi8(src: __m128i, k: __mmask16, a: i8) -> __m128i {
    unsafe {
        let r = _mm_set1_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, r, src.as_i8x16()))
    }
}

/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi8&expand=4965)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i {
    unsafe {
        let r = _mm_set1_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, r, i8x16::ZERO))
    }
}
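
// --- Editor's sketch (not part of the upstream source) ---
// The byte variant behaves identically, one mask bit per byte. Hypothetical
// module and test names; requires AVX-512BW and AVX-512VL at runtime.
#[cfg(test)]
mod set1_epi8_maskz_sketch {
    use crate::core_arch::x86::*;
    use core::mem::transmute;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn broadcast_to_low_four_bytes_only() {
        let r: [i8; 16] = transmute(_mm_maskz_set1_epi8(0b0000_0000_0000_1111, 5));
        assert_eq!(r, [5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
    }
}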

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflelo_epi16&expand=5221)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_shufflelo_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i16x32();
        let r: i16x32 = simd_shuffle!(
            a,
            a,
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
                4,
                5,
                6,
                7,
                (IMM8 as u32 & 0b11) + 8,
                ((IMM8 as u32 >> 2) & 0b11) + 8,
                ((IMM8 as u32 >> 4) & 0b11) + 8,
                ((IMM8 as u32 >> 6) & 0b11) + 8,
                12,
                13,
                14,
                15,
                (IMM8 as u32 & 0b11) + 16,
                ((IMM8 as u32 >> 2) & 0b11) + 16,
                ((IMM8 as u32 >> 4) & 0b11) + 16,
                ((IMM8 as u32 >> 6) & 0b11) + 16,
                20,
                21,
                22,
                23,
                (IMM8 as u32 & 0b11) + 24,
                ((IMM8 as u32 >> 2) & 0b11) + 24,
                ((IMM8 as u32 >> 4) & 0b11) + 24,
                ((IMM8 as u32 >> 6) & 0b11) + 24,
                28,
                29,
                30,
                31,
            ],
        );
        transmute(r)
    }
}
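
// --- Editor's sketch (not part of the upstream source) ---
// Decoding the immediate: bits 1:0, 3:2, 5:4 and 7:6 of IMM8 pick the source
// for result words 0..4 of every 128-bit lane, while words 4..8 pass through.
// 0b00_01_10_11 therefore reverses the low four words of each lane.
// Hypothetical module and test names; requires AVX-512BW at runtime.
#[cfg(test)]
mod shufflelo_epi16_sketch {
    use crate::core_arch::x86::*;
    use core::mem::transmute;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn imm8_reverses_low_words() {
        let mut av = [0i16; 32];
        for i in 0..32 {
            av[i] = i as i16;
        }
        let a: __m512i = transmute(av);
        let r: [i16; 32] = transmute(_mm512_shufflelo_epi16::<0b00_01_10_11>(a));
        assert_eq!(&r[0..8], &[3, 2, 1, 0, 4, 5, 6, 7]); // lane 0
        assert_eq!(&r[8..16], &[11, 10, 9, 8, 12, 13, 14, 15]); // lane 1
    }
}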

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflelo_epi16&expand=5219)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_shufflelo_epi16<const IMM8: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_shufflelo_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
    }
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflelo_epi16&expand=5220)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_shufflelo_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO))
    }
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflelo_epi16&expand=5216)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_shufflelo_epi16<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
    }
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflelo_epi16&expand=5217)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO))
    }
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflelo_epi16&expand=5213)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_shufflelo_epi16<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
    }
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflelo_epi16&expand=5214)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO))
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflehi_epi16&expand=5212)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_shufflehi_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i16x32();
        let r: i16x32 = simd_shuffle!(
            a,
            a,
            [
                0,
                1,
                2,
                3,
                (IMM8 as u32 & 0b11) + 4,
                ((IMM8 as u32 >> 2) & 0b11) + 4,
                ((IMM8 as u32 >> 4) & 0b11) + 4,
                ((IMM8 as u32 >> 6) & 0b11) + 4,
                8,
                9,
                10,
                11,
                (IMM8 as u32 & 0b11) + 12,
                ((IMM8 as u32 >> 2) & 0b11) + 12,
                ((IMM8 as u32 >> 4) & 0b11) + 12,
                ((IMM8 as u32 >> 6) & 0b11) + 12,
                16,
                17,
                18,
                19,
                (IMM8 as u32 & 0b11) + 20,
                ((IMM8 as u32 >> 2) & 0b11) + 20,
                ((IMM8 as u32 >> 4) & 0b11) + 20,
                ((IMM8 as u32 >> 6) & 0b11) + 20,
                24,
                25,
                26,
                27,
                (IMM8 as u32 & 0b11) + 28,
                ((IMM8 as u32 >> 2) & 0b11) + 28,
                ((IMM8 as u32 >> 4) & 0b11) + 28,
                ((IMM8 as u32 >> 6) & 0b11) + 28,
            ],
        );
        transmute(r)
    }
}
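
// --- Editor's sketch (not part of the upstream source) ---
// Mirror image of `shufflelo`: the same two-bit fields of IMM8 now steer
// result words 4..8 of each lane (offset by 4), while words 0..4 pass through.
// Hypothetical module and test names; requires AVX-512BW at runtime.
#[cfg(test)]
mod shufflehi_epi16_sketch {
    use crate::core_arch::x86::*;
    use core::mem::transmute;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn imm8_reverses_high_words() {
        let mut av = [0i16; 32];
        for i in 0..32 {
            av[i] = i as i16;
        }
        let a: __m512i = transmute(av);
        let r: [i16; 32] = transmute(_mm512_shufflehi_epi16::<0b00_01_10_11>(a));
        assert_eq!(&r[0..8], &[0, 1, 2, 3, 7, 6, 5, 4]); // lane 0
    }
}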

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflehi_epi16&expand=5210)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_shufflehi_epi16<const IMM8: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_shufflehi_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflehi_epi16&expand=5211)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_shufflehi_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO))
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflehi_epi16&expand=5207)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_shufflehi_epi16<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflehi_epi16&expand=5208)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO))
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflehi_epi16&expand=5204)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_shufflehi_epi16<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflehi_epi16&expand=5205)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO))
    }
}

/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi8&expand=5159)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm512_shuffle_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpshufb(a.as_i8x64(), b.as_i8x64())) }
}
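
// --- Editor's sketch (not part of the upstream source) ---
// `vpshufb` control bytes are lane-local: the low four bits of each control
// byte index into the *containing* 128-bit lane of `a`, and a set top bit
// forces the result byte to zero. Hypothetical module and test names;
// requires AVX-512BW at runtime.
#[cfg(test)]
mod shuffle_epi8_sketch {
    use crate::core_arch::x86::*;
    use core::mem::transmute;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn control_bytes_are_lane_local() {
        let mut av = [0i8; 64];
        for i in 0..64 {
            av[i] = i as i8;
        }
        let a: __m512i = transmute(av);
        // Index 0 everywhere: each result byte is byte 0 of its own lane.
        let r: [i8; 64] = transmute(_mm512_shuffle_epi8(a, _mm512_set1_epi8(0)));
        assert_eq!((r[0], r[16], r[32], r[48]), (0, 16, 32, 48));
        // Top bit set everywhere: the whole result is zeroed.
        let z: [i8; 64] = transmute(_mm512_shuffle_epi8(a, _mm512_set1_epi8(-128)));
        assert_eq!(z, [0i8; 64]);
    }
}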

/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi8&expand=5157)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm512_mask_shuffle_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, shuffle, src.as_i8x64()))
    }
}

/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi8&expand=5158)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm512_maskz_shuffle_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, shuffle, i8x64::ZERO))
    }
}

/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi8&expand=5154)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm256_mask_shuffle_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, shuffle, src.as_i8x32()))
    }
}

/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi8&expand=5155)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm256_maskz_shuffle_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, shuffle, i8x32::ZERO))
    }
}

/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi8&expand=5151)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm_mask_shuffle_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, shuffle, src.as_i8x16()))
    }
}

/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi8&expand=5152)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm_maskz_shuffle_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, shuffle, i8x16::ZERO))
    }
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi16_mask&expand=5884)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub fn _mm512_test_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpneq_epi16_mask(and, zero)
}
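
// --- Editor's sketch (not part of the upstream source) ---
// `test` intrinsics reduce each AND result to a single mask bit, which makes
// them a cheap "any common bits?" probe per element. Hypothetical module and
// test names; requires AVX-512BW at runtime.
#[cfg(test)]
mod test_epi16_mask_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn mask_bit_set_iff_and_is_nonzero() {
        let a = _mm512_set1_epi16(0b0110);
        // Overlapping bit patterns: every element ANDs to a non-zero value.
        assert_eq!(_mm512_test_epi16_mask(a, _mm512_set1_epi16(0b0010)), u32::MAX);
        // Disjoint bit patterns: every element ANDs to zero.
        assert_eq!(_mm512_test_epi16_mask(a, _mm512_set1_epi16(0b1001)), 0);
    }
}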

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi16_mask&expand=5883)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub fn _mm512_mask_test_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi16_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi16_mask&expand=5882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub fn _mm256_test_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpneq_epi16_mask(and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi16_mask&expand=5881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub fn _mm256_mask_test_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi16_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi16_mask&expand=5880)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub fn _mm_test_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpneq_epi16_mask(and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi16_mask&expand=5879)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub fn _mm_mask_test_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpneq_epi16_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi8_mask&expand=5902)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmb))]
9522pub fn _mm512_test_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
9523    let and = _mm512_and_si512(a, b);
9524    let zero = _mm512_setzero_si512();
9525    _mm512_cmpneq_epi8_mask(and, zero)
9526}
9527
9528/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
9529///
9530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi8_mask&expand=5901)
9531#[inline]
9532#[target_feature(enable = "avx512bw")]
9533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9534#[cfg_attr(test, assert_instr(vptestmb))]
9535pub fn _mm512_mask_test_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
9536    let and = _mm512_and_si512(a, b);
9537    let zero = _mm512_setzero_si512();
9538    _mm512_mask_cmpneq_epi8_mask(k, and, zero)
9539}
9540
9541/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
9542///
9543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi8_mask&expand=5900)
9544#[inline]
9545#[target_feature(enable = "avx512bw,avx512vl")]
9546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9547#[cfg_attr(test, assert_instr(vptestmb))]
9548pub fn _mm256_test_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
9549    let and = _mm256_and_si256(a, b);
9550    let zero = _mm256_setzero_si256();
9551    _mm256_cmpneq_epi8_mask(and, zero)
9552}
9553
9554/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
9555///
9556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi8_mask&expand=5899)
9557#[inline]
9558#[target_feature(enable = "avx512bw,avx512vl")]
9559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9560#[cfg_attr(test, assert_instr(vptestmb))]
9561pub fn _mm256_mask_test_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
9562    let and = _mm256_and_si256(a, b);
9563    let zero = _mm256_setzero_si256();
9564    _mm256_mask_cmpneq_epi8_mask(k, and, zero)
9565}
9566
9567/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
9568///
9569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi8_mask&expand=5898)
9570#[inline]
9571#[target_feature(enable = "avx512bw,avx512vl")]
9572#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9573#[cfg_attr(test, assert_instr(vptestmb))]
9574pub fn _mm_test_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
9575    let and = _mm_and_si128(a, b);
9576    let zero = _mm_setzero_si128();
9577    _mm_cmpneq_epi8_mask(and, zero)
9578}
9579
9580/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
9581///
9582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi8_mask&expand=5897)
9583#[inline]
9584#[target_feature(enable = "avx512bw,avx512vl")]
9585#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9586#[cfg_attr(test, assert_instr(vptestmb))]
9587pub fn _mm_mask_test_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
9588    let and = _mm_and_si128(a, b);
9589    let zero = _mm_setzero_si128();
9590    _mm_mask_cmpneq_epi8_mask(k, and, zero)
9591}
9592
9593/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
9594///
9595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi16_mask&expand=5915)
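///
/// # Examples
///
/// A minimal sketch (illustrative values; assumes an `x86_64` target with runtime
/// AVX-512BW support):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512bw") {
///         unsafe {
///             // 0b10 & 0b01 == 0 in every lane, so every mask bit is set.
///             let k = _mm512_testn_epi16_mask(_mm512_set1_epi16(0b10), _mm512_set1_epi16(0b01));
///             assert_eq!(k, u32::MAX);
///         }
///     }
/// }
/// ```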
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub fn _mm512_testn_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpeq_epi16_mask(and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi16_mask&expand=5914)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub fn _mm512_mask_testn_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi16_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi16_mask&expand=5913)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub fn _mm256_testn_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpeq_epi16_mask(and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi16_mask&expand=5912)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub fn _mm256_mask_testn_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpeq_epi16_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi16_mask&expand=5911)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub fn _mm_testn_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpeq_epi16_mask(and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi16_mask&expand=5910)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub fn _mm_mask_testn_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpeq_epi16_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi8_mask&expand=5933)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmb))]
pub fn _mm512_testn_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpeq_epi8_mask(and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi8_mask&expand=5932)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmb))]
pub fn _mm512_mask_testn_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi8_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi8_mask&expand=5931)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmb))]
pub fn _mm256_testn_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpeq_epi8_mask(and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi8_mask&expand=5930)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmb))]
pub fn _mm256_mask_testn_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpeq_epi8_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi8_mask&expand=5929)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmb))]
pub fn _mm_testn_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpeq_epi8_mask(and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi8_mask&expand=5928)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmb))]
pub fn _mm_mask_testn_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpeq_epi8_mask(k, and, zero)
}

/// Store 64-bit mask from a into memory.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask64&expand=5578)
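///
/// # Examples
///
/// A minimal sketch that round-trips a mask through memory via `_load_mask64`
/// (illustrative value; assumes runtime AVX-512BW support on `x86_64`):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512bw") {
///         unsafe {
///             let mut mem: __mmask64 = 0;
///             _store_mask64(&mut mem, 0xDEAD_BEEF_0123_4567);
///             assert_eq!(_load_mask64(&mem), 0xDEAD_BEEF_0123_4567);
///         }
///     }
/// }
/// ```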
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // should be kmovq
pub unsafe fn _store_mask64(mem_addr: *mut __mmask64, a: __mmask64) {
    ptr::write(mem_addr, a);
}

/// Store 32-bit mask from a into memory.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask32&expand=5577)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // should be kmovd
pub unsafe fn _store_mask32(mem_addr: *mut __mmask32, a: __mmask32) {
    ptr::write(mem_addr, a);
}

/// Load 64-bit mask from memory into k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask64&expand=3318)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // should be kmovq
pub unsafe fn _load_mask64(mem_addr: *const __mmask64) -> __mmask64 {
    ptr::read(mem_addr)
}

/// Load 32-bit mask from memory into k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask32&expand=3317)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // should be kmovd
pub unsafe fn _load_mask32(mem_addr: *const __mmask32) -> __mmask32 {
    ptr::read(mem_addr)
}

/// Compute the absolute differences of packed unsigned 8-bit integers in a and b, then horizontally sum each consecutive 8 differences to produce eight unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sad_epu8&expand=4855)
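///
/// # Examples
///
/// A minimal sketch (illustrative values; assumes runtime AVX-512BW support on
/// `x86_64`):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512bw") {
///         unsafe {
///             // |3 - 1| == 2 per byte; summing 8 consecutive differences gives 16
///             // in the low 16 bits of each of the eight 64-bit lanes.
///             let r = _mm512_sad_epu8(_mm512_set1_epi8(3), _mm512_set1_epi8(1));
///             let lanes: [u64; 8] = core::mem::transmute(r);
///             assert_eq!(lanes, [16u64; 8]);
///         }
///     }
/// }
/// ```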
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsadbw))]
pub fn _mm512_sad_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpsadbw(a.as_u8x64(), b.as_u8x64())) }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dbsad_epu8&expand=2114)
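///
/// # Examples
///
/// A minimal sketch (illustrative values; assumes runtime AVX-512BW support on
/// `x86_64`):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512bw") {
///         unsafe {
///             // Every byte of a is 1 and every byte of b is 0, so each SAD sums four
///             // absolute differences of 1: every 16-bit result is 4, regardless of
///             // which quadruplets IMM8 selects.
///             let r = _mm512_dbsad_epu8::<0>(_mm512_set1_epi8(1), _mm512_set1_epi8(0));
///             let words: [u16; 32] = core::mem::transmute(r);
///             assert_eq!(words, [4u16; 32]);
///         }
///     }
/// }
/// ```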
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm512_dbsad_epu8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        let r = vdbpsadbw(a, b, IMM8);
        transmute(r)
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dbsad_epu8&expand=2115)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm512_mask_dbsad_epu8<const IMM8: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        let r = vdbpsadbw(a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_u16x32()))
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dbsad_epu8&expand=2116)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm512_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        let r = vdbpsadbw(a, b, IMM8);
        transmute(simd_select_bitmask(k, r, u16x32::ZERO))
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dbsad_epu8&expand=2111)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm256_dbsad_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        let r = vdbpsadbw256(a, b, IMM8);
        transmute(r)
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dbsad_epu8&expand=2112)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm256_mask_dbsad_epu8<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        let r = vdbpsadbw256(a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_u16x16()))
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dbsad_epu8&expand=2113)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm256_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        let r = vdbpsadbw256(a, b, IMM8);
        transmute(simd_select_bitmask(k, r, u16x16::ZERO))
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dbsad_epu8&expand=2108)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm_dbsad_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        let r = vdbpsadbw128(a, b, IMM8);
        transmute(r)
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dbsad_epu8&expand=2109)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm_mask_dbsad_epu8<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        let r = vdbpsadbw128(a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_u16x8()))
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dbsad_epu8&expand=2110)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        let r = vdbpsadbw128(a, b, IMM8);
        transmute(simd_select_bitmask(k, r, u16x8::ZERO))
    }
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi16_mask&expand=3873)
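///
/// # Examples
///
/// A minimal sketch (illustrative values; assumes runtime AVX-512BW support on
/// `x86_64`):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512bw") {
///         unsafe {
///             // i16::MIN has its sign bit set, so every mask bit comes out set ...
///             assert_eq!(_mm512_movepi16_mask(_mm512_set1_epi16(i16::MIN)), u32::MAX);
///             // ... while 1 has a clear sign bit, so the mask is zero.
///             assert_eq!(_mm512_movepi16_mask(_mm512_set1_epi16(1)), 0);
///         }
///     }
/// }
/// ```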
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovw2m))]
pub fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 {
    let filter = _mm512_set1_epi16(1 << 15);
    let a = _mm512_and_si512(a, filter);
    _mm512_cmpeq_epi16_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi16_mask&expand=3872)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovw2m))]
pub fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 {
    let filter = _mm256_set1_epi16(1 << 15);
    let a = _mm256_and_si256(a, filter);
    _mm256_cmpeq_epi16_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi16_mask&expand=3871)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovw2m))]
pub fn _mm_movepi16_mask(a: __m128i) -> __mmask8 {
    let filter = _mm_set1_epi16(1 << 15);
    let a = _mm_and_si128(a, filter);
    _mm_cmpeq_epi16_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi8_mask&expand=3883)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovb2m))]
pub fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 {
    let filter = _mm512_set1_epi8(1 << 7);
    let a = _mm512_and_si512(a, filter);
    _mm512_cmpeq_epi8_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi8_mask&expand=3882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m, but the test shim compiles it to vpmovmskb because
// that takes fewer cycles than vpmovb2m plus moving the mask register into a general-purpose register.
pub fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 {
    let filter = _mm256_set1_epi8(1 << 7);
    let a = _mm256_and_si256(a, filter);
    _mm256_cmpeq_epi8_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi8_mask&expand=3881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m, but the test shim compiles it to vpmovmskb because
// that takes fewer cycles than vpmovb2m plus moving the mask register into a general-purpose register.
pub fn _mm_movepi8_mask(a: __m128i) -> __mmask16 {
    let filter = _mm_set1_epi8(1 << 7);
    let a = _mm_and_si128(a, filter);
    _mm_cmpeq_epi8_mask(a, filter)
}

/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi16&expand=3886)
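///
/// # Examples
///
/// A minimal sketch (illustrative values; assumes runtime AVX-512BW support on
/// `x86_64`):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512bw") {
///         unsafe {
///             // Only bit 0 of k is set, so element 0 becomes all ones (-1)
///             // and the remaining 31 elements become zero.
///             let v = _mm512_movm_epi16(0b1);
///             let e: [i16; 32] = core::mem::transmute(v);
///             assert_eq!(e[0], -1);
///             assert!(e[1..].iter().all(|&x| x == 0));
///         }
///     }
/// }
/// ```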
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovm2w))]
pub fn _mm512_movm_epi16(k: __mmask32) -> __m512i {
    unsafe {
        // Every bit of each 16-bit element set, i.e. -1.
        let ones = _mm512_set1_epi16(-1).as_i16x32();
        transmute(simd_select_bitmask(k, ones, i16x32::ZERO))
    }
}

/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi16&expand=3885)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovm2w))]
pub fn _mm256_movm_epi16(k: __mmask16) -> __m256i {
    unsafe {
        // Every bit of each 16-bit element set, i.e. -1.
        let ones = _mm256_set1_epi16(-1).as_i16x16();
        transmute(simd_select_bitmask(k, ones, i16x16::ZERO))
    }
}

/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi16&expand=3884)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovm2w))]
pub fn _mm_movm_epi16(k: __mmask8) -> __m128i {
    unsafe {
        // Every bit of each 16-bit element set, i.e. -1.
        let ones = _mm_set1_epi16(-1).as_i16x8();
        transmute(simd_select_bitmask(k, ones, i16x8::ZERO))
    }
}

/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi8&expand=3895)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovm2b))]
pub fn _mm512_movm_epi8(k: __mmask64) -> __m512i {
    unsafe {
        // Every bit of each 8-bit element set, i.e. -1.
        let ones = _mm512_set1_epi8(-1).as_i8x64();
        transmute(simd_select_bitmask(k, ones, i8x64::ZERO))
    }
}

/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi8&expand=3894)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovm2b))]
pub fn _mm256_movm_epi8(k: __mmask32) -> __m256i {
    unsafe {
        // Every bit of each 8-bit element set, i.e. -1.
        let ones = _mm256_set1_epi8(-1).as_i8x32();
        transmute(simd_select_bitmask(k, ones, i8x32::ZERO))
    }
}

/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi8&expand=3893)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovm2b))]
pub fn _mm_movm_epi8(k: __mmask16) -> __m128i {
    unsafe {
        // Every bit of each 8-bit element set, i.e. -1.
        let ones = _mm_set1_epi8(-1).as_i8x16();
        transmute(simd_select_bitmask(k, ones, i8x16::ZERO))
    }
}

/// Convert 32-bit mask a into an integer value, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#_cvtmask32_u32)
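///
/// # Examples
///
/// A minimal sketch pairing this with `_cvtu32_mask32` (illustrative values;
/// assumes runtime AVX-512BW support on `x86_64`); both directions are a plain
/// bit-for-bit reinterpretation:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512bw") {
///         unsafe {
///             assert_eq!(_cvtmask32_u32(0b1010), 0b1010);
///             assert_eq!(_cvtu32_mask32(0b1010), 0b1010);
///         }
///     }
/// }
/// ```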
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _cvtmask32_u32(a: __mmask32) -> u32 {
    a
}

/// Convert integer value a into a 32-bit mask, and store the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _cvtu32_mask32(a: u32) -> __mmask32 {
    a
}

/// Add 32-bit masks in a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask32&expand=3207)
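///
/// # Examples
///
/// A minimal sketch (illustrative values; assumes runtime AVX-512BW support on
/// `x86_64`, and that the addition wraps like the underlying KADDD instruction):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512bw") {
///         unsafe {
///             assert_eq!(_kadd_mask32(0b0101, 0b0011), 0b1000);
///             // The addition wraps on overflow.
///             assert_eq!(_kadd_mask32(u32::MAX, 1), 0);
///         }
///     }
/// }
/// ```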
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    // Like the KADDD instruction, wrap on overflow instead of panicking in debug builds.
    a.wrapping_add(b)
}

/// Add 64-bit masks in a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask64&expand=3208)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    // Like the KADDQ instruction, wrap on overflow instead of panicking in debug builds.
    a.wrapping_add(b)
}

/// Compute the bitwise AND of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask32&expand=3213)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    a & b
}

/// Compute the bitwise AND of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask64&expand=3214)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    a & b
}

/// Compute the bitwise NOT of 32-bit mask a, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask32&expand=3234)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _knot_mask32(a: __mmask32) -> __mmask32 {
    !a
}

/// Compute the bitwise NOT of 64-bit mask a, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask64&expand=3235)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _knot_mask64(a: __mmask64) -> __mmask64 {
    !a
}

/// Compute the bitwise NOT of 32-bit mask a and then AND with b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask32&expand=3219)
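///
/// # Examples
///
/// A minimal sketch (illustrative values; assumes runtime AVX-512BW support on
/// `x86_64`):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512bw") {
///         unsafe {
///             // !0b1100 & 0b1010 == 0b0010
///             assert_eq!(_kandn_mask32(0b1100, 0b1010), 0b0010);
///         }
///     }
/// }
/// ```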
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    _knot_mask32(a) & b
}

/// Compute the bitwise NOT of 64-bit mask a and then AND with b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask64&expand=3220)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    _knot_mask64(a) & b
}

/// Compute the bitwise OR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask32&expand=3240)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    a | b
}

/// Compute the bitwise OR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask64&expand=3241)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    a | b
}

/// Compute the bitwise XOR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask32&expand=3292)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    a ^ b
}

/// Compute the bitwise XOR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask64&expand=3293)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    a ^ b
}

/// Compute the bitwise XNOR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask32&expand=3286)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    _knot_mask32(a ^ b)
}

/// Compute the bitwise XNOR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask64&expand=3287)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    _knot_mask64(a ^ b)
}

/// Compute the bitwise OR of 32-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask32_u8)
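///
/// # Examples
///
/// A minimal sketch (illustrative values; assumes runtime AVX-512BW support on
/// `x86_64`):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512bw") {
///         unsafe {
///             let mut all_ones = 0u8;
///             // 0b01 | 0b10 is neither all zeros nor all ones.
///             assert_eq!(_kortest_mask32_u8(0b01, 0b10, &mut all_ones), 0);
///             assert_eq!(all_ones, 0);
///             // u32::MAX | 0 is all ones.
///             assert_eq!(_kortest_mask32_u8(u32::MAX, 0, &mut all_ones), 0);
///             assert_eq!(all_ones, 1);
///         }
///     }
/// }
/// ```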
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _kortest_mask32_u8(a: __mmask32, b: __mmask32, all_ones: *mut u8) -> u8 {
    let tmp = _kor_mask32(a, b);
    *all_ones = (tmp == 0xffffffff) as u8;
    (tmp == 0) as u8
}

/// Compute the bitwise OR of 64-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _kortest_mask64_u8(a: __mmask64, b: __mmask64, all_ones: *mut u8) -> u8 {
    let tmp = _kor_mask64(a, b);
    *all_ones = (tmp == 0xffffffff_ffffffff) as u8;
    (tmp == 0) as u8
}

/// Compute the bitwise OR of 32-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kortestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
    (_kor_mask32(a, b) == 0xffffffff) as u8
}

/// Compute the bitwise OR of 64-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kortestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
    (_kor_mask64(a, b) == 0xffffffff_ffffffff) as u8
}

/// Compute the bitwise OR of 32-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kortestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
    (_kor_mask32(a, b) == 0) as u8
}

/// Compute the bitwise OR of 64-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kortestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
    (_kor_mask64(a, b) == 0) as u8
}

/// Shift the bits of 32-bit mask a left by count while shifting in zeros, and store the least significant 32 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask32)
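///
/// # Examples
///
/// A minimal sketch (illustrative values; assumes runtime AVX-512BW support on
/// `x86_64`):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512bw") {
///         unsafe {
///             assert_eq!(_kshiftli_mask32::<4>(0b1), 0b10000);
///             // Counts of 32 or more shift every bit out.
///             assert_eq!(_kshiftli_mask32::<32>(u32::MAX), 0);
///         }
///     }
/// }
/// ```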
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kshiftli_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
    a.unbounded_shl(COUNT)
}

/// Shift the bits of 64-bit mask a left by count while shifting in zeros, and store the least significant 64 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask64)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kshiftli_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
    a.unbounded_shl(COUNT)
}

/// Shift the bits of 32-bit mask a right by count while shifting in zeros, and store the least significant 32 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kshiftri_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
    a.unbounded_shr(COUNT)
}

/// Shift the bits of 64-bit mask a right by count while shifting in zeros, and store the least significant 64 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask64)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kshiftri_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
    a.unbounded_shr(COUNT)
}

/// Compute the bitwise AND of 32-bit masks a and b, and if the result is all zeros, store 1 in dst,
/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
/// zeros, store 1 in and_not, otherwise store 0 in and_not.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask32_u8)
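///
/// # Examples
///
/// A minimal sketch (illustrative values; assumes runtime AVX-512BW support on
/// `x86_64`):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512bw") {
///         unsafe {
///             let mut and_not = 0u8;
///             // 0b0011 & 0b1100 == 0, so dst is 1; !0b0011 & 0b1100 == 0b1100 != 0,
///             // so and_not is 0.
///             assert_eq!(_ktest_mask32_u8(0b0011, 0b1100, &mut and_not), 1);
///             assert_eq!(and_not, 0);
///         }
///     }
/// }
/// ```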
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _ktest_mask32_u8(a: __mmask32, b: __mmask32, and_not: *mut u8) -> u8 {
    *and_not = (_kandn_mask32(a, b) == 0) as u8;
    (_kand_mask32(a, b) == 0) as u8
}

/// Compute the bitwise AND of 64-bit masks a and b, and if the result is all zeros, store 1 in dst,
/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b; if the result is all
/// zeros, store 1 in and_not, otherwise store 0 in and_not.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _ktest_mask64_u8(a: __mmask64, b: __mmask64, and_not: *mut u8) -> u8 {
    *and_not = (_kandn_mask64(a, b) == 0) as u8;
    (_kand_mask64(a, b) == 0) as u8
}

/// Compute the bitwise NOT of 32-bit mask a and then AND with 32-bit mask b; if the result is all
/// zeros, store 1 in dst, otherwise store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _ktestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
    (_kandn_mask32(a, b) == 0) as u8
}

/// Compute the bitwise NOT of 64-bit mask a and then AND with 64-bit mask b; if the result is all
/// zeros, store 1 in dst, otherwise store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _ktestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
    (_kandn_mask64(a, b) == 0) as u8
}

/// Compute the bitwise AND of 32-bit masks a and b; if the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _ktestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
    (_kand_mask32(a, b) == 0) as u8
}

/// Compute the bitwise AND of 64-bit masks a and b; if the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _ktestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
    (_kand_mask64(a, b) == 0) as u8
}

/// Unpack and interleave 16 bits from masks a and b, and store the 32-bit result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackw)
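///
/// # Examples
///
/// A minimal sketch showing that a lands in the upper half and b in the lower
/// half (illustrative values; ignored as a doc-test since it needs an
/// AVX-512BW-capable CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a: __mmask32 = 0xAAAA; // low 16 bits become bits 31:16
/// let b: __mmask32 = 0x5555; // low 16 bits become bits 15:0
/// let r = unsafe { _mm512_kunpackw(a, b) };
/// assert_eq!(r, 0xAAAA_5555);
/// ```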
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generates normal mov code instead of kunpckwd
pub fn _mm512_kunpackw(a: __mmask32, b: __mmask32) -> __mmask32 {
    ((a & 0xffff) << 16) | (b & 0xffff)
}

/// Unpack and interleave 32 bits from masks a and b, and store the 64-bit result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackd)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generates normal mov code instead of kunpckdq
pub fn _mm512_kunpackd(a: __mmask64, b: __mmask64) -> __mmask64 {
    ((a & 0xffffffff) << 32) | (b & 0xffffffff)
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi8&expand=1407)
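///
/// # Examples
///
/// A minimal sketch of the truncating behavior (illustrative values; ignored
/// as a doc-test since it needs an AVX-512BW-capable CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // 0x0180 (384) keeps only its low byte, 0x80 (-128 as i8).
///     let a = _mm512_set1_epi16(0x0180);
///     let r = _mm512_cvtepi16_epi8(a);
///     let mut out = [0i8; 32];
///     _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///     assert!(out.iter().all(|&b| b == 0x80u8 as i8));
/// }
/// ```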
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm512_cvtepi16_epi8(a: __m512i) -> __m256i {
    unsafe {
        let a = a.as_i16x32();
        transmute::<i8x32, _>(simd_cast(a))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi8&expand=1408)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm512_mask_cvtepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    unsafe {
        let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, convert, src.as_i8x32()))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi8&expand=1409)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm512_maskz_cvtepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    unsafe {
        let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, convert, i8x32::ZERO))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi8&expand=1404)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm256_cvtepi16_epi8(a: __m256i) -> __m128i {
    unsafe {
        let a = a.as_i16x16();
        transmute::<i8x16, _>(simd_cast(a))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi8&expand=1405)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm256_mask_cvtepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
    unsafe {
        let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi8&expand=1406)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
    unsafe {
        let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi8&expand=1401)
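///
/// # Examples
///
/// A minimal sketch; only the low 64 bits of the result carry data
/// (illustrative values; ignored as a doc-test since it needs an
/// AVX-512BW+VL-capable CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set1_epi16(0x1234);
///     let r = _mm_cvtepi16_epi8(a);
///     let mut out = [0i8; 16];
///     _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///     assert!(out[..8].iter().all(|&b| b == 0x34)); // low byte of each word
///     assert!(out[8..].iter().all(|&b| b == 0)); // upper 64 bits are zero
/// }
/// ```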
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i16x8();
        let v256: i16x16 = simd_shuffle!(
            a,
            i16x8::ZERO,
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]
        );
        transmute::<i8x16, _>(simd_cast(v256))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi8&expand=1402)
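///
/// # Examples
///
/// A minimal sketch of the write-masking (illustrative values; ignored as a
/// doc-test since it needs an AVX-512BW+VL-capable CPU). Note that the upper
/// 64 bits of the result are zeroed regardless of src:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let src = _mm_set1_epi8(9);
///     let a = _mm_set1_epi16(7);
///     let r = _mm_mask_cvtepi16_epi8(src, 0b0000_1111, a);
///     let mut out = [0i8; 16];
///     _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///     assert_eq!(&out[..4], &[7, 7, 7, 7]); // converted lanes
///     assert_eq!(&out[4..8], &[9, 9, 9, 9]); // copied from src
///     assert!(out[8..].iter().all(|&b| b == 0));
/// }
/// ```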
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi16_epi8(a).as_i8x16();
        // Force the upper mask bits on so lanes 8..16 select the zeroed upper
        // half of `convert`: Intel specifies dst[127:64] := 0 here, not src.
        let k: __mmask16 = 0b11111111_00000000 | k as __mmask16;
        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi8&expand=1403)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi16_epi8(a).as_i8x16();
        // The upper mask bits stay clear, so lanes 8..16 select zero.
        let k: __mmask16 = 0b00000000_11111111 & k as __mmask16;
        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
    }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi16_epi8&expand=1807)
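///
/// # Examples
///
/// A minimal sketch contrasting saturation with plain truncation (illustrative
/// values; ignored as a doc-test since it needs an AVX-512BW-capable CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(300); // above i8::MAX
///     let r = _mm512_cvtsepi16_epi8(a);
///     let mut out = [0i8; 32];
///     _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///     assert!(out.iter().all(|&b| b == i8::MAX)); // clamps to 127, no wrap
/// }
/// ```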
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i {
    unsafe {
        transmute(vpmovswb(
            a.as_i16x32(),
            i8x32::ZERO,
            0b11111111_11111111_11111111_11111111,
        ))
    }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_epi8&expand=1808)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovswb(a.as_i16x32(), src.as_i8x32(), k)) }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi16_epi8&expand=1809)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovswb(a.as_i16x32(), i8x32::ZERO, k)) }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi16_epi8&expand=1804)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, 0b11111111_11111111)) }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_epi8&expand=1805)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovswb256(a.as_i16x16(), src.as_i8x16(), k)) }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi16_epi8&expand=1806)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, k)) }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi16_epi8&expand=1801)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm_cvtsepi16_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_epi8&expand=1802)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm_mask_cvtsepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovswb128(a.as_i16x8(), src.as_i8x16(), k)) }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi16_epi8&expand=1803)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, k)) }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi16_epi8&expand=2042)
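///
/// # Examples
///
/// A minimal sketch of the unsigned clamping (illustrative values; ignored as
/// a doc-test since it needs an AVX-512BW-capable CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(300); // above u8::MAX
///     let r = _mm512_cvtusepi16_epi8(a);
///     let mut out = [0u8; 32];
///     _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///     assert!(out.iter().all(|&b| b == u8::MAX)); // clamps to 255
/// }
/// ```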
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i {
    unsafe {
        transmute(vpmovuswb(
            a.as_u16x32(),
            u8x32::ZERO,
            0b11111111_11111111_11111111_11111111,
        ))
    }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_epi8&expand=2043)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovuswb(a.as_u16x32(), src.as_u8x32(), k)) }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi16_epi8&expand=2044)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovuswb(a.as_u16x32(), u8x32::ZERO, k)) }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi16_epi8&expand=2039)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i {
    unsafe {
        transmute(vpmovuswb256(
            a.as_u16x16(),
            u8x16::ZERO,
            0b11111111_11111111,
        ))
    }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_epi8&expand=2040)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovuswb256(a.as_u16x16(), src.as_u8x16(), k)) }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi16_epi8&expand=2041)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovuswb256(a.as_u16x16(), u8x16::ZERO, k)) }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi16_epi8&expand=2036)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm_cvtusepi16_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_epi8&expand=2037)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm_mask_cvtusepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovuswb128(a.as_u16x8(), src.as_u8x16(), k)) }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi16_epi8&expand=2038)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm_maskz_cvtusepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, k)) }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi16&expand=1526)
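///
/// # Examples
///
/// A minimal sketch of the sign extension (illustrative values; ignored as a
/// doc-test since it needs an AVX-512BW-capable CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm256_set1_epi8(-5);
///     let r = _mm512_cvtepi8_epi16(a);
///     let mut out = [0i16; 32];
///     _mm512_storeu_si512(out.as_mut_ptr() as *mut __m512i, r);
///     assert!(out.iter().all(|&w| w == -5)); // the sign bit is propagated
/// }
/// ```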
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub fn _mm512_cvtepi8_epi16(a: __m256i) -> __m512i {
    unsafe {
        let a = a.as_i8x32();
        transmute::<i16x32, _>(simd_cast(a))
    }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi16&expand=1527)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub fn _mm512_mask_cvtepi8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
    }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi16&expand=1528)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub fn _mm512_maskz_cvtepi8_epi16(k: __mmask32, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, convert, i16x32::ZERO))
    }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi16&expand=1524)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub fn _mm256_mask_cvtepi8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
    }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi16&expand=1525)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub fn _mm256_maskz_cvtepi8_epi16(k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
    }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi16&expand=1521)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub fn _mm_mask_cvtepi8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi8_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
    }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi16&expand=1522)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub fn _mm_maskz_cvtepi8_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi8_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi16&expand=1612)
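///
/// # Examples
///
/// A minimal sketch contrasting zero extension with sign extension
/// (illustrative values; ignored as a doc-test since it needs an
/// AVX-512BW-capable CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm256_set1_epi8(-1); // 0xFF as an unsigned byte
///     let r = _mm512_cvtepu8_epi16(a);
///     let mut out = [0i16; 32];
///     _mm512_storeu_si512(out.as_mut_ptr() as *mut __m512i, r);
///     assert!(out.iter().all(|&w| w == 255)); // zero-extended, not sign-extended
/// }
/// ```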
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub fn _mm512_cvtepu8_epi16(a: __m256i) -> __m512i {
    unsafe {
        let a = a.as_u8x32();
        transmute::<i16x32, _>(simd_cast(a))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi16&expand=1613)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub fn _mm512_mask_cvtepu8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi16&expand=1614)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub fn _mm512_maskz_cvtepu8_epi16(k: __mmask32, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, convert, i16x32::ZERO))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi16&expand=1610)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub fn _mm256_mask_cvtepu8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi16&expand=1611)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub fn _mm256_maskz_cvtepu8_epi16(k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi16&expand=1607)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub fn _mm_mask_cvtepu8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu8_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi16&expand=1608)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu8_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
    }
}

/// Shift 128-bit lanes in a left by imm8 bytes while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bslli_epi128&expand=591)
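///
/// # Examples
///
/// A minimal sketch showing that each 128-bit lane shifts independently and
/// bytes never cross lanes (illustrative values; ignored as a doc-test since
/// it needs an AVX-512BW-capable CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let bytes: [i8; 64] = core::array::from_fn(|i| i as i8);
///     let a = _mm512_loadu_si512(bytes.as_ptr() as *const __m512i);
///     let r = _mm512_bslli_epi128::<1>(a);
///     let mut out = [0i8; 64];
///     _mm512_storeu_si512(out.as_mut_ptr() as *mut __m512i, r);
///     assert_eq!(out[0], 0);   // zero shifted into the bottom of lane 0
///     assert_eq!(out[16], 0);  // and into the bottom of lane 1
///     assert_eq!(out[17], 16); // former byte 0 of lane 1, moved up by one
/// }
/// ```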
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_bslli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        const fn mask(shift: i32, i: u32) -> u32 {
            let shift = shift as u32 & 0xff;
            if shift > 15 || i % 16 < shift {
                0
            } else {
                64 + (i - shift)
            }
        }
        let a = a.as_i8x64();
        let zero = i8x64::ZERO;
        let r: i8x64 = simd_shuffle!(
            zero,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
                mask(IMM8, 32),
                mask(IMM8, 33),
                mask(IMM8, 34),
                mask(IMM8, 35),
                mask(IMM8, 36),
                mask(IMM8, 37),
                mask(IMM8, 38),
                mask(IMM8, 39),
                mask(IMM8, 40),
                mask(IMM8, 41),
                mask(IMM8, 42),
                mask(IMM8, 43),
                mask(IMM8, 44),
                mask(IMM8, 45),
                mask(IMM8, 46),
                mask(IMM8, 47),
                mask(IMM8, 48),
                mask(IMM8, 49),
                mask(IMM8, 50),
                mask(IMM8, 51),
                mask(IMM8, 52),
                mask(IMM8, 53),
                mask(IMM8, 54),
                mask(IMM8, 55),
                mask(IMM8, 56),
                mask(IMM8, 57),
                mask(IMM8, 58),
                mask(IMM8, 59),
                mask(IMM8, 60),
                mask(IMM8, 61),
                mask(IMM8, 62),
                mask(IMM8, 63),
            ],
        );
        transmute(r)
    }
}

/// Shift 128-bit lanes in a right by imm8 bytes while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bsrli_epi128&expand=594)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        const fn mask(shift: i32, i: u32) -> u32 {
            let shift = shift as u32 & 0xff;
            if shift > 15 || (15 - (i % 16)) < shift {
                0
            } else {
                64 + (i + shift)
            }
        }
        let a = a.as_i8x64();
        let zero = i8x64::ZERO;
        let r: i8x64 = simd_shuffle!(
            zero,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
                mask(IMM8, 32),
                mask(IMM8, 33),
                mask(IMM8, 34),
                mask(IMM8, 35),
                mask(IMM8, 36),
                mask(IMM8, 37),
                mask(IMM8, 38),
                mask(IMM8, 39),
                mask(IMM8, 40),
                mask(IMM8, 41),
                mask(IMM8, 42),
                mask(IMM8, 43),
                mask(IMM8, 44),
                mask(IMM8, 45),
                mask(IMM8, 46),
                mask(IMM8, 47),
                mask(IMM8, 48),
                mask(IMM8, 49),
                mask(IMM8, 50),
                mask(IMM8, 51),
                mask(IMM8, 52),
                mask(IMM8, 53),
                mask(IMM8, 54),
                mask(IMM8, 55),
                mask(IMM8, 56),
                mask(IMM8, 57),
                mask(IMM8, 58),
                mask(IMM8, 59),
                mask(IMM8, 60),
                mask(IMM8, 61),
                mask(IMM8, 62),
                mask(IMM8, 63),
            ],
        );
        transmute(r)
    }
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst.
/// Unlike the [`_mm_alignr_epi8`] and [`_mm256_alignr_epi8`] functions, where the entire input vectors are concatenated into the temporary result,
/// this concatenation happens in 4 independent steps, each building a 32-byte temporary result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi8&expand=263)
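///
/// # Examples
///
/// A minimal sketch of the per-block alignment (illustrative values; ignored
/// as a doc-test since it needs an AVX-512BW-capable CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // With IMM8 = 1, each 16-byte block of the result is bytes 1..16 of
///     // b's block followed by byte 0 of a's block.
///     let a = _mm512_set1_epi8(2);
///     let b = _mm512_set1_epi8(1);
///     let r = _mm512_alignr_epi8::<1>(a, b);
///     let mut out = [0i8; 64];
///     _mm512_storeu_si512(out.as_mut_ptr() as *mut __m512i, r);
///     assert_eq!(out[14], 1); // still from b
///     assert_eq!(out[15], 2); // byte 0 of a's first block
/// }
/// ```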
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    // Match the masked variants: reject IMM8 values outside 0..=255 at
    // compile time.
    static_assert_uimm_bits!(IMM8, 8);
    const fn mask(shift: u32, i: u32) -> u32 {
        let shift = shift % 16;
        let mod_i = i % 16;
        if mod_i < (16 - shift) {
            i + shift
        } else {
            i + 48 + shift
        }
    }

    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if IMM8 >= 32 {
        return _mm512_setzero_si512();
    }
    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b) = if IMM8 > 16 {
        (_mm512_setzero_si512(), a)
    } else {
        (a, b)
    };
    unsafe {
        if IMM8 == 16 {
            return transmute(a);
        }

        let r: i8x64 = simd_shuffle!(
            b.as_i8x64(),
            a.as_i8x64(),
            [
                mask(IMM8 as u32, 0),
                mask(IMM8 as u32, 1),
                mask(IMM8 as u32, 2),
                mask(IMM8 as u32, 3),
                mask(IMM8 as u32, 4),
                mask(IMM8 as u32, 5),
                mask(IMM8 as u32, 6),
                mask(IMM8 as u32, 7),
                mask(IMM8 as u32, 8),
                mask(IMM8 as u32, 9),
                mask(IMM8 as u32, 10),
                mask(IMM8 as u32, 11),
                mask(IMM8 as u32, 12),
                mask(IMM8 as u32, 13),
                mask(IMM8 as u32, 14),
                mask(IMM8 as u32, 15),
                mask(IMM8 as u32, 16),
                mask(IMM8 as u32, 17),
                mask(IMM8 as u32, 18),
                mask(IMM8 as u32, 19),
                mask(IMM8 as u32, 20),
                mask(IMM8 as u32, 21),
                mask(IMM8 as u32, 22),
                mask(IMM8 as u32, 23),
                mask(IMM8 as u32, 24),
                mask(IMM8 as u32, 25),
                mask(IMM8 as u32, 26),
                mask(IMM8 as u32, 27),
                mask(IMM8 as u32, 28),
                mask(IMM8 as u32, 29),
                mask(IMM8 as u32, 30),
                mask(IMM8 as u32, 31),
                mask(IMM8 as u32, 32),
                mask(IMM8 as u32, 33),
                mask(IMM8 as u32, 34),
                mask(IMM8 as u32, 35),
                mask(IMM8 as u32, 36),
                mask(IMM8 as u32, 37),
                mask(IMM8 as u32, 38),
                mask(IMM8 as u32, 39),
                mask(IMM8 as u32, 40),
                mask(IMM8 as u32, 41),
                mask(IMM8 as u32, 42),
                mask(IMM8 as u32, 43),
                mask(IMM8 as u32, 44),
                mask(IMM8 as u32, 45),
                mask(IMM8 as u32, 46),
                mask(IMM8 as u32, 47),
                mask(IMM8 as u32, 48),
                mask(IMM8 as u32, 49),
                mask(IMM8 as u32, 50),
                mask(IMM8 as u32, 51),
                mask(IMM8 as u32, 52),
                mask(IMM8 as u32, 53),
                mask(IMM8 as u32, 54),
                mask(IMM8 as u32, 55),
                mask(IMM8 as u32, 56),
                mask(IMM8 as u32, 57),
                mask(IMM8 as u32, 58),
                mask(IMM8 as u32, 59),
                mask(IMM8 as u32, 60),
                mask(IMM8 as u32, 61),
                mask(IMM8 as u32, 62),
                mask(IMM8 as u32, 63),
            ],
        );
        transmute(r)
    }
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi8&expand=264)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_alignr_epi8<const IMM8: i32>(
    src: __m512i,
    k: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_alignr_epi8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i8x64(), src.as_i8x64()))
    }
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi8&expand=265)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_alignr_epi8<const IMM8: i32>(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_alignr_epi8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i8x64(), i8x64::ZERO))
    }
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi8&expand=261)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
pub fn _mm256_mask_alignr_epi8<const IMM8: i32>(
    src: __m256i,
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm256_alignr_epi8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i8x32(), src.as_i8x32()))
    }
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi8&expand=262)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
pub fn _mm256_maskz_alignr_epi8<const IMM8: i32>(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm256_alignr_epi8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i8x32(), i8x32::ZERO))
    }
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi8&expand=258)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
pub fn _mm_mask_alignr_epi8<const IMM8: i32>(
    src: __m128i,
    k: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm_alignr_epi8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i8x16(), src.as_i8x16()))
    }
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi8&expand=259)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
pub fn _mm_maskz_alignr_epi8<const IMM8: i32>(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm_alignr_epi8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i8x16(), i8x16::ZERO))
    }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_storeu_epi8&expand=1812)
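///
/// # Examples
///
/// A minimal sketch of the masked store (illustrative values; ignored as a
/// doc-test since it needs an AVX-512BW-capable CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(200); // saturates to 127
///     let mut buf = [0i8; 32];
///     // Only lanes selected by the mask are written; the rest of `buf`
///     // keeps its previous contents.
///     _mm512_mask_cvtsepi16_storeu_epi8(buf.as_mut_ptr(), 0b1111, a);
///     assert_eq!(&buf[..4], &[127, 127, 127, 127]);
///     assert!(buf[4..].iter().all(|&b| b == 0));
/// }
/// ```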
11532#[inline]
11533#[target_feature(enable = "avx512bw")]
11534#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11535#[cfg_attr(test, assert_instr(vpmovswb))]
11536pub unsafe fn _mm512_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
11537    vpmovswbmem(mem_addr, a.as_i16x32(), k);
11538}
11539
11540/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
11541///
11542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_storeu_epi8&expand=1811)
11543#[inline]
11544#[target_feature(enable = "avx512bw,avx512vl")]
11545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11546#[cfg_attr(test, assert_instr(vpmovswb))]
11547pub unsafe fn _mm256_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
11548    vpmovswbmem256(mem_addr, a.as_i16x16(), k);
11549}
11550
11551/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
11552///
11553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_storeu_epi8&expand=1810)
11554#[inline]
11555#[target_feature(enable = "avx512bw,avx512vl")]
11556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11557#[cfg_attr(test, assert_instr(vpmovswb))]
11558pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
11559    vpmovswbmem128(mem_addr, a.as_i16x8(), k);
11560}
11561
11562/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
11563///
11564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_storeu_epi8&expand=1412)
11565#[inline]
11566#[target_feature(enable = "avx512bw")]
11567#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11568#[cfg_attr(test, assert_instr(vpmovwb))]
11569pub unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
11570    vpmovwbmem(mem_addr, a.as_i16x32(), k);
11571}
11572
11573/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
11574///
11575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_storeu_epi8&expand=1411)
11576#[inline]
11577#[target_feature(enable = "avx512bw,avx512vl")]
11578#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11579#[cfg_attr(test, assert_instr(vpmovwb))]
11580pub unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
11581    vpmovwbmem256(mem_addr, a.as_i16x16(), k);
11582}
11583
11584/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
11585///
11586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_storeu_epi8&expand=1410)
11587#[inline]
11588#[target_feature(enable = "avx512bw,avx512vl")]
11589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11590#[cfg_attr(test, assert_instr(vpmovwb))]
11591pub unsafe fn _mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
11592    vpmovwbmem128(mem_addr, a.as_i16x8(), k);
11593}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_storeu_epi8&expand=2047)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm512_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    vpmovuswbmem(mem_addr, a.as_i16x32(), k);
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_storeu_epi8&expand=2046)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm256_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
    vpmovuswbmem256(mem_addr, a.as_i16x16(), k);
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_storeu_epi8&expand=2045)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovuswbmem128(mem_addr, a.as_i16x8(), k);
}
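
// A minimal usage sketch (assumed values, not from the test suite): unsigned
// saturation clamps each active 16-bit lane to 255 before the masked store, so
// 0x0300 (768) is written as 0xFF rather than being truncated to 0x00.
//
//     let a = _mm_set1_epi16(0x0300);
//     let mut buf = [0u8; 8];
//     unsafe { _mm_mask_cvtusepi16_storeu_epi8(buf.as_mut_ptr().cast(), 0b1111, a) };
//     assert_eq!(buf[..4], [0xFF; 4]); // saturated, not truncated
//     assert_eq!(buf[4..], [0u8; 4]);  // inactive lanes never stored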

#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.avx512.pmul.hr.sw.512"]
    fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;

    #[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
    fn vpmaddubsw(a: i8x64, b: i8x64) -> i16x32;

    #[link_name = "llvm.x86.avx512.packssdw.512"]
    fn vpackssdw(a: i32x16, b: i32x16) -> i16x32;
    #[link_name = "llvm.x86.avx512.packsswb.512"]
    fn vpacksswb(a: i16x32, b: i16x32) -> i8x64;
    #[link_name = "llvm.x86.avx512.packusdw.512"]
    fn vpackusdw(a: i32x16, b: i32x16) -> u16x32;
    #[link_name = "llvm.x86.avx512.packuswb.512"]
    fn vpackuswb(a: i16x32, b: i16x32) -> u8x64;

    #[link_name = "llvm.x86.avx512.psll.w.512"]
    fn vpsllw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.psllv.w.512"]
    fn vpsllvw(a: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.psllv.w.256"]
    fn vpsllvw256(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.psllv.w.128"]
    fn vpsllvw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.psrl.w.512"]
    fn vpsrlw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.psrlv.w.512"]
    fn vpsrlvw(a: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.psrlv.w.256"]
    fn vpsrlvw256(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.psrlv.w.128"]
    fn vpsrlvw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.psra.w.512"]
    fn vpsraw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.psrav.w.512"]
    fn vpsravw(a: i16x32, count: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.psrav.w.256"]
    fn vpsravw256(a: i16x16, count: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.psrav.w.128"]
    fn vpsravw128(a: i16x8, count: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.vpermi2var.hi.512"]
    fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.256"]
    fn vpermi2w256(a: i16x16, idx: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.128"]
    fn vpermi2w128(a: i16x8, idx: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.permvar.hi.512"]
    fn vpermw(a: i16x32, idx: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.permvar.hi.256"]
    fn vpermw256(a: i16x16, idx: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.permvar.hi.128"]
    fn vpermw128(a: i16x8, idx: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.pshuf.b.512"]
    fn vpshufb(a: i8x64, b: i8x64) -> i8x64;

    #[link_name = "llvm.x86.avx512.psad.bw.512"]
    fn vpsadbw(a: u8x64, b: u8x64) -> u64x8;

    #[link_name = "llvm.x86.avx512.dbpsadbw.512"]
    fn vdbpsadbw(a: u8x64, b: u8x64, imm8: i32) -> u16x32;
    #[link_name = "llvm.x86.avx512.dbpsadbw.256"]
    fn vdbpsadbw256(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
    #[link_name = "llvm.x86.avx512.dbpsadbw.128"]
    fn vdbpsadbw128(a: u8x16, b: u8x16, imm8: i32) -> u16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.512"]
    fn vpmovswb(a: i16x32, src: i8x32, mask: u32) -> i8x32;
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.256"]
    fn vpmovswb256(a: i16x16, src: i8x16, mask: u16) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.128"]
    fn vpmovswb128(a: i16x8, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.512"]
    fn vpmovuswb(a: u16x32, src: u8x32, mask: u32) -> u8x32;
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.256"]
    fn vpmovuswb256(a: u16x16, src: u8x16, mask: u16) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.128"]
    fn vpmovuswb128(a: u16x8, src: u8x16, mask: u8) -> u8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.512"]
    fn vpmovswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.256"]
    fn vpmovswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.128"]
    fn vpmovswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.512"]
    fn vpmovwbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.256"]
    fn vpmovwbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.128"]
    fn vpmovwbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.512"]
    fn vpmovuswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.256"]
    fn vpmovuswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.128"]
    fn vpmovuswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.loadu.b.128"]
    fn loaddqu8_128(mem_addr: *const i8, a: i8x16, mask: u16) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.loadu.w.128"]
    fn loaddqu16_128(mem_addr: *const i16, a: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.loadu.b.256"]
    fn loaddqu8_256(mem_addr: *const i8, a: i8x32, mask: u32) -> i8x32;
    #[link_name = "llvm.x86.avx512.mask.loadu.w.256"]
    fn loaddqu16_256(mem_addr: *const i16, a: i16x16, mask: u16) -> i16x16;
    #[link_name = "llvm.x86.avx512.mask.loadu.b.512"]
    fn loaddqu8_512(mem_addr: *const i8, a: i8x64, mask: u64) -> i8x64;
    #[link_name = "llvm.x86.avx512.mask.loadu.w.512"]
    fn loaddqu16_512(mem_addr: *const i16, a: i16x32, mask: u32) -> i16x32;

    #[link_name = "llvm.x86.avx512.mask.storeu.b.128"]
    fn storedqu8_128(mem_addr: *mut i8, a: i8x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.storeu.w.128"]
    fn storedqu16_128(mem_addr: *mut i16, a: i16x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.b.256"]
    fn storedqu8_256(mem_addr: *mut i8, a: i8x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.storeu.w.256"]
    fn storedqu16_256(mem_addr: *mut i16, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.storeu.b.512"]
    fn storedqu8_512(mem_addr: *mut i8, a: i8x64, mask: u64);
    #[link_name = "llvm.x86.avx512.mask.storeu.w.512"]
    fn storedqu16_512(mem_addr: *mut i16, a: i16x32, mask: u32);
}

#[cfg(test)]
mod tests {

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;
    use crate::hint::black_box;
    use crate::mem;

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_abs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let r = _mm512_abs_epi16(a);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_abs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let r = _mm512_mask_abs_epi16(a, 0, a);
        assert_eq_m512i(r, a);
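        // Mask bit i selects lane i, while _mm512_set_epi16 lists lanes from
        // e31 down to e0, so each set byte of the mask shows up as the trailing
        // eight values of its row in the expected vector below.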
        let r = _mm512_mask_abs_epi16(a, 0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_abs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let r = _mm512_maskz_abs_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_abs_epi16(0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_abs_epi16() {
        let a = _mm256_set1_epi16(-1);
        let r = _mm256_mask_abs_epi16(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_abs_epi16(a, 0b00000000_11111111, a);
        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_abs_epi16() {
        let a = _mm256_set1_epi16(-1);
        let r = _mm256_maskz_abs_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_abs_epi16(0b00000000_11111111, a);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_abs_epi16() {
        let a = _mm_set1_epi16(-1);
        let r = _mm_mask_abs_epi16(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_abs_epi16(a, 0b00001111, a);
        let e = _mm_set_epi16(-1, -1, -1, -1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_abs_epi16() {
        let a = _mm_set1_epi16(-1);
        let r = _mm_maskz_abs_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_abs_epi16(0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_abs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let r = _mm512_abs_epi8(a);
        let e = _mm512_set1_epi8(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_abs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let r = _mm512_mask_abs_epi8(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_abs_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_abs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let r = _mm512_maskz_abs_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_abs_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_abs_epi8() {
        let a = _mm256_set1_epi8(-1);
        let r = _mm256_mask_abs_epi8(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_abs_epi8(a, 0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_abs_epi8() {
        let a = _mm256_set1_epi8(-1);
        let r = _mm256_maskz_abs_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_abs_epi8(0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_abs_epi8() {
        let a = _mm_set1_epi8(-1);
        let r = _mm_mask_abs_epi8(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_abs_epi8(a, 0b00000000_11111111, a);
        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_abs_epi8() {
        let a = _mm_set1_epi8(-1);
        let r = _mm_maskz_abs_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_abs_epi8(0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_add_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_add_epi16(a, b);
        let e = _mm512_set1_epi16(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_add_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_add_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_add_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_add_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_maskz_add_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_add_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_add_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_add_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_maskz_add_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_add_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_add_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_add_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_add_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_add_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_maskz_add_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_add_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_add_epi8(a, b);
        let e = _mm512_set1_epi8(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_add_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_add_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_maskz_add_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_add_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_add_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_add_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_maskz_add_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_add_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_add_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_mask_add_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_add_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_add_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_maskz_add_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_add_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_adds_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
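        // Unsigned saturating add: 1 + u16::MAX clamps to u16::MAX instead of
        // wrapping to 0.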
        let r = _mm512_adds_epu16(a, b);
        let e = _mm512_set1_epi16(u16::MAX as i16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_adds_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_mask_adds_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_adds_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_maskz_adds_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_adds_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_mask_adds_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epu16(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_adds_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_maskz_adds_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epu16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_adds_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_mask_adds_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epu16(a, 0b00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi16(1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_adds_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_maskz_adds_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epu16(0b00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi16(0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_adds_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_adds_epu8(a, b);
        let e = _mm512_set1_epi8(u8::MAX as i8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_adds_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_mask_adds_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epu8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_adds_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_maskz_adds_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epu8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_adds_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_mask_adds_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_adds_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_maskz_adds_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epu8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_adds_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_mask_adds_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epu8(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_adds_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_maskz_adds_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epu8(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_adds_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(i16::MAX);
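        // Signed saturating add: 1 + i16::MAX clamps to i16::MAX instead of
        // wrapping to i16::MIN.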
        let r = _mm512_adds_epi16(a, b);
        let e = _mm512_set1_epi16(i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_adds_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_mask_adds_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_adds_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_maskz_adds_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_adds_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_mask_adds_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epi16(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_adds_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_maskz_adds_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epi16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_adds_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(i16::MAX);
        let r = _mm_mask_adds_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_adds_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(i16::MAX);
        let r = _mm_maskz_adds_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_adds_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_adds_epi8(a, b);
        let e = _mm512_set1_epi8(i8::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_adds_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_mask_adds_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epi8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_adds_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_maskz_adds_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epi8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_adds_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(i8::MAX);
        let r = _mm256_mask_adds_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_adds_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(i8::MAX);
        let r = _mm256_maskz_adds_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epi8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_adds_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(i8::MAX);
        let r = _mm_mask_adds_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epi8(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_adds_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(i8::MAX);
        let r = _mm_maskz_adds_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epi8(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_sub_epi16(a, b);
        let e = _mm512_set1_epi16(-1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_sub_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sub_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_maskz_sub_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sub_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_sub_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_sub_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sub_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_sub_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_maskz_sub_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sub_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_sub_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_sub_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sub_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_sub_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_maskz_sub_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sub_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_sub_epi8(a, b);
        let e = _mm512_set1_epi8(-1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_sub_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sub_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_maskz_sub_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sub_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_sub_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_sub_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sub_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_sub_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_maskz_sub_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sub_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_sub_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_mask_sub_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sub_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_sub_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_maskz_sub_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sub_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_subs_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
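        // Unsigned saturating subtract: 1 - u16::MAX clamps to 0 instead of
        // wrapping around.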
        let r = _mm512_subs_epu16(a, b);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_subs_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_mask_subs_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_subs_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_subs_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_maskz_subs_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_subs_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_subs_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_mask_subs_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_subs_epu16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_subs_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_maskz_subs_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_subs_epu16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_subs_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_mask_subs_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_subs_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_subs_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_maskz_subs_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_subs_epu16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_subs_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_subs_epu8(a, b);
        let e = _mm512_set1_epi8(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_subs_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_mask_subs_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_subs_epu8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_subs_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_maskz_subs_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_subs_epu8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_subs_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_mask_subs_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_subs_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_subs_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_maskz_subs_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_subs_epu8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_subs_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_mask_subs_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_subs_epu8(a, 0b00000000_00001111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_subs_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_maskz_subs_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_subs_epu8(0b00000000_00001111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_subs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(i16::MAX);
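        // -1 - i16::MAX is exactly i16::MIN, the signed saturation floor, so
        // the result sits at the boundary without wrapping.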
12823        let r = _mm512_subs_epi16(a, b);
12824        let e = _mm512_set1_epi16(i16::MIN);
12825        assert_eq_m512i(r, e);
12826    }
12827
12828    #[simd_test(enable = "avx512bw")]
12829    unsafe fn test_mm512_mask_subs_epi16() {
12830        let a = _mm512_set1_epi16(-1);
12831        let b = _mm512_set1_epi16(i16::MAX);
12832        let r = _mm512_mask_subs_epi16(a, 0, a, b);
12833        assert_eq_m512i(r, a);
12834        let r = _mm512_mask_subs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
12835        #[rustfmt::skip]
12836        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
12837                                 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
12838        assert_eq_m512i(r, e);
12839    }
12840
12841    #[simd_test(enable = "avx512bw")]
12842    unsafe fn test_mm512_maskz_subs_epi16() {
12843        let a = _mm512_set1_epi16(-1);
12844        let b = _mm512_set1_epi16(i16::MAX);
12845        let r = _mm512_maskz_subs_epi16(0, a, b);
12846        assert_eq_m512i(r, _mm512_setzero_si512());
12847        let r = _mm512_maskz_subs_epi16(0b00000000_00000000_00000000_00001111, a, b);
12848        #[rustfmt::skip]
12849        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12850                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
12851        assert_eq_m512i(r, e);
12852    }
12853
12854    #[simd_test(enable = "avx512bw,avx512vl")]
12855    unsafe fn test_mm256_mask_subs_epi16() {
12856        let a = _mm256_set1_epi16(-1);
12857        let b = _mm256_set1_epi16(i16::MAX);
12858        let r = _mm256_mask_subs_epi16(a, 0, a, b);
12859        assert_eq_m256i(r, a);
12860        let r = _mm256_mask_subs_epi16(a, 0b00000000_00001111, a, b);
12861        #[rustfmt::skip]
12862        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
12863        assert_eq_m256i(r, e);
12864    }
12865
12866    #[simd_test(enable = "avx512bw,avx512vl")]
12867    unsafe fn test_mm256_maskz_subs_epi16() {
12868        let a = _mm256_set1_epi16(-1);
12869        let b = _mm256_set1_epi16(i16::MAX);
12870        let r = _mm256_maskz_subs_epi16(0, a, b);
12871        assert_eq_m256i(r, _mm256_setzero_si256());
12872        let r = _mm256_maskz_subs_epi16(0b00000000_00001111, a, b);
12873        #[rustfmt::skip]
12874        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
12875        assert_eq_m256i(r, e);
12876    }
12877
12878    #[simd_test(enable = "avx512bw,avx512vl")]
12879    unsafe fn test_mm_mask_subs_epi16() {
12880        let a = _mm_set1_epi16(-1);
12881        let b = _mm_set1_epi16(i16::MAX);
12882        let r = _mm_mask_subs_epi16(a, 0, a, b);
12883        assert_eq_m128i(r, a);
12884        let r = _mm_mask_subs_epi16(a, 0b00001111, a, b);
12885        let e = _mm_set_epi16(-1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
12886        assert_eq_m128i(r, e);
12887    }
12888
12889    #[simd_test(enable = "avx512bw,avx512vl")]
12890    unsafe fn test_mm_maskz_subs_epi16() {
12891        let a = _mm_set1_epi16(-1);
12892        let b = _mm_set1_epi16(i16::MAX);
12893        let r = _mm_maskz_subs_epi16(0, a, b);
12894        assert_eq_m128i(r, _mm_setzero_si128());
12895        let r = _mm_maskz_subs_epi16(0b00001111, a, b);
12896        let e = _mm_set_epi16(0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
12897        assert_eq_m128i(r, e);
12898    }
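
    // The signed variants clamp to the i16 range instead of zero: -1 - i16::MAX
    // is exactly i16::MIN, and anything below that pins there. A scalar sketch
    // (illustrative only, not part of the upstream suite):
    #[test]
    fn scalar_model_subs_epi16() {
        assert_eq!((-1i16).saturating_sub(i16::MAX), i16::MIN);
        assert_eq!(i16::MIN.saturating_sub(1), i16::MIN);
    }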

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_subs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_subs_epi8(a, b);
        let e = _mm512_set1_epi8(i8::MIN);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_subs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_mask_subs_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_subs_epi8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_subs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_maskz_subs_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_subs_epi8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_subs_epi8() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(i8::MAX);
        let r = _mm256_mask_subs_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_subs_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_subs_epi8() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(i8::MAX);
        let r = _mm256_maskz_subs_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_subs_epi8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_subs_epi8() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(i8::MAX);
        let r = _mm_mask_subs_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_subs_epi8(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_subs_epi8() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(i8::MAX);
        let r = _mm_maskz_subs_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_subs_epi8(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mulhi_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mulhi_epu16(a, b);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mulhi_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_mulhi_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_mulhi_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mulhi_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_mulhi_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mulhi_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_mulhi_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_mulhi_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_mulhi_epu16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_mulhi_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_mulhi_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mulhi_epu16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_mulhi_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_mulhi_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_mulhi_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_mulhi_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_mulhi_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mulhi_epu16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mulhi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mulhi_epi16(a, b);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mulhi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_mulhi_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_mulhi_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mulhi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_mulhi_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mulhi_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_mulhi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_mulhi_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_mulhi_epi16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_mulhi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_mulhi_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mulhi_epi16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_mulhi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_mulhi_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_mulhi_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_mulhi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_mulhi_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mulhi_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
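
    // `mulhi` keeps only the high 16 bits of the widening 32-bit product,
    // which is why 1 * 1 yields 0 in every lane above. A scalar sketch of
    // the unsigned and signed variants (illustrative only, not part of the
    // upstream suite):
    #[test]
    fn scalar_model_mulhi_epi16() {
        let hi_u = |a: u16, b: u16| ((a as u32 * b as u32) >> 16) as u16;
        let hi_s = |a: i16, b: i16| ((a as i32 * b as i32) >> 16) as i16;
        assert_eq!(hi_u(1, 1), 0);
        assert_eq!(hi_u(u16::MAX, u16::MAX), 0xFFFE);
        assert_eq!(hi_s(i16::MIN, i16::MIN), 0x4000);
    }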

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mulhrs_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mulhrs_epi16(a, b);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mulhrs_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_mulhrs_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_mulhrs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mulhrs_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_mulhrs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mulhrs_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_mulhrs_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_mulhrs_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_mulhrs_epi16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_mulhrs_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_mulhrs_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mulhrs_epi16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_mulhrs_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_mulhrs_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_mulhrs_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_mulhrs_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_mulhrs_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mulhrs_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
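
    // `mulhrs` scales the signed 32-bit product down with rounding:
    // ((a * b >> 14) + 1) >> 1. For 1 * 1 this rounds to 0, matching the
    // expectations above. A scalar sketch (illustrative only, not part of
    // the upstream suite):
    #[test]
    fn scalar_model_mulhrs_epi16() {
        let hrs = |a: i16, b: i16| ((((a as i32 * b as i32) >> 14) + 1) >> 1) as i16;
        assert_eq!(hrs(1, 1), 0);
        assert_eq!(hrs(i16::MAX, i16::MAX), 0x7FFE);
    }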

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mullo_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mullo_epi16(a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mullo_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_mullo_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_mullo_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mullo_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_mullo_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mullo_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_mullo_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_mullo_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_mullo_epi16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_mullo_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_mullo_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mullo_epi16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_mullo_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_mullo_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_mullo_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_mullo_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_mullo_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mullo_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
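
    // `mullo` keeps the low 16 bits of the product, i.e. wrapping
    // multiplication, so 1 * 1 stays 1 in the selected lanes. A scalar
    // sketch (illustrative only, not part of the upstream suite):
    #[test]
    fn scalar_model_mullo_epi16() {
        assert_eq!(1i16.wrapping_mul(1), 1);
        // 0x4000 * 4 == 0x10000, which truncates to 0 in 16 bits.
        assert_eq!(0x4000i16.wrapping_mul(4), 0);
    }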

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_max_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epu16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_max_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_max_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epu16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_max_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_max_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_max_epu16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_max_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_max_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_max_epu16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_max_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_max_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_max_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_max_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_max_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_max_epu16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
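
    // A note on the mask/lane correspondence the max/min tests depend on:
    // mask bit 0 selects element 0, which `_mm512_set_epi16` takes as its
    // *last* argument, so a mask such as 0b..._11111111 covers the final
    // eight values of each `set_*` call. Illustrative sketch (not part of
    // the upstream suite):
    #[simd_test(enable = "avx512bw")]
    unsafe fn scalar_model_mask_lane_order() {
        let a = _mm512_set1_epi16(7);
        // Only element 0, the lowest lane, survives the zeromask.
        let r = _mm512_maskz_mov_epi16(0b1, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7);
        assert_eq_m512i(r, e);
    }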

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_max_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_max_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epu8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_max_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epu8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_max_epu8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_max_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_max_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_max_epu8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_max_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_max_epu8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_max_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_max_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_max_epu8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_max_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_max_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_max_epu8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_max_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_max_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_max_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_max_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_max_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_max_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_max_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_max_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_max_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_max_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_max_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_max_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_max_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_max_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_max_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_max_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_max_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_max_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_max_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_max_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_max_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_max_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_max_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_max_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_max_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_max_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_max_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_max_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_max_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_max_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
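
    // The epu/epi comparisons above differ only for values with the sign bit
    // set: 0x80 is 128 unsigned but -128 signed, so `max` picks opposite
    // winners. A scalar sketch (illustrative only, not part of the upstream
    // suite):
    #[test]
    fn scalar_model_max_signedness() {
        assert_eq!((0x80u8).max(1), 0x80); // unsigned: 128 wins
        assert_eq!((0x80u8 as i8).max(1), 1); // signed: -128 loses
    }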

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_min_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epu16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_min_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_min_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_min_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epu16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_min_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_min_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epu16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_min_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epu16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_min_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_min_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_min_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_min_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_min_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_min_epu16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_min_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_min_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_min_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epu8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_min_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epu8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_min_epu8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_min_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_min_epu8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epu8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_min_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_min_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_min_epu8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_min_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_min_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_min_epu8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }
13988
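    // The epi16 tests below cover the signed 16-bit minimum across 32 lanes
    // (__mmask32 for the 512-bit masked forms), mirroring the epu8 tests above.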
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_min_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_min_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_min_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_min_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_min_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_min_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_min_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_min_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_min_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_min_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_min_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_min_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_min_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

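    // Signed 8-bit minimum; with the non-negative inputs used here it agrees with
    // the unsigned epu8 results above, so the expected vectors are identical.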
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_min_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_min_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_min_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_min_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_min_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_min_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_min_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_min_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_min_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_min_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_min_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_min_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_min_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

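    // The cmp*_mask tests below check the mask-returning compares. For the unsigned
    // less-than forms, -2 and -1 reinterpret to the two largest unsigned values
    // (0xFFFE and 0xFFFF for 16-bit lanes, 0xFE and 0xFF for 8-bit), so a < b holds
    // in every lane and the unmasked compare yields the all-ones mask.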
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmplt_epu16_mask() {
        let a = _mm512_set1_epi16(-2);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmplt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmplt_epu16_mask() {
        let a = _mm512_set1_epi16(-2);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmplt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmplt_epu16_mask() {
        let a = _mm256_set1_epi16(-2);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmplt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmplt_epu16_mask() {
        let a = _mm256_set1_epi16(-2);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmplt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmplt_epu16_mask() {
        let a = _mm_set1_epi16(-2);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmplt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmplt_epu16_mask() {
        let a = _mm_set1_epi16(-2);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmplt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmplt_epu8_mask() {
        let a = _mm512_set1_epi8(-2);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmplt_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmplt_epu8_mask() {
        let a = _mm512_set1_epi8(-2);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmplt_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmplt_epu8_mask() {
        let a = _mm256_set1_epi8(-2);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmplt_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmplt_epu8_mask() {
        let a = _mm256_set1_epi8(-2);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmplt_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmplt_epu8_mask() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmplt_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmplt_epu8_mask() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmplt_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

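    // Signed less-than: -2 < -1 holds directly in every lane.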
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmplt_epi16_mask() {
        let a = _mm512_set1_epi16(-2);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmplt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmplt_epi16_mask() {
        let a = _mm512_set1_epi16(-2);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmplt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmplt_epi16_mask() {
        let a = _mm256_set1_epi16(-2);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmplt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmplt_epi16_mask() {
        let a = _mm256_set1_epi16(-2);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmplt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmplt_epi16_mask() {
        let a = _mm_set1_epi16(-2);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmplt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmplt_epi16_mask() {
        let a = _mm_set1_epi16(-2);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmplt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmplt_epi8_mask() {
        let a = _mm512_set1_epi8(-2);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmplt_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmplt_epi8_mask() {
        let a = _mm512_set1_epi8(-2);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmplt_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmplt_epi8_mask() {
        let a = _mm256_set1_epi8(-2);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmplt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmplt_epi8_mask() {
        let a = _mm256_set1_epi8(-2);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmplt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmplt_epi8_mask() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmplt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmplt_epi8_mask() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmplt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

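    // Unsigned greater-than: 2 > 1 holds in every lane.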
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpgt_epu16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmpgt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpgt_epu16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpgt_epu16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmpgt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpgt_epu16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpgt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpgt_epu16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmpgt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpgt_epu16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpgt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpgt_epu8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmpgt_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpgt_epu8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpgt_epu8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmpgt_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpgt_epu8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpgt_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpgt_epu8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmpgt_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpgt_epu8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpgt_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

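    // Signed greater-than: 2 > -1 holds in every lane; an unsigned compare would
    // give the opposite result, since -1 reinterprets to the maximum unsigned value.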
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpgt_epi16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmpgt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpgt_epi16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpgt_epi16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmpgt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpgt_epi16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpgt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpgt_epi16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmpgt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpgt_epi16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpgt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpgt_epi8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpgt_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpgt_epi8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpgt_epi8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpgt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpgt_epi8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpgt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpgt_epi8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpgt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpgt_epi8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpgt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

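    // Less-than-or-equal compares use identical operands (a == b), so every lane
    // passes and the unmasked result is the all-ones mask.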
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmple_epu16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmple_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmple_epu16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmple_epu16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmple_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmple_epu16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmple_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmple_epu16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmple_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmple_epu16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmple_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmple_epu8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmple_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmple_epu8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmple_epu8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmple_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmple_epu8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmple_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmple_epu8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmple_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmple_epu8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmple_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmple_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmple_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmple_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmple_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmple_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmple_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmple_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmple_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmple_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmple_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmple_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmple_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmple_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmple_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmple_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmple_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmple_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmple_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmple_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmple_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmple_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmple_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

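    // Greater-than-or-equal compares also use identical operands (a == b), so every
    // lane passes and the unmasked result is the all-ones mask.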
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpge_epu16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmpge_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpge_epu16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpge_epu16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmpge_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpge_epu16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpge_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpge_epu16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmpge_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpge_epu16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpge_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpge_epu8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmpge_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpge_epu8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpge_epu8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmpge_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpge_epu8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpge_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpge_epu8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmpge_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpge_epu8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpge_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpge_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmpge_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpge_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpge_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmpge_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpge_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpge_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpge_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmpge_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpge_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpge_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpge_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpge_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpge_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpge_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpge_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpge_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpge_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpge_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpge_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpge_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpge_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

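    // Equality compares with identical operands; every lane matches.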
15078    #[simd_test(enable = "avx512bw")]
15079    unsafe fn test_mm512_cmpeq_epu16_mask() {
15080        let a = _mm512_set1_epi16(1);
15081        let b = _mm512_set1_epi16(1);
15082        let m = _mm512_cmpeq_epu16_mask(a, b);
15083        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15084    }
15085
15086    #[simd_test(enable = "avx512bw")]
15087    unsafe fn test_mm512_mask_cmpeq_epu16_mask() {
15088        let a = _mm512_set1_epi16(1);
15089        let b = _mm512_set1_epi16(1);
15090        let mask = 0b01010101_01010101_01010101_01010101;
15091        let r = _mm512_mask_cmpeq_epu16_mask(mask, a, b);
15092        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15093    }
15094
15095    #[simd_test(enable = "avx512bw,avx512vl")]
15096    unsafe fn test_mm256_cmpeq_epu16_mask() {
15097        let a = _mm256_set1_epi16(1);
15098        let b = _mm256_set1_epi16(1);
15099        let m = _mm256_cmpeq_epu16_mask(a, b);
15100        assert_eq!(m, 0b11111111_11111111);
15101    }
15102
15103    #[simd_test(enable = "avx512bw,avx512vl")]
15104    unsafe fn test_mm256_mask_cmpeq_epu16_mask() {
15105        let a = _mm256_set1_epi16(1);
15106        let b = _mm256_set1_epi16(1);
15107        let mask = 0b01010101_01010101;
15108        let r = _mm256_mask_cmpeq_epu16_mask(mask, a, b);
15109        assert_eq!(r, 0b01010101_01010101);
15110    }
15111
15112    #[simd_test(enable = "avx512bw,avx512vl")]
15113    unsafe fn test_mm_cmpeq_epu16_mask() {
15114        let a = _mm_set1_epi16(1);
15115        let b = _mm_set1_epi16(1);
15116        let m = _mm_cmpeq_epu16_mask(a, b);
15117        assert_eq!(m, 0b11111111);
15118    }
15119
15120    #[simd_test(enable = "avx512bw,avx512vl")]
15121    unsafe fn test_mm_mask_cmpeq_epu16_mask() {
15122        let a = _mm_set1_epi16(1);
15123        let b = _mm_set1_epi16(1);
15124        let mask = 0b01010101;
15125        let r = _mm_mask_cmpeq_epu16_mask(mask, a, b);
15126        assert_eq!(r, 0b01010101);
15127    }
15128
15129    #[simd_test(enable = "avx512bw")]
15130    unsafe fn test_mm512_cmpeq_epu8_mask() {
15131        let a = _mm512_set1_epi8(1);
15132        let b = _mm512_set1_epi8(1);
15133        let m = _mm512_cmpeq_epu8_mask(a, b);
15134        assert_eq!(
15135            m,
15136            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15137        );
15138    }
15139
15140    #[simd_test(enable = "avx512bw")]
15141    unsafe fn test_mm512_mask_cmpeq_epu8_mask() {
15142        let a = _mm512_set1_epi8(1);
15143        let b = _mm512_set1_epi8(1);
15144        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15145        let r = _mm512_mask_cmpeq_epu8_mask(mask, a, b);
15146        assert_eq!(
15147            r,
15148            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15149        );
15150    }
15151
15152    #[simd_test(enable = "avx512bw,avx512vl")]
15153    unsafe fn test_mm256_cmpeq_epu8_mask() {
15154        let a = _mm256_set1_epi8(1);
15155        let b = _mm256_set1_epi8(1);
15156        let m = _mm256_cmpeq_epu8_mask(a, b);
15157        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15158    }
15159
15160    #[simd_test(enable = "avx512bw,avx512vl")]
15161    unsafe fn test_mm256_mask_cmpeq_epu8_mask() {
15162        let a = _mm256_set1_epi8(1);
15163        let b = _mm256_set1_epi8(1);
15164        let mask = 0b01010101_01010101_01010101_01010101;
15165        let r = _mm256_mask_cmpeq_epu8_mask(mask, a, b);
15166        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15167    }
15168
15169    #[simd_test(enable = "avx512bw,avx512vl")]
15170    unsafe fn test_mm_cmpeq_epu8_mask() {
15171        let a = _mm_set1_epi8(1);
15172        let b = _mm_set1_epi8(1);
15173        let m = _mm_cmpeq_epu8_mask(a, b);
15174        assert_eq!(m, 0b11111111_11111111);
15175    }
15176
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpeq_epu8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpeq_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpeq_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmpeq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpeq_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpeq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpeq_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmpeq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpeq_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpeq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpeq_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmpeq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpeq_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpeq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpeq_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpeq_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpeq_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpeq_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpeq_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpeq_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpeq_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpeq_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpeq_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpeq_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpeq_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpeq_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

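    // The cmpneq tests below choose operands that differ in every lane; the
    // epu variants compare the bit patterns as unsigned values, the epi
    // variants as signed ones, so both should report "not equal" everywhere.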
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpneq_epu16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmpneq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpneq_epu16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpneq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpneq_epu16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmpneq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpneq_epu16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpneq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpneq_epu16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmpneq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpneq_epu16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpneq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpneq_epu8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmpneq_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpneq_epu8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpneq_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpneq_epu8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmpneq_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpneq_epu8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpneq_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpneq_epu8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmpneq_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpneq_epu8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpneq_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpneq_epi16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmpneq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpneq_epi16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpneq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpneq_epi16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmpneq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpneq_epi16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpneq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpneq_epi16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmpneq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpneq_epi16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpneq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpneq_epi8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpneq_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpneq_epi8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpneq_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpneq_epi8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpneq_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpneq_epi8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpneq_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpneq_epi8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpneq_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpneq_epi8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpneq_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

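    // The generic `_mm*_cmp_*_mask` intrinsics take the comparison predicate
    // as a const generic; these tests use `_MM_CMPINT_LT` with `a` strictly
    // less than `b` in every lane, so the unmasked forms should set every bit
    // of the result mask.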
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmp_epu16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmp_epu16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmp_epu16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmp_epu16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmp_epu16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmp_epu16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmp_epu8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmp_epu8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmp_epu8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmp_epu8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmp_epu8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmp_epu8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmp_epi16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmp_epi16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmp_epi16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmp_epi16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmp_epi16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmp_epi16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmp_epi8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmp_epi8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmp_epi8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmp_epi8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmp_epi8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmp_epi8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

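    // Reduction tests. Note that `_mm*_set_epi*` lists elements from the
    // highest index down to index 0, while mask bit `i` selects element `i`,
    // so a mask like 0b11111111_00000000 covers the *first* eight arguments
    // passed to `_mm256_set_epi16`. Masked-off elements do not contribute to
    // the result; they effectively behave as the operation's identity (0 for
    // add/or, all-ones for and, 1 for mul, the type's minimum/maximum for
    // max/min).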
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let e = _mm256_reduce_add_epi16(a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let e = _mm256_mask_reduce_add_epi16(0b11111111_00000000, a);
        assert_eq!(8, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_add_epi16() {
        let a = _mm_set1_epi16(1);
        let e = _mm_reduce_add_epi16(a);
        assert_eq!(8, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_add_epi16() {
        let a = _mm_set1_epi16(1);
        let e = _mm_mask_reduce_add_epi16(0b11110000, a);
        assert_eq!(4, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let e = _mm256_reduce_add_epi8(a);
        assert_eq!(32, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let e = _mm256_mask_reduce_add_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_add_epi8() {
        let a = _mm_set1_epi8(1);
        let e = _mm_reduce_add_epi8(a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_add_epi8() {
        let a = _mm_set1_epi8(1);
        let e = _mm_mask_reduce_add_epi8(0b11111111_00000000, a);
        assert_eq!(8, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_and_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_reduce_and_epi16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_and_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_mask_reduce_and_epi16(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_and_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_reduce_and_epi16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_and_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_mask_reduce_and_epi16(0b11110000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_and_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_reduce_and_epi8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_and_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_mask_reduce_and_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_and_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_reduce_and_epi8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_and_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_mask_reduce_and_epi8(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_mul_epi16() {
        let a = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        let e = _mm256_reduce_mul_epi16(a);
        assert_eq!(256, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_mul_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_mask_reduce_mul_epi16(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_mul_epi16() {
        let a = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
        let e = _mm_reduce_mul_epi16(a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_mul_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_mask_reduce_mul_epi16(0b11110000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_mul_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            2, 2, 2,
        );
        let e = _mm256_reduce_mul_epi8(a);
        assert_eq!(64, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_mul_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            2, 2, 2,
        );
        let e = _mm256_mask_reduce_mul_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_mul_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
        let e = _mm_reduce_mul_epi8(a);
        assert_eq!(8, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_mul_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
        let e = _mm_mask_reduce_mul_epi8(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_max_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_reduce_max_epi16(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_max_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_mask_reduce_max_epi16(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_max_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_reduce_max_epi16(a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_max_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_mask_reduce_max_epi16(0b11110000, a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_max_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_reduce_max_epi8(a);
        assert_eq!(31, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_max_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_mask_reduce_max_epi8(0b11111111_11111111_00000000_00000000, a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_max_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_reduce_max_epi8(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_max_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_mask_reduce_max_epi8(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_max_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_reduce_max_epu16(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_max_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_mask_reduce_max_epu16(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_max_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_reduce_max_epu16(a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_max_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_mask_reduce_max_epu16(0b11110000, a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_max_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_reduce_max_epu8(a);
        assert_eq!(31, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_max_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_mask_reduce_max_epu8(0b11111111_11111111_00000000_00000000, a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_max_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_reduce_max_epu8(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_max_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_mask_reduce_max_epu8(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_min_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_reduce_min_epi16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_min_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_mask_reduce_min_epi16(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_min_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_reduce_min_epi16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_min_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_mask_reduce_min_epi16(0b11110000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_min_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_reduce_min_epi8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_min_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_mask_reduce_min_epi8(0b11111111_11111111_00000000_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_min_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_reduce_min_epi8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_min_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_mask_reduce_min_epi8(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_min_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_reduce_min_epu16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_min_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_mask_reduce_min_epu16(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_min_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_reduce_min_epu16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_min_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_mask_reduce_min_epu16(0b11110000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_min_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_reduce_min_epu8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_min_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_mask_reduce_min_epu8(0b11111111_11111111_00000000_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_min_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_reduce_min_epu8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_min_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_mask_reduce_min_epu8(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_or_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_reduce_or_epi16(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_or_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_mask_reduce_or_epi16(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_or_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_reduce_or_epi16(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_or_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_mask_reduce_or_epi16(0b11110000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_or_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_reduce_or_epi8(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_or_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_mask_reduce_or_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_or_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_reduce_or_epi8(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_or_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_mask_reduce_or_epi8(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

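    // Unaligned load/store tests. `_mm*_loadu_epi*` takes a plain element
    // pointer with no alignment requirement, and `_mm*_set_epi*` lists
    // elements from the highest index down, hence the mirrored expected
    // constants relative to the source arrays.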
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_loadu_epi16() {
        #[rustfmt::skip]
        let a: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm512_loadu_epi16(&a[0]);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_loadu_epi16() {
        let a: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let r = _mm256_loadu_epi16(&a[0]);
        let e = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_loadu_epi16() {
        let a: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
        let r = _mm_loadu_epi16(&a[0]);
        let e = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_loadu_epi8() {
        #[rustfmt::skip]
        let a: [i8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                           1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm512_loadu_epi8(&a[0]);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
                                32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_loadu_epi8() {
        #[rustfmt::skip]
        let a: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm256_loadu_epi8(&a[0]);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_loadu_epi8() {
        let a: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let r = _mm_loadu_epi8(&a[0]);
        let e = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_storeu_epi16() {
        let a = _mm512_set1_epi16(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_storeu_epi16() {
        let a = _mm256_set1_epi16(9);
        let mut r = _mm256_set1_epi32(0);
        _mm256_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_storeu_epi16() {
        let a = _mm_set1_epi16(9);
        let mut r = _mm_set1_epi32(0);
        _mm_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_storeu_epi8() {
        let a = _mm512_set1_epi8(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_storeu_epi8() {
        let a = _mm256_set1_epi8(9);
        let mut r = _mm256_set1_epi32(0);
        _mm256_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_storeu_epi8() {
        let a = _mm_set1_epi8(9);
        let mut r = _mm_set1_epi32(0);
        _mm_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        assert_eq_m128i(r, a);
    }

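    // Masked load/store tests: mask bit `i` (counting from the least
    // significant bit) selects element `i`. Masked-off lanes keep `src` (42)
    // for the masked loads, become zero for the maskz loads, and leave the
    // destination buffer untouched for the masked stores.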
    #[simd_test(enable = "avx512f,avx512bw")]
    unsafe fn test_mm512_mask_loadu_epi16() {
        let src = _mm512_set1_epi16(42);
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm512_mask_loadu_epi16(src, m, black_box(p));
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm512_loadu_epi16(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw")]
    unsafe fn test_mm512_maskz_loadu_epi16() {
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm512_maskz_loadu_epi16(m, black_box(p));
        let e = &[
            0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32,
        ];
        let e = _mm512_loadu_epi16(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw")]
    unsafe fn test_mm512_mask_storeu_epi16() {
        let mut r = [42_i16; 32];
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let a = _mm512_loadu_epi16(a.as_ptr());
        let m = 0b10101010_11001100_11101000_11001010;
        _mm512_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm512_loadu_epi16(e.as_ptr());
        assert_eq_m512i(_mm512_loadu_epi16(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512bw")]
    unsafe fn test_mm512_mask_loadu_epi8() {
        let src = _mm512_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        let r = _mm512_mask_loadu_epi8(src, m, black_box(p));
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
            45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
        ];
        let e = _mm512_loadu_epi8(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw")]
    unsafe fn test_mm512_maskz_loadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        let r = _mm512_maskz_loadu_epi8(m, black_box(p));
        let e = &[
            0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 44, 45, 46, 47, 48, 49,
            50, 51, 52, 53, 54, 55, 56, 0, 0, 0, 0, 0, 0, 0, 0,
        ];
        let e = _mm512_loadu_epi8(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw")]
    unsafe fn test_mm512_mask_storeu_epi8() {
        let mut r = [42_i8; 64];
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let a = _mm512_loadu_epi8(a.as_ptr());
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        _mm512_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
            45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
        ];
        let e = _mm512_loadu_epi8(e.as_ptr());
        assert_eq_m512i(_mm512_loadu_epi8(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_loadu_epi16() {
        let src = _mm256_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_mask_loadu_epi16(src, m, black_box(p));
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm256_loadu_epi16(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_loadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_maskz_loadu_epi16(m, black_box(p));
        let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
        let e = _mm256_loadu_epi16(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_storeu_epi16() {
        let mut r = [42_i16; 16];
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let a = _mm256_loadu_epi16(a.as_ptr());
        let m = 0b11101000_11001010;
        _mm256_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm256_loadu_epi16(e.as_ptr());
        assert_eq_m256i(_mm256_loadu_epi16(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_loadu_epi8() {
        let src = _mm256_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm256_mask_loadu_epi8(src, m, black_box(p));
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm256_loadu_epi8(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_loadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm256_maskz_loadu_epi8(m, black_box(p));
        let e = &[
            0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32,
        ];
        let e = _mm256_loadu_epi8(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_storeu_epi8() {
        let mut r = [42_i8; 32];
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let a = _mm256_loadu_epi8(a.as_ptr());
        let m = 0b10101010_11001100_11101000_11001010;
        _mm256_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm256_loadu_epi8(e.as_ptr());
        assert_eq_m256i(_mm256_loadu_epi8(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm_mask_loadu_epi16() {
        let src = _mm_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = _mm_mask_loadu_epi16(src, m, black_box(p));
        let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
        let e = _mm_loadu_epi16(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_loadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = _mm_maskz_loadu_epi16(m, black_box(p));
        let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8];
        let e = _mm_loadu_epi16(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm_mask_storeu_epi16() {
        let mut r = [42_i16; 8];
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let a = _mm_loadu_epi16(a.as_ptr());
        let m = 0b11001010;
        _mm_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
        let e = _mm_loadu_epi16(e.as_ptr());
        assert_eq_m128i(_mm_loadu_epi16(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm_mask_loadu_epi8() {
        let src = _mm_set1_epi8(42);
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_mask_loadu_epi8(src, m, black_box(p));
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm_loadu_epi8(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_loadu_epi8() {
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_maskz_loadu_epi8(m, black_box(p));
        let e = &[0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
        let e = _mm_loadu_epi8(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm_mask_storeu_epi8() {
        let mut r = [42_i8; 16];
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let a = _mm_loadu_epi8(a.as_ptr());
        let m = 0b11101000_11001010;
        _mm_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm_loadu_epi8(e.as_ptr());
        assert_eq_m128i(_mm_loadu_epi8(r.as_ptr()), e);
    }

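    // `madd_epi16` (vpmaddwd) multiplies adjacent pairs of 16-bit elements and
    // sums each pair into one 32-bit lane, so all-ones inputs yield 2 in every
    // i32 lane. In the masked variants a lane merged from `src` still holds
    // two packed 1_i16 values, which read back as the i32 value 1 << 16 | 1.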
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_madd_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_madd_epi16(a, b);
        let e = _mm512_set1_epi32(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_madd_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_madd_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_madd_epi16(a, 0b00000000_00001111, a, b);
        let e = _mm512_set_epi32(
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            2,
            2,
            2,
            2,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_madd_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_madd_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_madd_epi16(0b00000000_00001111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_madd_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_madd_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_madd_epi16(a, 0b00001111, a, b);
        let e = _mm256_set_epi32(
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            2,
            2,
            2,
            2,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_madd_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_madd_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_madd_epi16(0b00001111, a, b);
        let e = _mm256_set_epi32(0, 0, 0, 0, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_madd_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_madd_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_madd_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi32(2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_madd_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_madd_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_madd_epi16(0b00001111, a, b);
        let e = _mm_set_epi32(2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

16699    #[simd_test(enable = "avx512bw")]
16700    unsafe fn test_mm512_maddubs_epi16() {
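        // vpmaddubsw multiplies unsigned bytes of a by signed bytes of b, then saturating-adds adjacent pairs into i16 lanes: 1*1 + 1*1 = 2.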
16701        let a = _mm512_set1_epi8(1);
16702        let b = _mm512_set1_epi8(1);
16703        let r = _mm512_maddubs_epi16(a, b);
16704        let e = _mm512_set1_epi16(2);
16705        assert_eq_m512i(r, e);
16706    }
16707
16708    #[simd_test(enable = "avx512bw")]
16709    unsafe fn test_mm512_mask_maddubs_epi16() {
16710        let a = _mm512_set1_epi8(1);
16711        let b = _mm512_set1_epi8(1);
16712        let src = _mm512_set1_epi16(1);
16713        let r = _mm512_mask_maddubs_epi16(src, 0, a, b);
16714        assert_eq_m512i(r, src);
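        // Only mask bit 0 is set, so element 0 is recomputed (2) and all others keep src (1).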
16715        let r = _mm512_mask_maddubs_epi16(src, 0b00000000_00000000_00000000_00000001, a, b);
16716        #[rustfmt::skip]
16717        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16718                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2);
16719        assert_eq_m512i(r, e);
16720    }
16721
16722    #[simd_test(enable = "avx512bw")]
16723    unsafe fn test_mm512_maskz_maddubs_epi16() {
16724        let a = _mm512_set1_epi8(1);
16725        let b = _mm512_set1_epi8(1);
16726        let r = _mm512_maskz_maddubs_epi16(0, a, b);
16727        assert_eq_m512i(r, _mm512_setzero_si512());
16728        let r = _mm512_maskz_maddubs_epi16(0b00000000_11111111_00000000_11111111, a, b);
16729        #[rustfmt::skip]
16730        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2,
16731                                 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
16732        assert_eq_m512i(r, e);
16733    }
16734
16735    #[simd_test(enable = "avx512bw,avx512vl")]
16736    unsafe fn test_mm256_mask_maddubs_epi16() {
16737        let a = _mm256_set1_epi8(1);
16738        let b = _mm256_set1_epi8(1);
16739        let src = _mm256_set1_epi16(1);
16740        let r = _mm256_mask_maddubs_epi16(src, 0, a, b);
16741        assert_eq_m256i(r, src);
16742        let r = _mm256_mask_maddubs_epi16(src, 0b00000000_00000001, a, b);
16743        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2);
16744        assert_eq_m256i(r, e);
16745    }
16746
16747    #[simd_test(enable = "avx512bw,avx512vl")]
16748    unsafe fn test_mm256_maskz_maddubs_epi16() {
16749        let a = _mm256_set1_epi8(1);
16750        let b = _mm256_set1_epi8(1);
16751        let r = _mm256_maskz_maddubs_epi16(0, a, b);
16752        assert_eq_m256i(r, _mm256_setzero_si256());
16753        let r = _mm256_maskz_maddubs_epi16(0b00000000_11111111, a, b);
16754        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
16755        assert_eq_m256i(r, e);
16756    }
16757
16758    #[simd_test(enable = "avx512bw,avx512vl")]
16759    unsafe fn test_mm_mask_maddubs_epi16() {
16760        let a = _mm_set1_epi8(1);
16761        let b = _mm_set1_epi8(1);
16762        let src = _mm_set1_epi16(1);
16763        let r = _mm_mask_maddubs_epi16(src, 0, a, b);
16764        assert_eq_m128i(r, src);
16765        let r = _mm_mask_maddubs_epi16(src, 0b00000001, a, b);
16766        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 2);
16767        assert_eq_m128i(r, e);
16768    }
16769
16770    #[simd_test(enable = "avx512bw,avx512vl")]
16771    unsafe fn test_mm_maskz_maddubs_epi16() {
16772        let a = _mm_set1_epi8(1);
16773        let b = _mm_set1_epi8(1);
16774        let r = _mm_maskz_maddubs_epi16(0, a, b);
16775        assert_eq_m128i(r, _mm_setzero_si128());
16776        let r = _mm_maskz_maddubs_epi16(0b00001111, a, b);
16777        let e = _mm_set_epi16(0, 0, 0, 0, 2, 2, 2, 2);
16778        assert_eq_m128i(r, e);
16779    }
16780
16781    #[simd_test(enable = "avx512bw")]
16782    unsafe fn test_mm512_packs_epi32() {
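        // vpackssdw narrows i32 to i16 with signed saturation, combining four elements of a and four of b per 128-bit lane; i32::MAX clamps to i16::MAX.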
16783        let a = _mm512_set1_epi32(i32::MAX);
16784        let b = _mm512_set1_epi32(1);
16785        let r = _mm512_packs_epi32(a, b);
16786        #[rustfmt::skip]
16787        let e = _mm512_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX,
16788                                 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16789        assert_eq_m512i(r, e);
16790    }
16791
16792    #[simd_test(enable = "avx512bw")]
16793    unsafe fn test_mm512_mask_packs_epi32() {
16794        let a = _mm512_set1_epi32(i32::MAX);
16795        let b = _mm512_set1_epi32(1 << 16 | 1);
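        // b is 1 << 16 | 1 so that, reinterpreted as the epi16 src operand, every element reads as 1.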
16796        let r = _mm512_mask_packs_epi32(a, 0, a, b);
16797        assert_eq_m512i(r, a);
16798        let r = _mm512_mask_packs_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
16799        #[rustfmt::skip]
16800        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16801                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16802        assert_eq_m512i(r, e);
16803    }
16804
16805    #[simd_test(enable = "avx512bw")]
16806    unsafe fn test_mm512_maskz_packs_epi32() {
16807        let a = _mm512_set1_epi32(i32::MAX);
16808        let b = _mm512_set1_epi32(1);
16809        let r = _mm512_maskz_packs_epi32(0, a, b);
16810        assert_eq_m512i(r, _mm512_setzero_si512());
16811        let r = _mm512_maskz_packs_epi32(0b00000000_00000000_00000000_00001111, a, b);
16812        #[rustfmt::skip]
16813        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16814                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16815        assert_eq_m512i(r, e);
16816    }
16817
16818    #[simd_test(enable = "avx512bw,avx512vl")]
16819    unsafe fn test_mm256_mask_packs_epi32() {
16820        let a = _mm256_set1_epi32(i32::MAX);
16821        let b = _mm256_set1_epi32(1 << 16 | 1);
16822        let r = _mm256_mask_packs_epi32(a, 0, a, b);
16823        assert_eq_m256i(r, a);
16824        let r = _mm256_mask_packs_epi32(b, 0b00000000_00001111, a, b);
16825        #[rustfmt::skip]
16826        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16827        assert_eq_m256i(r, e);
16828    }
16829
16830    #[simd_test(enable = "avx512bw,avx512vl")]
16831    unsafe fn test_mm256_maskz_packs_epi32() {
16832        let a = _mm256_set1_epi32(i32::MAX);
16833        let b = _mm256_set1_epi32(1);
16834        let r = _mm256_maskz_packs_epi32(0, a, b);
16835        assert_eq_m256i(r, _mm256_setzero_si256());
16836        let r = _mm256_maskz_packs_epi32(0b00000000_00001111, a, b);
16837        #[rustfmt::skip]
16838        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16839        assert_eq_m256i(r, e);
16840    }
16841
16842    #[simd_test(enable = "avx512bw,avx512vl")]
16843    unsafe fn test_mm_mask_packs_epi32() {
16844        let a = _mm_set1_epi32(i32::MAX);
16845        let b = _mm_set1_epi32(1 << 16 | 1);
16846        let r = _mm_mask_packs_epi32(a, 0, a, b);
16847        assert_eq_m128i(r, a);
16848        let r = _mm_mask_packs_epi32(b, 0b00001111, a, b);
16849        let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16850        assert_eq_m128i(r, e);
16851    }
16852
16853    #[simd_test(enable = "avx512bw,avx512vl")]
16854    unsafe fn test_mm_maskz_packs_epi32() {
16855        let a = _mm_set1_epi32(i32::MAX);
16856        let b = _mm_set1_epi32(1);
16857        let r = _mm_maskz_packs_epi32(0, a, b);
16858        assert_eq_m128i(r, _mm_setzero_si128());
16859        let r = _mm_maskz_packs_epi32(0b00001111, a, b);
16860        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16861        assert_eq_m128i(r, e);
16862    }
16863
16864    #[simd_test(enable = "avx512bw")]
16865    unsafe fn test_mm512_packs_epi16() {
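        // vpacksswb narrows i16 to i8 with signed saturation; i16::MAX clamps to i8::MAX.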
16866        let a = _mm512_set1_epi16(i16::MAX);
16867        let b = _mm512_set1_epi16(1);
16868        let r = _mm512_packs_epi16(a, b);
16869        #[rustfmt::skip]
16870        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
16871                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
16872                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
16873                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
16874        assert_eq_m512i(r, e);
16875    }
16876
16877    #[simd_test(enable = "avx512bw")]
16878    unsafe fn test_mm512_mask_packs_epi16() {
16879        let a = _mm512_set1_epi16(i16::MAX);
16880        let b = _mm512_set1_epi16(1 << 8 | 1);
16881        let r = _mm512_mask_packs_epi16(a, 0, a, b);
16882        assert_eq_m512i(r, a);
16883        let r = _mm512_mask_packs_epi16(
16884            b,
16885            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
16886            a,
16887            b,
16888        );
16889        #[rustfmt::skip]
16890        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16891                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16892                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16893                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
16894        assert_eq_m512i(r, e);
16895    }
16896
16897    #[simd_test(enable = "avx512bw")]
16898    unsafe fn test_mm512_maskz_packs_epi16() {
16899        let a = _mm512_set1_epi16(i16::MAX);
16900        let b = _mm512_set1_epi16(1);
16901        let r = _mm512_maskz_packs_epi16(0, a, b);
16902        assert_eq_m512i(r, _mm512_setzero_si512());
16903        let r = _mm512_maskz_packs_epi16(
16904            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
16905            a,
16906            b,
16907        );
16908        #[rustfmt::skip]
16909        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16910                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16911                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16912                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
16913        assert_eq_m512i(r, e);
16914    }
16915
16916    #[simd_test(enable = "avx512bw,avx512vl")]
16917    unsafe fn test_mm256_mask_packs_epi16() {
16918        let a = _mm256_set1_epi16(i16::MAX);
16919        let b = _mm256_set1_epi16(1 << 8 | 1);
16920        let r = _mm256_mask_packs_epi16(a, 0, a, b);
16921        assert_eq_m256i(r, a);
16922        let r = _mm256_mask_packs_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
16923        #[rustfmt::skip]
16924        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16925                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
16926        assert_eq_m256i(r, e);
16927    }
16928
16929    #[simd_test(enable = "avx512bw,avx512vl")]
16930    unsafe fn test_mm256_maskz_packs_epi16() {
16931        let a = _mm256_set1_epi16(i16::MAX);
16932        let b = _mm256_set1_epi16(1);
16933        let r = _mm256_maskz_packs_epi16(0, a, b);
16934        assert_eq_m256i(r, _mm256_setzero_si256());
16935        let r = _mm256_maskz_packs_epi16(0b00000000_00000000_00000000_00001111, a, b);
16936        #[rustfmt::skip]
16937        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16938                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
16939        assert_eq_m256i(r, e);
16940    }
16941
16942    #[simd_test(enable = "avx512bw,avx512vl")]
16943    unsafe fn test_mm_mask_packs_epi16() {
16944        let a = _mm_set1_epi16(i16::MAX);
16945        let b = _mm_set1_epi16(1 << 8 | 1);
16946        let r = _mm_mask_packs_epi16(a, 0, a, b);
16947        assert_eq_m128i(r, a);
16948        let r = _mm_mask_packs_epi16(b, 0b00000000_00001111, a, b);
16949        #[rustfmt::skip]
16950        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
16951        assert_eq_m128i(r, e);
16952    }
16953
16954    #[simd_test(enable = "avx512bw,avx512vl")]
16955    unsafe fn test_mm_maskz_packs_epi16() {
16956        let a = _mm_set1_epi16(i16::MAX);
16957        let b = _mm_set1_epi16(1);
16958        let r = _mm_maskz_packs_epi16(0, a, b);
16959        assert_eq_m128i(r, _mm_setzero_si128());
16960        let r = _mm_maskz_packs_epi16(0b00000000_00001111, a, b);
16961        #[rustfmt::skip]
16962        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
16963        assert_eq_m128i(r, e);
16964    }
16965
16966    #[simd_test(enable = "avx512bw")]
16967    unsafe fn test_mm512_packus_epi32() {
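        // vpackusdw narrows i32 to u16 with unsigned saturation, so -1 clamps to 0 while 1 passes through.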
16968        let a = _mm512_set1_epi32(-1);
16969        let b = _mm512_set1_epi32(1);
16970        let r = _mm512_packus_epi32(a, b);
16971        #[rustfmt::skip]
16972        let e = _mm512_set_epi16(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
16973                                 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0);
16974        assert_eq_m512i(r, e);
16975    }
16976
16977    #[simd_test(enable = "avx512bw")]
16978    unsafe fn test_mm512_mask_packus_epi32() {
16979        let a = _mm512_set1_epi32(-1);
16980        let b = _mm512_set1_epi32(1 << 16 | 1);
16981        let r = _mm512_mask_packus_epi32(a, 0, a, b);
16982        assert_eq_m512i(r, a);
16983        let r = _mm512_mask_packus_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
16984        #[rustfmt::skip]
16985        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16986                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
16987        assert_eq_m512i(r, e);
16988    }
16989
16990    #[simd_test(enable = "avx512bw")]
16991    unsafe fn test_mm512_maskz_packus_epi32() {
16992        let a = _mm512_set1_epi32(-1);
16993        let b = _mm512_set1_epi32(1);
16994        let r = _mm512_maskz_packus_epi32(0, a, b);
16995        assert_eq_m512i(r, _mm512_setzero_si512());
16996        let r = _mm512_maskz_packus_epi32(0b00000000_00000000_00000000_00001111, a, b);
16997        #[rustfmt::skip]
16998        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16999                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
17000        assert_eq_m512i(r, e);
17001    }
17002
17003    #[simd_test(enable = "avx512bw,avx512vl")]
17004    unsafe fn test_mm256_mask_packus_epi32() {
17005        let a = _mm256_set1_epi32(-1);
17006        let b = _mm256_set1_epi32(1 << 16 | 1);
17007        let r = _mm256_mask_packus_epi32(a, 0, a, b);
17008        assert_eq_m256i(r, a);
17009        let r = _mm256_mask_packus_epi32(b, 0b00000000_00001111, a, b);
17010        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
17011        assert_eq_m256i(r, e);
17012    }
17013
17014    #[simd_test(enable = "avx512bw,avx512vl")]
17015    unsafe fn test_mm256_maskz_packus_epi32() {
17016        let a = _mm256_set1_epi32(-1);
17017        let b = _mm256_set1_epi32(1);
17018        let r = _mm256_maskz_packus_epi32(0, a, b);
17019        assert_eq_m256i(r, _mm256_setzero_si256());
17020        let r = _mm256_maskz_packus_epi32(0b00000000_00001111, a, b);
17021        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
17022        assert_eq_m256i(r, e);
17023    }
17024
17025    #[simd_test(enable = "avx512bw,avx512vl")]
17026    unsafe fn test_mm_mask_packus_epi32() {
17027        let a = _mm_set1_epi32(-1);
17028        let b = _mm_set1_epi32(1 << 16 | 1);
17029        let r = _mm_mask_packus_epi32(a, 0, a, b);
17030        assert_eq_m128i(r, a);
17031        let r = _mm_mask_packus_epi32(b, 0b00001111, a, b);
17032        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
17033        assert_eq_m128i(r, e);
17034    }
17035
17036    #[simd_test(enable = "avx512bw,avx512vl")]
17037    unsafe fn test_mm_maskz_packus_epi32() {
17038        let a = _mm_set1_epi32(-1);
17039        let b = _mm_set1_epi32(1);
17040        let r = _mm_maskz_packus_epi32(0, a, b);
17041        assert_eq_m128i(r, _mm_setzero_si128());
17042        let r = _mm_maskz_packus_epi32(0b00001111, a, b);
17043        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
17044        assert_eq_m128i(r, e);
17045    }
17046
17047    #[simd_test(enable = "avx512bw")]
17048    unsafe fn test_mm512_packus_epi16() {
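        // vpackuswb narrows i16 to u8 with unsigned saturation, so -1 clamps to 0.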
17049        let a = _mm512_set1_epi16(-1);
17050        let b = _mm512_set1_epi16(1);
17051        let r = _mm512_packus_epi16(a, b);
17052        #[rustfmt::skip]
17053        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
17054                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
17055                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
17056                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);
17057        assert_eq_m512i(r, e);
17058    }
17059
17060    #[simd_test(enable = "avx512bw")]
17061    unsafe fn test_mm512_mask_packus_epi16() {
17062        let a = _mm512_set1_epi16(-1);
17063        let b = _mm512_set1_epi16(1 << 8 | 1);
17064        let r = _mm512_mask_packus_epi16(a, 0, a, b);
17065        assert_eq_m512i(r, a);
17066        let r = _mm512_mask_packus_epi16(
17067            b,
17068            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
17069            a,
17070            b,
17071        );
17072        #[rustfmt::skip]
17073        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17074                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17075                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17076                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
17077        assert_eq_m512i(r, e);
17078    }
17079
17080    #[simd_test(enable = "avx512bw")]
17081    unsafe fn test_mm512_maskz_packus_epi16() {
17082        let a = _mm512_set1_epi16(-1);
17083        let b = _mm512_set1_epi16(1);
17084        let r = _mm512_maskz_packus_epi16(0, a, b);
17085        assert_eq_m512i(r, _mm512_setzero_si512());
17086        let r = _mm512_maskz_packus_epi16(
17087            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
17088            a,
17089            b,
17090        );
17091        #[rustfmt::skip]
17092        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17093                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17094                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17095                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
17096        assert_eq_m512i(r, e);
17097    }
17098
17099    #[simd_test(enable = "avx512bw,avx512vl")]
17100    unsafe fn test_mm256_mask_packus_epi16() {
17101        let a = _mm256_set1_epi16(-1);
17102        let b = _mm256_set1_epi16(1 << 8 | 1);
17103        let r = _mm256_mask_packus_epi16(a, 0, a, b);
17104        assert_eq_m256i(r, a);
17105        let r = _mm256_mask_packus_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
17106        #[rustfmt::skip]
17107        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17108                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
17109        assert_eq_m256i(r, e);
17110    }
17111
17112    #[simd_test(enable = "avx512bw,avx512vl")]
17113    unsafe fn test_mm256_maskz_packus_epi16() {
17114        let a = _mm256_set1_epi16(-1);
17115        let b = _mm256_set1_epi16(1);
17116        let r = _mm256_maskz_packus_epi16(0, a, b);
17117        assert_eq_m256i(r, _mm256_setzero_si256());
17118        let r = _mm256_maskz_packus_epi16(0b00000000_00000000_00000000_00001111, a, b);
17119        #[rustfmt::skip]
17120        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17121                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
17122        assert_eq_m256i(r, e);
17123    }
17124
17125    #[simd_test(enable = "avx512bw,avx512vl")]
17126    unsafe fn test_mm_mask_packus_epi16() {
17127        let a = _mm_set1_epi16(-1);
17128        let b = _mm_set1_epi16(1 << 8 | 1);
17129        let r = _mm_mask_packus_epi16(a, 0, a, b);
17130        assert_eq_m128i(r, a);
17131        let r = _mm_mask_packus_epi16(b, 0b00000000_00001111, a, b);
17132        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
17133        assert_eq_m128i(r, e);
17134    }
17135
17136    #[simd_test(enable = "avx512bw,avx512vl")]
17137    unsafe fn test_mm_maskz_packus_epi16() {
17138        let a = _mm_set1_epi16(-1);
17139        let b = _mm_set1_epi16(1);
17140        let r = _mm_maskz_packus_epi16(0, a, b);
17141        assert_eq_m128i(r, _mm_setzero_si128());
17142        let r = _mm_maskz_packus_epi16(0b00000000_00001111, a, b);
17143        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
17144        assert_eq_m128i(r, e);
17145    }
17146
17147    #[simd_test(enable = "avx512bw")]
17148    unsafe fn test_mm512_avg_epu16() {
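        // vpavgw is a rounding average: (a + b + 1) >> 1, hence avg(1, 1) == 1.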
17149        let a = _mm512_set1_epi16(1);
17150        let b = _mm512_set1_epi16(1);
17151        let r = _mm512_avg_epu16(a, b);
17152        let e = _mm512_set1_epi16(1);
17153        assert_eq_m512i(r, e);
17154    }
17155
17156    #[simd_test(enable = "avx512bw")]
17157    unsafe fn test_mm512_mask_avg_epu16() {
17158        let a = _mm512_set1_epi16(1);
17159        let b = _mm512_set1_epi16(1);
17160        let r = _mm512_mask_avg_epu16(a, 0, a, b);
17161        assert_eq_m512i(r, a);
17162        let r = _mm512_mask_avg_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
17163        #[rustfmt::skip]
17164        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17165                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17166        assert_eq_m512i(r, e);
17167    }
17168
17169    #[simd_test(enable = "avx512bw")]
17170    unsafe fn test_mm512_maskz_avg_epu16() {
17171        let a = _mm512_set1_epi16(1);
17172        let b = _mm512_set1_epi16(1);
17173        let r = _mm512_maskz_avg_epu16(0, a, b);
17174        assert_eq_m512i(r, _mm512_setzero_si512());
17175        let r = _mm512_maskz_avg_epu16(0b00000000_00000000_00000000_00001111, a, b);
17176        #[rustfmt::skip]
17177        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17178                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
17179        assert_eq_m512i(r, e);
17180    }
17181
17182    #[simd_test(enable = "avx512bw,avx512vl")]
17183    unsafe fn test_mm256_mask_avg_epu16() {
17184        let a = _mm256_set1_epi16(1);
17185        let b = _mm256_set1_epi16(1);
17186        let r = _mm256_mask_avg_epu16(a, 0, a, b);
17187        assert_eq_m256i(r, a);
17188        let r = _mm256_mask_avg_epu16(a, 0b00000000_00001111, a, b);
17189        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17190        assert_eq_m256i(r, e);
17191    }
17192
17193    #[simd_test(enable = "avx512bw,avx512vl")]
17194    unsafe fn test_mm256_maskz_avg_epu16() {
17195        let a = _mm256_set1_epi16(1);
17196        let b = _mm256_set1_epi16(1);
17197        let r = _mm256_maskz_avg_epu16(0, a, b);
17198        assert_eq_m256i(r, _mm256_setzero_si256());
17199        let r = _mm256_maskz_avg_epu16(0b00000000_00001111, a, b);
17200        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
17201        assert_eq_m256i(r, e);
17202    }
17203
17204    #[simd_test(enable = "avx512bw,avx512vl")]
17205    unsafe fn test_mm_mask_avg_epu16() {
17206        let a = _mm_set1_epi16(1);
17207        let b = _mm_set1_epi16(1);
17208        let r = _mm_mask_avg_epu16(a, 0, a, b);
17209        assert_eq_m128i(r, a);
17210        let r = _mm_mask_avg_epu16(a, 0b00001111, a, b);
17211        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
17212        assert_eq_m128i(r, e);
17213    }
17214
17215    #[simd_test(enable = "avx512bw,avx512vl")]
17216    unsafe fn test_mm_maskz_avg_epu16() {
17217        let a = _mm_set1_epi16(1);
17218        let b = _mm_set1_epi16(1);
17219        let r = _mm_maskz_avg_epu16(0, a, b);
17220        assert_eq_m128i(r, _mm_setzero_si128());
17221        let r = _mm_maskz_avg_epu16(0b00001111, a, b);
17222        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
17223        assert_eq_m128i(r, e);
17224    }
17225
17226    #[simd_test(enable = "avx512bw")]
17227    unsafe fn test_mm512_avg_epu8() {
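        // vpavgb rounds the same way: (1 + 1 + 1) >> 1 == 1.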
17228        let a = _mm512_set1_epi8(1);
17229        let b = _mm512_set1_epi8(1);
17230        let r = _mm512_avg_epu8(a, b);
17231        let e = _mm512_set1_epi8(1);
17232        assert_eq_m512i(r, e);
17233    }
17234
17235    #[simd_test(enable = "avx512bw")]
17236    unsafe fn test_mm512_mask_avg_epu8() {
17237        let a = _mm512_set1_epi8(1);
17238        let b = _mm512_set1_epi8(1);
17239        let r = _mm512_mask_avg_epu8(a, 0, a, b);
17240        assert_eq_m512i(r, a);
17241        let r = _mm512_mask_avg_epu8(
17242            a,
17243            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
17244            a,
17245            b,
17246        );
17247        #[rustfmt::skip]
17248        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17249                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17250                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17251                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17252        assert_eq_m512i(r, e);
17253    }
17254
17255    #[simd_test(enable = "avx512bw")]
17256    unsafe fn test_mm512_maskz_avg_epu8() {
17257        let a = _mm512_set1_epi8(1);
17258        let b = _mm512_set1_epi8(1);
17259        let r = _mm512_maskz_avg_epu8(0, a, b);
17260        assert_eq_m512i(r, _mm512_setzero_si512());
17261        let r = _mm512_maskz_avg_epu8(
17262            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
17263            a,
17264            b,
17265        );
17266        #[rustfmt::skip]
17267        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17268                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17269                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17270                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
17271        assert_eq_m512i(r, e);
17272    }
17273
17274    #[simd_test(enable = "avx512bw,avx512vl")]
17275    unsafe fn test_mm256_mask_avg_epu8() {
17276        let a = _mm256_set1_epi8(1);
17277        let b = _mm256_set1_epi8(1);
17278        let r = _mm256_mask_avg_epu8(a, 0, a, b);
17279        assert_eq_m256i(r, a);
17280        let r = _mm256_mask_avg_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
17281        #[rustfmt::skip]
17282        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17283                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17284        assert_eq_m256i(r, e);
17285    }
17286
17287    #[simd_test(enable = "avx512bw,avx512vl")]
17288    unsafe fn test_mm256_maskz_avg_epu8() {
17289        let a = _mm256_set1_epi8(1);
17290        let b = _mm256_set1_epi8(1);
17291        let r = _mm256_maskz_avg_epu8(0, a, b);
17292        assert_eq_m256i(r, _mm256_setzero_si256());
17293        let r = _mm256_maskz_avg_epu8(0b00000000_00000000_00000000_00001111, a, b);
17294        #[rustfmt::skip]
17295        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17296                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
17297        assert_eq_m256i(r, e);
17298    }
17299
17300    #[simd_test(enable = "avx512bw,avx512vl")]
17301    unsafe fn test_mm_mask_avg_epu8() {
17302        let a = _mm_set1_epi8(1);
17303        let b = _mm_set1_epi8(1);
17304        let r = _mm_mask_avg_epu8(a, 0, a, b);
17305        assert_eq_m128i(r, a);
17306        let r = _mm_mask_avg_epu8(a, 0b00000000_00001111, a, b);
17307        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17308        assert_eq_m128i(r, e);
17309    }
17310
17311    #[simd_test(enable = "avx512bw,avx512vl")]
17312    unsafe fn test_mm_maskz_avg_epu8() {
17313        let a = _mm_set1_epi8(1);
17314        let b = _mm_set1_epi8(1);
17315        let r = _mm_maskz_avg_epu8(0, a, b);
17316        assert_eq_m128i(r, _mm_setzero_si128());
17317        let r = _mm_maskz_avg_epu8(0b00000000_00001111, a, b);
17318        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
17319        assert_eq_m128i(r, e);
17320    }
17321
17322    #[simd_test(enable = "avx512bw")]
17323    unsafe fn test_mm512_sll_epi16() {
17324        let a = _mm512_set1_epi16(1 << 15);
17325        let count = _mm_set1_epi16(2);
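        // sll reads the low 64 bits of count as a single shift amount; _mm_set1_epi16(2) encodes 0x0002_0002_0002_0002, which exceeds 15 and zeroes every element.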
17326        let r = _mm512_sll_epi16(a, count);
17327        let e = _mm512_set1_epi16(0);
17328        assert_eq_m512i(r, e);
17329    }
17330
17331    #[simd_test(enable = "avx512bw")]
17332    unsafe fn test_mm512_mask_sll_epi16() {
17333        let a = _mm512_set1_epi16(1 << 15);
17334        let count = _mm_set1_epi16(2);
17335        let r = _mm512_mask_sll_epi16(a, 0, a, count);
17336        assert_eq_m512i(r, a);
17337        let r = _mm512_mask_sll_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
17338        let e = _mm512_set1_epi16(0);
17339        assert_eq_m512i(r, e);
17340    }
17341
17342    #[simd_test(enable = "avx512bw")]
17343    unsafe fn test_mm512_maskz_sll_epi16() {
17344        let a = _mm512_set1_epi16(1 << 15);
17345        let count = _mm_set1_epi16(2);
17346        let r = _mm512_maskz_sll_epi16(0, a, count);
17347        assert_eq_m512i(r, _mm512_setzero_si512());
17348        let r = _mm512_maskz_sll_epi16(0b11111111_11111111_11111111_11111111, a, count);
17349        let e = _mm512_set1_epi16(0);
17350        assert_eq_m512i(r, e);
17351    }
17352
17353    #[simd_test(enable = "avx512bw,avx512vl")]
17354    unsafe fn test_mm256_mask_sll_epi16() {
17355        let a = _mm256_set1_epi16(1 << 15);
17356        let count = _mm_set1_epi16(2);
17357        let r = _mm256_mask_sll_epi16(a, 0, a, count);
17358        assert_eq_m256i(r, a);
17359        let r = _mm256_mask_sll_epi16(a, 0b11111111_11111111, a, count);
17360        let e = _mm256_set1_epi16(0);
17361        assert_eq_m256i(r, e);
17362    }
17363
17364    #[simd_test(enable = "avx512bw,avx512vl")]
17365    unsafe fn test_mm256_maskz_sll_epi16() {
17366        let a = _mm256_set1_epi16(1 << 15);
17367        let count = _mm_set1_epi16(2);
17368        let r = _mm256_maskz_sll_epi16(0, a, count);
17369        assert_eq_m256i(r, _mm256_setzero_si256());
17370        let r = _mm256_maskz_sll_epi16(0b11111111_11111111, a, count);
17371        let e = _mm256_set1_epi16(0);
17372        assert_eq_m256i(r, e);
17373    }
17374
17375    #[simd_test(enable = "avx512bw,avx512vl")]
17376    unsafe fn test_mm_mask_sll_epi16() {
17377        let a = _mm_set1_epi16(1 << 15);
17378        let count = _mm_set1_epi16(2);
17379        let r = _mm_mask_sll_epi16(a, 0, a, count);
17380        assert_eq_m128i(r, a);
17381        let r = _mm_mask_sll_epi16(a, 0b11111111, a, count);
17382        let e = _mm_set1_epi16(0);
17383        assert_eq_m128i(r, e);
17384    }
17385
17386    #[simd_test(enable = "avx512bw,avx512vl")]
17387    unsafe fn test_mm_maskz_sll_epi16() {
17388        let a = _mm_set1_epi16(1 << 15);
17389        let count = _mm_set1_epi16(2);
17390        let r = _mm_maskz_sll_epi16(0, a, count);
17391        assert_eq_m128i(r, _mm_setzero_si128());
17392        let r = _mm_maskz_sll_epi16(0b11111111, a, count);
17393        let e = _mm_set1_epi16(0);
17394        assert_eq_m128i(r, e);
17395    }
17396
17397    #[simd_test(enable = "avx512bw")]
17398    unsafe fn test_mm512_slli_epi16() {
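        // slli takes the shift count as a const generic; shifting the sign bit left by 1 overflows to 0.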
17399        let a = _mm512_set1_epi16(1 << 15);
17400        let r = _mm512_slli_epi16::<1>(a);
17401        let e = _mm512_set1_epi16(0);
17402        assert_eq_m512i(r, e);
17403    }
17404
17405    #[simd_test(enable = "avx512bw")]
17406    unsafe fn test_mm512_mask_slli_epi16() {
17407        let a = _mm512_set1_epi16(1 << 15);
17408        let r = _mm512_mask_slli_epi16::<1>(a, 0, a);
17409        assert_eq_m512i(r, a);
17410        let r = _mm512_mask_slli_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a);
17411        let e = _mm512_set1_epi16(0);
17412        assert_eq_m512i(r, e);
17413    }
17414
17415    #[simd_test(enable = "avx512bw")]
17416    unsafe fn test_mm512_maskz_slli_epi16() {
17417        let a = _mm512_set1_epi16(1 << 15);
17418        let r = _mm512_maskz_slli_epi16::<1>(0, a);
17419        assert_eq_m512i(r, _mm512_setzero_si512());
17420        let r = _mm512_maskz_slli_epi16::<1>(0b11111111_11111111_11111111_11111111, a);
17421        let e = _mm512_set1_epi16(0);
17422        assert_eq_m512i(r, e);
17423    }
17424
17425    #[simd_test(enable = "avx512bw,avx512vl")]
17426    unsafe fn test_mm256_mask_slli_epi16() {
17427        let a = _mm256_set1_epi16(1 << 15);
17428        let r = _mm256_mask_slli_epi16::<1>(a, 0, a);
17429        assert_eq_m256i(r, a);
17430        let r = _mm256_mask_slli_epi16::<1>(a, 0b11111111_11111111, a);
17431        let e = _mm256_set1_epi16(0);
17432        assert_eq_m256i(r, e);
17433    }
17434
17435    #[simd_test(enable = "avx512bw,avx512vl")]
17436    unsafe fn test_mm256_maskz_slli_epi16() {
17437        let a = _mm256_set1_epi16(1 << 15);
17438        let r = _mm256_maskz_slli_epi16::<1>(0, a);
17439        assert_eq_m256i(r, _mm256_setzero_si256());
17440        let r = _mm256_maskz_slli_epi16::<1>(0b11111111_11111111, a);
17441        let e = _mm256_set1_epi16(0);
17442        assert_eq_m256i(r, e);
17443    }
17444
17445    #[simd_test(enable = "avx512bw,avx512vl")]
17446    unsafe fn test_mm_mask_slli_epi16() {
17447        let a = _mm_set1_epi16(1 << 15);
17448        let r = _mm_mask_slli_epi16::<1>(a, 0, a);
17449        assert_eq_m128i(r, a);
17450        let r = _mm_mask_slli_epi16::<1>(a, 0b11111111, a);
17451        let e = _mm_set1_epi16(0);
17452        assert_eq_m128i(r, e);
17453    }
17454
17455    #[simd_test(enable = "avx512bw,avx512vl")]
17456    unsafe fn test_mm_maskz_slli_epi16() {
17457        let a = _mm_set1_epi16(1 << 15);
17458        let r = _mm_maskz_slli_epi16::<1>(0, a);
17459        assert_eq_m128i(r, _mm_setzero_si128());
17460        let r = _mm_maskz_slli_epi16::<1>(0b11111111, a);
17461        let e = _mm_set1_epi16(0);
17462        assert_eq_m128i(r, e);
17463    }
17464
17465    #[simd_test(enable = "avx512bw")]
17466    unsafe fn test_mm512_sllv_epi16() {
17467        let a = _mm512_set1_epi16(1 << 15);
17468        let count = _mm512_set1_epi16(2);
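        // Unlike sll, sllv shifts each element by its own count, so 1 << 15 shifted left by 2 drops out to 0.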
17469        let r = _mm512_sllv_epi16(a, count);
17470        let e = _mm512_set1_epi16(0);
17471        assert_eq_m512i(r, e);
17472    }
17473
17474    #[simd_test(enable = "avx512bw")]
17475    unsafe fn test_mm512_mask_sllv_epi16() {
17476        let a = _mm512_set1_epi16(1 << 15);
17477        let count = _mm512_set1_epi16(2);
17478        let r = _mm512_mask_sllv_epi16(a, 0, a, count);
17479        assert_eq_m512i(r, a);
17480        let r = _mm512_mask_sllv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
17481        let e = _mm512_set1_epi16(0);
17482        assert_eq_m512i(r, e);
17483    }
17484
17485    #[simd_test(enable = "avx512bw")]
17486    unsafe fn test_mm512_maskz_sllv_epi16() {
17487        let a = _mm512_set1_epi16(1 << 15);
17488        let count = _mm512_set1_epi16(2);
17489        let r = _mm512_maskz_sllv_epi16(0, a, count);
17490        assert_eq_m512i(r, _mm512_setzero_si512());
17491        let r = _mm512_maskz_sllv_epi16(0b11111111_11111111_11111111_11111111, a, count);
17492        let e = _mm512_set1_epi16(0);
17493        assert_eq_m512i(r, e);
17494    }
17495
17496    #[simd_test(enable = "avx512bw,avx512vl")]
17497    unsafe fn test_mm256_sllv_epi16() {
17498        let a = _mm256_set1_epi16(1 << 15);
17499        let count = _mm256_set1_epi16(2);
17500        let r = _mm256_sllv_epi16(a, count);
17501        let e = _mm256_set1_epi16(0);
17502        assert_eq_m256i(r, e);
17503    }
17504
17505    #[simd_test(enable = "avx512bw,avx512vl")]
17506    unsafe fn test_mm256_mask_sllv_epi16() {
17507        let a = _mm256_set1_epi16(1 << 15);
17508        let count = _mm256_set1_epi16(2);
17509        let r = _mm256_mask_sllv_epi16(a, 0, a, count);
17510        assert_eq_m256i(r, a);
17511        let r = _mm256_mask_sllv_epi16(a, 0b11111111_11111111, a, count);
17512        let e = _mm256_set1_epi16(0);
17513        assert_eq_m256i(r, e);
17514    }
17515
17516    #[simd_test(enable = "avx512bw,avx512vl")]
17517    unsafe fn test_mm256_maskz_sllv_epi16() {
17518        let a = _mm256_set1_epi16(1 << 15);
17519        let count = _mm256_set1_epi16(2);
17520        let r = _mm256_maskz_sllv_epi16(0, a, count);
17521        assert_eq_m256i(r, _mm256_setzero_si256());
17522        let r = _mm256_maskz_sllv_epi16(0b11111111_11111111, a, count);
17523        let e = _mm256_set1_epi16(0);
17524        assert_eq_m256i(r, e);
17525    }
17526
17527    #[simd_test(enable = "avx512bw,avx512vl")]
17528    unsafe fn test_mm_sllv_epi16() {
17529        let a = _mm_set1_epi16(1 << 15);
17530        let count = _mm_set1_epi16(2);
17531        let r = _mm_sllv_epi16(a, count);
17532        let e = _mm_set1_epi16(0);
17533        assert_eq_m128i(r, e);
17534    }
17535
17536    #[simd_test(enable = "avx512bw,avx512vl")]
17537    unsafe fn test_mm_mask_sllv_epi16() {
17538        let a = _mm_set1_epi16(1 << 15);
17539        let count = _mm_set1_epi16(2);
17540        let r = _mm_mask_sllv_epi16(a, 0, a, count);
17541        assert_eq_m128i(r, a);
17542        let r = _mm_mask_sllv_epi16(a, 0b11111111, a, count);
17543        let e = _mm_set1_epi16(0);
17544        assert_eq_m128i(r, e);
17545    }
17546
17547    #[simd_test(enable = "avx512bw,avx512vl")]
17548    unsafe fn test_mm_maskz_sllv_epi16() {
17549        let a = _mm_set1_epi16(1 << 15);
17550        let count = _mm_set1_epi16(2);
17551        let r = _mm_maskz_sllv_epi16(0, a, count);
17552        assert_eq_m128i(r, _mm_setzero_si128());
17553        let r = _mm_maskz_sllv_epi16(0b11111111, a, count);
17554        let e = _mm_set1_epi16(0);
17555        assert_eq_m128i(r, e);
17556    }
17557
17558    #[simd_test(enable = "avx512bw")]
17559    unsafe fn test_mm512_srl_epi16() {
17560        let a = _mm512_set1_epi16(1 << 1);
17561        let count = _mm_set1_epi16(2);
17562        let r = _mm512_srl_epi16(a, count);
17563        let e = _mm512_set1_epi16(0);
17564        assert_eq_m512i(r, e);
17565    }
17566
17567    #[simd_test(enable = "avx512bw")]
17568    unsafe fn test_mm512_mask_srl_epi16() {
17569        let a = _mm512_set1_epi16(1 << 1);
17570        let count = _mm_set1_epi16(2);
17571        let r = _mm512_mask_srl_epi16(a, 0, a, count);
17572        assert_eq_m512i(r, a);
17573        let r = _mm512_mask_srl_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
17574        let e = _mm512_set1_epi16(0);
17575        assert_eq_m512i(r, e);
17576    }
17577
17578    #[simd_test(enable = "avx512bw")]
17579    unsafe fn test_mm512_maskz_srl_epi16() {
17580        let a = _mm512_set1_epi16(1 << 1);
17581        let count = _mm_set1_epi16(2);
17582        let r = _mm512_maskz_srl_epi16(0, a, count);
17583        assert_eq_m512i(r, _mm512_setzero_si512());
17584        let r = _mm512_maskz_srl_epi16(0b11111111_11111111_11111111_11111111, a, count);
17585        let e = _mm512_set1_epi16(0);
17586        assert_eq_m512i(r, e);
17587    }
17588
17589    #[simd_test(enable = "avx512bw,avx512vl")]
17590    unsafe fn test_mm256_mask_srl_epi16() {
17591        let a = _mm256_set1_epi16(1 << 1);
17592        let count = _mm_set1_epi16(2);
17593        let r = _mm256_mask_srl_epi16(a, 0, a, count);
17594        assert_eq_m256i(r, a);
17595        let r = _mm256_mask_srl_epi16(a, 0b11111111_11111111, a, count);
17596        let e = _mm256_set1_epi16(0);
17597        assert_eq_m256i(r, e);
17598    }
17599
17600    #[simd_test(enable = "avx512bw,avx512vl")]
17601    unsafe fn test_mm256_maskz_srl_epi16() {
17602        let a = _mm256_set1_epi16(1 << 1);
17603        let count = _mm_set1_epi16(2);
17604        let r = _mm256_maskz_srl_epi16(0, a, count);
17605        assert_eq_m256i(r, _mm256_setzero_si256());
17606        let r = _mm256_maskz_srl_epi16(0b11111111_11111111, a, count);
17607        let e = _mm256_set1_epi16(0);
17608        assert_eq_m256i(r, e);
17609    }
17610
17611    #[simd_test(enable = "avx512bw,avx512vl")]
17612    unsafe fn test_mm_mask_srl_epi16() {
17613        let a = _mm_set1_epi16(1 << 1);
17614        let count = _mm_set1_epi16(2);
17615        let r = _mm_mask_srl_epi16(a, 0, a, count);
17616        assert_eq_m128i(r, a);
17617        let r = _mm_mask_srl_epi16(a, 0b11111111, a, count);
17618        let e = _mm_set1_epi16(0);
17619        assert_eq_m128i(r, e);
17620    }
17621
17622    #[simd_test(enable = "avx512bw,avx512vl")]
17623    unsafe fn test_mm_maskz_srl_epi16() {
17624        let a = _mm_set1_epi16(1 << 1);
17625        let count = _mm_set1_epi16(2);
17626        let r = _mm_maskz_srl_epi16(0, a, count);
17627        assert_eq_m128i(r, _mm_setzero_si128());
17628        let r = _mm_maskz_srl_epi16(0b11111111, a, count);
17629        let e = _mm_set1_epi16(0);
17630        assert_eq_m128i(r, e);
17631    }
17632
17633    #[simd_test(enable = "avx512bw")]
17634    unsafe fn test_mm512_srli_epi16() {
17635        let a = _mm512_set1_epi16(1 << 1);
17636        let r = _mm512_srli_epi16::<2>(a);
17637        let e = _mm512_set1_epi16(0);
17638        assert_eq_m512i(r, e);
17639    }
17640
17641    #[simd_test(enable = "avx512bw")]
17642    unsafe fn test_mm512_mask_srli_epi16() {
17643        let a = _mm512_set1_epi16(1 << 1);
17644        let r = _mm512_mask_srli_epi16::<2>(a, 0, a);
17645        assert_eq_m512i(r, a);
17646        let r = _mm512_mask_srli_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
17647        let e = _mm512_set1_epi16(0);
17648        assert_eq_m512i(r, e);
17649    }
17650
17651    #[simd_test(enable = "avx512bw")]
17652    unsafe fn test_mm512_maskz_srli_epi16() {
17653        let a = _mm512_set1_epi16(1 << 1);
17654        let r = _mm512_maskz_srli_epi16::<2>(0, a);
17655        assert_eq_m512i(r, _mm512_setzero_si512());
17656        let r = _mm512_maskz_srli_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
17657        let e = _mm512_set1_epi16(0);
17658        assert_eq_m512i(r, e);
17659    }
17660
17661    #[simd_test(enable = "avx512bw,avx512vl")]
17662    unsafe fn test_mm256_mask_srli_epi16() {
17663        let a = _mm256_set1_epi16(1 << 1);
17664        let r = _mm256_mask_srli_epi16::<2>(a, 0, a);
17665        assert_eq_m256i(r, a);
17666        let r = _mm256_mask_srli_epi16::<2>(a, 0b11111111_11111111, a);
17667        let e = _mm256_set1_epi16(0);
17668        assert_eq_m256i(r, e);
17669    }
17670
17671    #[simd_test(enable = "avx512bw,avx512vl")]
17672    unsafe fn test_mm256_maskz_srli_epi16() {
17673        let a = _mm256_set1_epi16(1 << 1);
17674        let r = _mm256_maskz_srli_epi16::<2>(0, a);
17675        assert_eq_m256i(r, _mm256_setzero_si256());
17676        let r = _mm256_maskz_srli_epi16::<2>(0b11111111_11111111, a);
17677        let e = _mm256_set1_epi16(0);
17678        assert_eq_m256i(r, e);
17679    }
17680
17681    #[simd_test(enable = "avx512bw,avx512vl")]
17682    unsafe fn test_mm_mask_srli_epi16() {
17683        let a = _mm_set1_epi16(1 << 1);
17684        let r = _mm_mask_srli_epi16::<2>(a, 0, a);
17685        assert_eq_m128i(r, a);
17686        let r = _mm_mask_srli_epi16::<2>(a, 0b11111111, a);
17687        let e = _mm_set1_epi16(0);
17688        assert_eq_m128i(r, e);
17689    }
17690
17691    #[simd_test(enable = "avx512bw,avx512vl")]
17692    unsafe fn test_mm_maskz_srli_epi16() {
17693        let a = _mm_set1_epi16(1 << 1);
17694        let r = _mm_maskz_srli_epi16::<2>(0, a);
17695        assert_eq_m128i(r, _mm_setzero_si128());
17696        let r = _mm_maskz_srli_epi16::<2>(0b11111111, a);
17697        let e = _mm_set1_epi16(0);
17698        assert_eq_m128i(r, e);
17699    }
17700
17701    #[simd_test(enable = "avx512bw")]
17702    unsafe fn test_mm512_srlv_epi16() {
17703        let a = _mm512_set1_epi16(1 << 1);
17704        let count = _mm512_set1_epi16(2);
17705        let r = _mm512_srlv_epi16(a, count);
17706        let e = _mm512_set1_epi16(0);
17707        assert_eq_m512i(r, e);
17708    }
17709
17710    #[simd_test(enable = "avx512bw")]
17711    unsafe fn test_mm512_mask_srlv_epi16() {
17712        let a = _mm512_set1_epi16(1 << 1);
17713        let count = _mm512_set1_epi16(2);
17714        let r = _mm512_mask_srlv_epi16(a, 0, a, count);
17715        assert_eq_m512i(r, a);
17716        let r = _mm512_mask_srlv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
17717        let e = _mm512_set1_epi16(0);
17718        assert_eq_m512i(r, e);
17719    }
17720
17721    #[simd_test(enable = "avx512bw")]
17722    unsafe fn test_mm512_maskz_srlv_epi16() {
17723        let a = _mm512_set1_epi16(1 << 1);
17724        let count = _mm512_set1_epi16(2);
17725        let r = _mm512_maskz_srlv_epi16(0, a, count);
17726        assert_eq_m512i(r, _mm512_setzero_si512());
17727        let r = _mm512_maskz_srlv_epi16(0b11111111_11111111_11111111_11111111, a, count);
17728        let e = _mm512_set1_epi16(0);
17729        assert_eq_m512i(r, e);
17730    }
17731
17732    #[simd_test(enable = "avx512bw,avx512vl")]
17733    unsafe fn test_mm256_srlv_epi16() {
17734        let a = _mm256_set1_epi16(1 << 1);
17735        let count = _mm256_set1_epi16(2);
17736        let r = _mm256_srlv_epi16(a, count);
17737        let e = _mm256_set1_epi16(0);
17738        assert_eq_m256i(r, e);
17739    }
17740
17741    #[simd_test(enable = "avx512bw,avx512vl")]
17742    unsafe fn test_mm256_mask_srlv_epi16() {
17743        let a = _mm256_set1_epi16(1 << 1);
17744        let count = _mm256_set1_epi16(2);
17745        let r = _mm256_mask_srlv_epi16(a, 0, a, count);
17746        assert_eq_m256i(r, a);
17747        let r = _mm256_mask_srlv_epi16(a, 0b11111111_11111111, a, count);
17748        let e = _mm256_set1_epi16(0);
17749        assert_eq_m256i(r, e);
17750    }
17751
17752    #[simd_test(enable = "avx512bw,avx512vl")]
17753    unsafe fn test_mm256_maskz_srlv_epi16() {
17754        let a = _mm256_set1_epi16(1 << 1);
17755        let count = _mm256_set1_epi16(2);
17756        let r = _mm256_maskz_srlv_epi16(0, a, count);
17757        assert_eq_m256i(r, _mm256_setzero_si256());
17758        let r = _mm256_maskz_srlv_epi16(0b11111111_11111111, a, count);
17759        let e = _mm256_set1_epi16(0);
17760        assert_eq_m256i(r, e);
17761    }
17762
17763    #[simd_test(enable = "avx512bw,avx512vl")]
17764    unsafe fn test_mm_srlv_epi16() {
17765        let a = _mm_set1_epi16(1 << 1);
17766        let count = _mm_set1_epi16(2);
17767        let r = _mm_srlv_epi16(a, count);
17768        let e = _mm_set1_epi16(0);
17769        assert_eq_m128i(r, e);
17770    }
17771
17772    #[simd_test(enable = "avx512bw,avx512vl")]
17773    unsafe fn test_mm_mask_srlv_epi16() {
17774        let a = _mm_set1_epi16(1 << 1);
17775        let count = _mm_set1_epi16(2);
17776        let r = _mm_mask_srlv_epi16(a, 0, a, count);
17777        assert_eq_m128i(r, a);
17778        let r = _mm_mask_srlv_epi16(a, 0b11111111, a, count);
17779        let e = _mm_set1_epi16(0);
17780        assert_eq_m128i(r, e);
17781    }
17782
17783    #[simd_test(enable = "avx512bw,avx512vl")]
17784    unsafe fn test_mm_maskz_srlv_epi16() {
17785        let a = _mm_set1_epi16(1 << 1);
17786        let count = _mm_set1_epi16(2);
17787        let r = _mm_maskz_srlv_epi16(0, a, count);
17788        assert_eq_m128i(r, _mm_setzero_si128());
17789        let r = _mm_maskz_srlv_epi16(0b11111111, a, count);
17790        let e = _mm_set1_epi16(0);
17791        assert_eq_m128i(r, e);
17792    }
17793
17794    #[simd_test(enable = "avx512bw")]
17795    unsafe fn test_mm512_sra_epi16() {
17796        let a = _mm512_set1_epi16(8);
17797        let count = _mm_set1_epi16(1);
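        // The combined 64-bit count 0x0001_0001_0001_0001 exceeds 15, so the arithmetic shift fills each lane with the sign bit: 8 becomes 0.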
17798        let r = _mm512_sra_epi16(a, count);
17799        let e = _mm512_set1_epi16(0);
17800        assert_eq_m512i(r, e);
17801    }
17802
17803    #[simd_test(enable = "avx512bw")]
17804    unsafe fn test_mm512_mask_sra_epi16() {
17805        let a = _mm512_set1_epi16(8);
17806        let count = _mm_set1_epi16(1);
17807        let r = _mm512_mask_sra_epi16(a, 0, a, count);
17808        assert_eq_m512i(r, a);
17809        let r = _mm512_mask_sra_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
17810        let e = _mm512_set1_epi16(0);
17811        assert_eq_m512i(r, e);
17812    }
17813
17814    #[simd_test(enable = "avx512bw")]
17815    unsafe fn test_mm512_maskz_sra_epi16() {
17816        let a = _mm512_set1_epi16(8);
17817        let count = _mm_set1_epi16(1);
17818        let r = _mm512_maskz_sra_epi16(0, a, count);
17819        assert_eq_m512i(r, _mm512_setzero_si512());
17820        let r = _mm512_maskz_sra_epi16(0b11111111_11111111_11111111_11111111, a, count);
17821        let e = _mm512_set1_epi16(0);
17822        assert_eq_m512i(r, e);
17823    }
17824
17825    #[simd_test(enable = "avx512bw,avx512vl")]
17826    unsafe fn test_mm256_mask_sra_epi16() {
17827        let a = _mm256_set1_epi16(8);
17828        let count = _mm_set1_epi16(1);
17829        let r = _mm256_mask_sra_epi16(a, 0, a, count);
17830        assert_eq_m256i(r, a);
17831        let r = _mm256_mask_sra_epi16(a, 0b11111111_11111111, a, count);
17832        let e = _mm256_set1_epi16(0);
17833        assert_eq_m256i(r, e);
17834    }
17835
17836    #[simd_test(enable = "avx512bw,avx512vl")]
17837    unsafe fn test_mm256_maskz_sra_epi16() {
17838        let a = _mm256_set1_epi16(8);
17839        let count = _mm_set1_epi16(1);
17840        let r = _mm256_maskz_sra_epi16(0, a, count);
17841        assert_eq_m256i(r, _mm256_setzero_si256());
17842        let r = _mm256_maskz_sra_epi16(0b11111111_11111111, a, count);
17843        let e = _mm256_set1_epi16(0);
17844        assert_eq_m256i(r, e);
17845    }
17846
17847    #[simd_test(enable = "avx512bw,avx512vl")]
17848    unsafe fn test_mm_mask_sra_epi16() {
17849        let a = _mm_set1_epi16(8);
17850        let count = _mm_set1_epi16(1);
17851        let r = _mm_mask_sra_epi16(a, 0, a, count);
17852        assert_eq_m128i(r, a);
17853        let r = _mm_mask_sra_epi16(a, 0b11111111, a, count);
17854        let e = _mm_set1_epi16(0);
17855        assert_eq_m128i(r, e);
17856    }
17857
17858    #[simd_test(enable = "avx512bw,avx512vl")]
17859    unsafe fn test_mm_maskz_sra_epi16() {
17860        let a = _mm_set1_epi16(8);
17861        let count = _mm_set1_epi16(1);
17862        let r = _mm_maskz_sra_epi16(0, a, count);
17863        assert_eq_m128i(r, _mm_setzero_si128());
17864        let r = _mm_maskz_sra_epi16(0b11111111, a, count);
17865        let e = _mm_set1_epi16(0);
17866        assert_eq_m128i(r, e);
17867    }
17868
17869    #[simd_test(enable = "avx512bw")]
17870    unsafe fn test_mm512_srai_epi16() {
17871        let a = _mm512_set1_epi16(8);
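        // srai applies the immediate count to every element individually: 8 >> 2 == 2.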
17872        let r = _mm512_srai_epi16::<2>(a);
17873        let e = _mm512_set1_epi16(2);
17874        assert_eq_m512i(r, e);
17875    }
17876
17877    #[simd_test(enable = "avx512bw")]
17878    unsafe fn test_mm512_mask_srai_epi16() {
17879        let a = _mm512_set1_epi16(8);
17880        let r = _mm512_mask_srai_epi16::<2>(a, 0, a);
17881        assert_eq_m512i(r, a);
17882        let r = _mm512_mask_srai_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
17883        let e = _mm512_set1_epi16(2);
17884        assert_eq_m512i(r, e);
17885    }
17886
17887    #[simd_test(enable = "avx512bw")]
17888    unsafe fn test_mm512_maskz_srai_epi16() {
17889        let a = _mm512_set1_epi16(8);
17890        let r = _mm512_maskz_srai_epi16::<2>(0, a);
17891        assert_eq_m512i(r, _mm512_setzero_si512());
17892        let r = _mm512_maskz_srai_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
17893        let e = _mm512_set1_epi16(2);
17894        assert_eq_m512i(r, e);
17895    }
17896
17897    #[simd_test(enable = "avx512bw,avx512vl")]
17898    unsafe fn test_mm256_mask_srai_epi16() {
17899        let a = _mm256_set1_epi16(8);
17900        let r = _mm256_mask_srai_epi16::<2>(a, 0, a);
17901        assert_eq_m256i(r, a);
17902        let r = _mm256_mask_srai_epi16::<2>(a, 0b11111111_11111111, a);
17903        let e = _mm256_set1_epi16(2);
17904        assert_eq_m256i(r, e);
17905    }
17906
17907    #[simd_test(enable = "avx512bw,avx512vl")]
17908    unsafe fn test_mm256_maskz_srai_epi16() {
17909        let a = _mm256_set1_epi16(8);
17910        let r = _mm256_maskz_srai_epi16::<2>(0, a);
17911        assert_eq_m256i(r, _mm256_setzero_si256());
17912        let r = _mm256_maskz_srai_epi16::<2>(0b11111111_11111111, a);
17913        let e = _mm256_set1_epi16(2);
17914        assert_eq_m256i(r, e);
17915    }
17916
17917    #[simd_test(enable = "avx512bw,avx512vl")]
17918    unsafe fn test_mm_mask_srai_epi16() {
17919        let a = _mm_set1_epi16(8);
17920        let r = _mm_mask_srai_epi16::<2>(a, 0, a);
17921        assert_eq_m128i(r, a);
17922        let r = _mm_mask_srai_epi16::<2>(a, 0b11111111, a);
17923        let e = _mm_set1_epi16(2);
17924        assert_eq_m128i(r, e);
17925    }
17926
17927    #[simd_test(enable = "avx512bw,avx512vl")]
17928    unsafe fn test_mm_maskz_srai_epi16() {
17929        let a = _mm_set1_epi16(8);
17930        let r = _mm_maskz_srai_epi16::<2>(0, a);
17931        assert_eq_m128i(r, _mm_setzero_si128());
17932        let r = _mm_maskz_srai_epi16::<2>(0b11111111, a);
17933        let e = _mm_set1_epi16(2);
17934        assert_eq_m128i(r, e);
17935    }
17936
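    // Unlike `sra`, the `srav` variants shift each element by the count held in the
    // corresponding element of the count vector, so a uniform count of 2 again gives
    // 8 >> 2 == 2 in every lane.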
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_srav_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_srav_epi16(a, count);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_srav_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_mask_srav_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srav_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_srav_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_maskz_srav_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srav_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_srav_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_srav_epi16(a, count);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_srav_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_mask_srav_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srav_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_srav_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_maskz_srav_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srav_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_srav_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(2);
        let r = _mm_srav_epi16(a, count);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_srav_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_srav_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srav_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_srav_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_srav_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srav_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

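    // `permutex2var` picks each destination element out of the concatenation of a and b:
    // the low index bits select the element and the next bit up (1<<5 for 32 elements,
    // 1<<4 for 16, 1<<3 for 8) switches the source from a to b. Since the `set_epi16`
    // constructors list elements from highest to lowest, index 1 into `a` below selects
    // the value 30, while indices with the source bit set pull b's constant 100.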
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_permutex2var_epi16(a, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutex2var_epi16(a, 0b11111111_11111111_11111111_11111111, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutex2var_epi16(0b11111111_11111111_11111111_11111111, a, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

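    // The `mask2` variant differs only in its merge source: lanes whose mask bit is not
    // set are copied from `idx` rather than from `a`, which is what the first assert
    // below checks.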
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask2_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m512i(r, idx);
        let r = _mm512_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111_11111111_11111111, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_permutex2var_epi16(a, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutex2var_epi16(a, 0b11111111_11111111, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutex2var_epi16(0b11111111_11111111, a, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask2_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m256i(r, idx);
        let r = _mm256_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_permutex2var_epi16(a, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutex2var_epi16(a, 0b11111111, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutex2var_epi16(0b11111111, a, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask2_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m128i(r, idx);
        let r = _mm_mask2_permutex2var_epi16(a, idx, 0b11111111, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

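    // `permutexvar` selects a[idx] for every lane from a single source vector. With the
    // descending `set_epi16` argument order, element 1 of `a` holds the second-to-last
    // listed value (30, 14, and 6 for the 512-, 256-, and 128-bit cases below).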
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_permutexvar_epi16() {
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_permutexvar_epi16(idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_permutexvar_epi16() {
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutexvar_epi16(a, 0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_permutexvar_epi16() {
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutexvar_epi16(0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_permutexvar_epi16() {
        let idx = _mm256_set1_epi16(1);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_permutexvar_epi16(idx, a);
        let e = _mm256_set1_epi16(14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_permutexvar_epi16() {
        let idx = _mm256_set1_epi16(1);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutexvar_epi16(a, 0b11111111_11111111, idx, a);
        let e = _mm256_set1_epi16(14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_permutexvar_epi16() {
        let idx = _mm256_set1_epi16(1);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutexvar_epi16(0b11111111_11111111, idx, a);
        let e = _mm256_set1_epi16(14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_permutexvar_epi16() {
        let idx = _mm_set1_epi16(1);
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_permutexvar_epi16(idx, a);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_permutexvar_epi16() {
        let idx = _mm_set1_epi16(1);
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutexvar_epi16(a, 0b11111111, idx, a);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_permutexvar_epi16() {
        let idx = _mm_set1_epi16(1);
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutexvar_epi16(0b11111111, idx, a);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

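    // `mask_blend` selects b in lanes whose mask bit is set and a in the rest; element i
    // corresponds to mask bit i, so the low (rightmost) mask bits map to the last-listed
    // elements in the expected vectors.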
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_blend_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_blend_epi16(0b11111111_00000000_11111111_00000000, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_blend_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_blend_epi16(0b11111111_00000000, a, b);
        let e = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_blend_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_blend_epi16(0b11110000, a, b);
        let e = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_blend_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_blend_epi8(
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_blend_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_blend_epi8(0b11111111_00000000_11111111_00000000, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_blend_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_mask_blend_epi8(0b11111111_00000000, a, b);
        let e = _mm_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

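    // `broadcastw` replicates the lowest 16-bit element of `a` -- the last `set_epi16`
    // argument, 24 -- across the whole destination.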
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_broadcastw_epi16(a);
        let e = _mm512_set1_epi16(24);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_broadcastw_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_mask_broadcastw_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_broadcastw_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(24);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_maskz_broadcastw_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_broadcastw_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(24);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_broadcastw_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_mask_broadcastw_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_broadcastw_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(24);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_maskz_broadcastw_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_broadcastw_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(24);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_broadcastw_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm_mask_broadcastw_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_broadcastw_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(24);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm_maskz_broadcastw_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_broadcastw_epi16(0b11111111, a);
        let e = _mm_set1_epi16(24);
        assert_eq_m128i(r, e);
    }

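    // `broadcastb` is the byte analogue: the lowest 8-bit element (32) is replicated.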
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_broadcastb_epi8(a);
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_broadcastb_epi8() {
        let src = _mm512_set1_epi8(1);
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_mask_broadcastb_epi8(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_broadcastb_epi8(
            src,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_maskz_broadcastb_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_broadcastb_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_broadcastb_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_mask_broadcastb_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_broadcastb_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_maskz_broadcastb_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_broadcastb_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_broadcastb_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm_mask_broadcastb_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_broadcastb_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(32);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm_maskz_broadcastb_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_broadcastb_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(32);
        assert_eq_m128i(r, e);
    }

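    // `unpackhi` interleaves the upper half of each 128-bit lane of a and b (elements
    // 4..8 per lane for epi16, 8..16 for epi8), with a's element landing in the lower
    // position of each pair -- which is why the expected vectors pair values from a and
    // b within every lane rather than across the full register.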
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_unpackhi_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpackhi_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpackhi_epi16(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_unpackhi_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        let r = _mm256_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpackhi_epi16(a, 0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_unpackhi_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        let r = _mm256_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpackhi_epi16(0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_unpackhi_epi16() {
        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
        let r = _mm_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpackhi_epi16(a, 0b11111111, a, b);
        let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_unpackhi_epi16() {
        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
        let r = _mm_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpackhi_epi16(0b11111111, a, b);
        let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_unpackhi_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpackhi_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpackhi_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpackhi_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpackhi_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_unpackhi_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpackhi_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_unpackhi_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpackhi_epi8(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
        assert_eq_m128i(r, e);
    }

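    // `unpacklo` is the counterpart for the lower half of each 128-bit lane (elements
    // 0..4 per lane for epi16, 0..8 for epi8), again with a's element first in each pair.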
18774    #[simd_test(enable = "avx512bw")]
18775    unsafe fn test_mm512_unpacklo_epi16() {
18776        #[rustfmt::skip]
18777        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
18778                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
18779        #[rustfmt::skip]
18780        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18781                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
18782        let r = _mm512_unpacklo_epi16(a, b);
18783        #[rustfmt::skip]
18784        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
18785                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
18786        assert_eq_m512i(r, e);
18787    }
18788
18789    #[simd_test(enable = "avx512bw")]
18790    unsafe fn test_mm512_mask_unpacklo_epi16() {
18791        #[rustfmt::skip]
18792        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
18793                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
18794        #[rustfmt::skip]
18795        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18796                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
18797        let r = _mm512_mask_unpacklo_epi16(a, 0, a, b);
18798        assert_eq_m512i(r, a);
18799        let r = _mm512_mask_unpacklo_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
18800        #[rustfmt::skip]
18801        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
18802                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
18803        assert_eq_m512i(r, e);
18804    }
18805
18806    #[simd_test(enable = "avx512bw")]
18807    unsafe fn test_mm512_maskz_unpacklo_epi16() {
18808        #[rustfmt::skip]
18809        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
18810                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
18811        #[rustfmt::skip]
18812        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18813                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
18814        let r = _mm512_maskz_unpacklo_epi16(0, a, b);
18815        assert_eq_m512i(r, _mm512_setzero_si512());
18816        let r = _mm512_maskz_unpacklo_epi16(0b11111111_11111111_11111111_11111111, a, b);
18817        #[rustfmt::skip]
18818        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
18819                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
18820        assert_eq_m512i(r, e);
18821    }
18822
18823    #[simd_test(enable = "avx512bw,avx512vl")]
18824    unsafe fn test_mm256_mask_unpacklo_epi16() {
18825        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
18826        let b = _mm256_set_epi16(
18827            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18828        );
18829        let r = _mm256_mask_unpacklo_epi16(a, 0, a, b);
18830        assert_eq_m256i(r, a);
18831        let r = _mm256_mask_unpacklo_epi16(a, 0b11111111_11111111, a, b);
18832        let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
18833        assert_eq_m256i(r, e);
18834    }
18835
18836    #[simd_test(enable = "avx512bw,avx512vl")]
18837    unsafe fn test_mm256_maskz_unpacklo_epi16() {
18838        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
18839        let b = _mm256_set_epi16(
18840            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18841        );
18842        let r = _mm256_maskz_unpacklo_epi16(0, a, b);
18843        assert_eq_m256i(r, _mm256_setzero_si256());
18844        let r = _mm256_maskz_unpacklo_epi16(0b11111111_11111111, a, b);
18845        let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
18846        assert_eq_m256i(r, e);
18847    }
18848
18849    #[simd_test(enable = "avx512bw,avx512vl")]
18850    unsafe fn test_mm_mask_unpacklo_epi16() {
18851        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
18852        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
18853        let r = _mm_mask_unpacklo_epi16(a, 0, a, b);
18854        assert_eq_m128i(r, a);
18855        let r = _mm_mask_unpacklo_epi16(a, 0b11111111, a, b);
18856        let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
18857        assert_eq_m128i(r, e);
18858    }
18859
18860    #[simd_test(enable = "avx512bw,avx512vl")]
18861    unsafe fn test_mm_maskz_unpacklo_epi16() {
18862        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
18863        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
18864        let r = _mm_maskz_unpacklo_epi16(0, a, b);
18865        assert_eq_m128i(r, _mm_setzero_si128());
18866        let r = _mm_maskz_unpacklo_epi16(0b11111111, a, b);
18867        let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
18868        assert_eq_m128i(r, e);
18869    }
18870
18871    #[simd_test(enable = "avx512bw")]
18872    unsafe fn test_mm512_unpacklo_epi8() {
18873        #[rustfmt::skip]
18874        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
18875                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
18876                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18877                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
18878        #[rustfmt::skip]
18879        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
18880                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
18881                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
18882                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
18883        let r = _mm512_unpacklo_epi8(a, b);
18884        #[rustfmt::skip]
18885        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
18886                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
18887                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
18888                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
18889        assert_eq_m512i(r, e);
18890    }
18891
18892    #[simd_test(enable = "avx512bw")]
18893    unsafe fn test_mm512_mask_unpacklo_epi8() {
18894        #[rustfmt::skip]
18895        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
18896                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
18897                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18898                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
18899        #[rustfmt::skip]
18900        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
18901                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
18902                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
18903                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
18904        let r = _mm512_mask_unpacklo_epi8(a, 0, a, b);
18905        assert_eq_m512i(r, a);
18906        let r = _mm512_mask_unpacklo_epi8(
18907            a,
18908            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
18909            a,
18910            b,
18911        );
18912        #[rustfmt::skip]
18913        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
18914                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
18915                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
18916                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
18917        assert_eq_m512i(r, e);
18918    }
18919
18920    #[simd_test(enable = "avx512bw")]
18921    unsafe fn test_mm512_maskz_unpacklo_epi8() {
18922        #[rustfmt::skip]
18923        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
18924                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
18925                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18926                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
18927        #[rustfmt::skip]
18928        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
18929                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpacklo_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpacklo_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpacklo_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_unpacklo_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpacklo_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_unpacklo_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpacklo_epi8(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
        );
        assert_eq_m128i(r, e);
    }

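    // The mask/maskz "mov" intrinsics perform no computation: they merge `a` into the
    // destination under the write mask (or zero the unselected elements), so an all-zeros
    // and an all-ones mask are enough to cover both paths.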
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mov_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm512_set1_epi16(2);
        let r = _mm512_mask_mov_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_mov_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mov_epi16() {
        let a = _mm512_set1_epi16(2);
        let r = _mm512_maskz_mov_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mov_epi16(0b11111111_11111111_11111111_11111111, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_mov_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm256_set1_epi16(2);
        let r = _mm256_mask_mov_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_mov_epi16(src, 0b11111111_11111111, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_mov_epi16() {
        let a = _mm256_set1_epi16(2);
        let r = _mm256_maskz_mov_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mov_epi16(0b11111111_11111111, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_mov_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi16(2);
        let r = _mm_mask_mov_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_mov_epi16(src, 0b11111111, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_mov_epi16() {
        let a = _mm_set1_epi16(2);
        let r = _mm_maskz_mov_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mov_epi16(0b11111111, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mov_epi8() {
        let src = _mm512_set1_epi8(1);
        let a = _mm512_set1_epi8(2);
        let r = _mm512_mask_mov_epi8(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_mov_epi8(
            src,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mov_epi8() {
        let a = _mm512_set1_epi8(2);
        let r = _mm512_maskz_mov_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mov_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_mov_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm256_mask_mov_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_mov_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_mov_epi8() {
        let a = _mm256_set1_epi8(2);
        let r = _mm256_maskz_mov_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mov_epi8(0b11111111_11111111_11111111_11111111, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_mov_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_mov_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_mov_epi8(src, 0b11111111_11111111, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_mov_epi8() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_mov_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mov_epi8(0b11111111_11111111, a);
        assert_eq_m128i(r, a);
    }

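    // Masked set1 broadcasts a scalar to every selected element; unselected elements keep
    // `src` (mask variant) or are zeroed (maskz variant).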
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_set1_epi16() {
        let src = _mm512_set1_epi16(2);
        let a: i16 = 11;
        let r = _mm512_mask_set1_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_set1_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_set1_epi16() {
        let a: i16 = 11;
        let r = _mm512_maskz_set1_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_set1_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_set1_epi16() {
        let src = _mm256_set1_epi16(2);
        let a: i16 = 11;
        let r = _mm256_mask_set1_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_set1_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_set1_epi16() {
        let a: i16 = 11;
        let r = _mm256_maskz_set1_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_set1_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_set1_epi16() {
        let src = _mm_set1_epi16(2);
        let a: i16 = 11;
        let r = _mm_mask_set1_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_set1_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(11);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_set1_epi16() {
        let a: i16 = 11;
        let r = _mm_maskz_set1_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_set1_epi16(0b11111111, a);
        let e = _mm_set1_epi16(11);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_set1_epi8() {
        let src = _mm512_set1_epi8(2);
        let a: i8 = 11;
        let r = _mm512_mask_set1_epi8(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_set1_epi8(
            src,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_set1_epi8() {
        let a: i8 = 11;
        let r = _mm512_maskz_set1_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_set1_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_set1_epi8() {
        let src = _mm256_set1_epi8(2);
        let a: i8 = 11;
        let r = _mm256_mask_set1_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_set1_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_set1_epi8() {
        let a: i8 = 11;
        let r = _mm256_maskz_set1_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_set1_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_set1_epi8() {
        let src = _mm_set1_epi8(2);
        let a: i8 = 11;
        let r = _mm_mask_set1_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_set1_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(11);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_set1_epi8() {
        let a: i8 = 11;
        let r = _mm_maskz_set1_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_set1_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(11);
        assert_eq_m128i(r, e);
    }

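    // shufflelo_epi16 reorders the four low words of each 128-bit lane; each 2-bit field of
    // the immediate selects a source word, while the four high words pass through unchanged.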
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        let r = _mm512_shufflelo_epi16::<0b00_01_01_11>(a);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(
            a,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r =
            _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_shufflelo_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_shufflelo_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_shufflelo_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_shufflelo_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
        assert_eq_m128i(r, e);
    }

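    // shufflehi_epi16 is the counterpart for the four high words of each 128-bit lane; the
    // low words pass through unchanged.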
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        let r = _mm512_shufflehi_epi16::<0b00_01_01_11>(a);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(
            a,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r =
            _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_shufflehi_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
        let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_shufflehi_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
        let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_shufflehi_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111, a);
        let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_shufflehi_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111, a);
        let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

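    // shuffle_epi8 indexes bytes within each 16-byte lane, so with every control byte equal
    // to 1 each lane is filled with its own byte 1.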
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_shuffle_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shuffle_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_shuffle_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shuffle_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shuffle_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_shuffle_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shuffle_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_shuffle_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shuffle_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_shuffle_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shuffle_epi8(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        );
        assert_eq_m128i(r, e);
    }

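    // test_epi*_mask computes a AND b and sets a mask bit for every non-zero element; the
    // inputs below share bit 0, so the unmasked result is all ones.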
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_test_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_test_epi16_mask(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_test_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_mask_test_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_test_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_test_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_test_epi16_mask(a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_test_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_mask_test_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_test_epi16_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_test_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_test_epi16_mask(a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_test_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_mask_test_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_test_epi16_mask(0b11111111, a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_test_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_test_epi8_mask(a, b);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_test_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_mask_test_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_test_epi8_mask(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_test_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_test_epi8_mask(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_test_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_mask_test_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_test_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_test_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_test_epi8_mask(a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_test_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_mask_test_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_test_epi8_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

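    // testn_epi*_mask is the complement: a mask bit is set only when a AND b is zero, so the
    // same inputs yield an all-zero mask.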
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_testn_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_testn_epi16_mask(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_testn_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_mask_testn_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_testn_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_testn_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_testn_epi16_mask(a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_testn_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_mask_testn_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_testn_epi16_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_testn_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_testn_epi16_mask(a, b);
        let e: __mmask8 = 0b00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_testn_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_mask_testn_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_testn_epi16_mask(0b11111111, a, b);
        let e: __mmask8 = 0b00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_testn_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_testn_epi8_mask(a, b);
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_testn_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_mask_testn_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_testn_epi8_mask(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_testn_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_testn_epi8_mask(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_testn_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_mask_testn_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_testn_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_testn_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_testn_epi8_mask(a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_testn_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_mask_testn_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_testn_epi8_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

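    // _store_mask*/_load_mask* simply move a mask value through memory unchanged.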
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_store_mask64() {
        let a: __mmask64 =
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
        let mut r = 0;
        _store_mask64(&mut r, a);
        assert_eq!(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_store_mask32() {
        let a: __mmask32 = 0b11111111_00000000_11111111_00000000;
        let mut r = 0;
        _store_mask32(&mut r, a);
        assert_eq!(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_load_mask64() {
        let p: __mmask64 =
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
        let r = _load_mask64(&p);
        let e: __mmask64 =
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_load_mask32() {
        let p: __mmask32 = 0b11111111_00000000_11111111_00000000;
        let r = _load_mask32(&p);
        let e: __mmask32 = 0b11111111_00000000_11111111_00000000;
        assert_eq!(r, e);
    }

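    // sad_epu8 sums absolute differences of unsigned bytes over each 8-byte group:
    // |2 - 4| * 8 = 16 per 64-bit result.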
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sad_epu8() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_sad_epu8(a, b);
        let e = _mm512_set1_epi64(16);
        assert_eq_m512i(r, e);
    }

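    // dbsad_epu8 produces 16-bit SADs of 4-byte blocks of `a` against blocks of `b` selected
    // by the immediate; with uniform inputs each result is |2 - 4| * 4 = 8 regardless of the
    // block offsets, so an immediate of 0 suffices here.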
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_dbsad_epu8() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_dbsad_epu8::<0>(a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_dbsad_epu8() {
        let src = _mm512_set1_epi16(1);
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dbsad_epu8::<0>(src, 0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_dbsad_epu8() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dbsad_epu8::<0>(0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_dbsad_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_dbsad_epu8::<0>(a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_dbsad_epu8() {
        let src = _mm256_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dbsad_epu8::<0>(src, 0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_dbsad_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dbsad_epu8::<0>(0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_dbsad_epu8() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        let r = _mm_dbsad_epu8::<0>(a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_dbsad_epu8() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        let r = _mm_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dbsad_epu8::<0>(src, 0b11111111, a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_dbsad_epu8() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        let r = _mm_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dbsad_epu8::<0>(0b11111111, a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }

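    // movepi*_mask extracts the sign (most significant) bit of each element into a mask, so
    // elements of 1 << 15 / 1 << 7 give an all-ones mask.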
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_movepi16_mask() {
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_movepi16_mask(a);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_movepi16_mask() {
        let a = _mm256_set1_epi16(1 << 15);
        let r = _mm256_movepi16_mask(a);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_movepi16_mask() {
        let a = _mm_set1_epi16(1 << 15);
        let r = _mm_movepi16_mask(a);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_movepi8_mask() {
        let a = _mm512_set1_epi8(1 << 7);
        let r = _mm512_movepi8_mask(a);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_movepi8_mask() {
        let a = _mm256_set1_epi8(1 << 7);
        let r = _mm256_movepi8_mask(a);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_movepi8_mask() {
        let a = _mm_set1_epi8(1 << 7);
        let r = _mm_movepi8_mask(a);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

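    // movm_epi* is the inverse: each mask bit is broadcast to an all-ones or all-zeros
    // element. The OR chains below spell out the all-ones element bit by bit.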
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_movm_epi16() {
        let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
        let r = _mm512_movm_epi16(a);
        let e = _mm512_set1_epi16(
            1 << 15
                | 1 << 14
                | 1 << 13
                | 1 << 12
                | 1 << 11
                | 1 << 10
                | 1 << 9
                | 1 << 8
                | 1 << 7
                | 1 << 6
                | 1 << 5
                | 1 << 4
                | 1 << 3
                | 1 << 2
                | 1 << 1
                | 1 << 0,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_movm_epi16() {
        let a: __mmask16 = 0b11111111_11111111;
        let r = _mm256_movm_epi16(a);
        let e = _mm256_set1_epi16(
            1 << 15
                | 1 << 14
                | 1 << 13
                | 1 << 12
                | 1 << 11
                | 1 << 10
                | 1 << 9
                | 1 << 8
                | 1 << 7
                | 1 << 6
                | 1 << 5
                | 1 << 4
                | 1 << 3
                | 1 << 2
                | 1 << 1
                | 1 << 0,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_movm_epi16() {
        let a: __mmask8 = 0b11111111;
        let r = _mm_movm_epi16(a);
        let e = _mm_set1_epi16(
            1 << 15
                | 1 << 14
                | 1 << 13
                | 1 << 12
                | 1 << 11
                | 1 << 10
                | 1 << 9
                | 1 << 8
                | 1 << 7
                | 1 << 6
                | 1 << 5
                | 1 << 4
                | 1 << 3
                | 1 << 2
                | 1 << 1
                | 1 << 0,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_movm_epi8() {
        let a: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        let r = _mm512_movm_epi8(a);
        let e =
            _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_movm_epi8() {
        let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
        let r = _mm256_movm_epi8(a);
        let e =
            _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_movm_epi8() {
        let a: __mmask16 = 0b11111111_11111111;
        let r = _mm_movm_epi8(a);
        let e =
            _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
        assert_eq_m128i(r, e);
    }

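    // _cvtmask32_u32/_cvtu32_mask32 reinterpret between __mmask32 and u32 without changing
    // any bits.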
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_cvtmask32_u32() {
        let a: __mmask32 = 0b11001100_00110011_01100110_10011001;
        let r = _cvtmask32_u32(a);
        let e: u32 = 0b11001100_00110011_01100110_10011001;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_cvtu32_mask32() {
        let a: u32 = 0b11001100_00110011_01100110_10011001;
        let r = _cvtu32_mask32(a);
        let e: __mmask32 = 0b11001100_00110011_01100110_10011001;
        assert_eq!(r, e);
    }

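    // kadd treats the masks as integers and adds them.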
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kadd_mask32() {
        let a: __mmask32 = 11;
        let b: __mmask32 = 22;
        let r = _kadd_mask32(a, b);
        let e: __mmask32 = 33;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kadd_mask64() {
        let a: __mmask64 = 11;
        let b: __mmask64 = 22;
        let r = _kadd_mask64(a, b);
        let e: __mmask64 = 33;
        assert_eq!(r, e);
    }

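    // The remaining k-register operations are plain bitwise logic on masks: kand, knot,
    // kandn (!a & b), kor, kxor, and kxnor (!(a ^ b)).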
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kand_mask32() {
        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kand_mask32(a, b);
        let e: __mmask32 = 0b11001100_00110011_11001100_00110011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kand_mask64() {
        let a: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kand_mask64(a, b);
        let e: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_knot_mask32() {
        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _knot_mask32(a);
        let e: __mmask32 = 0b00110011_11001100_00110011_11001100;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_knot_mask64() {
        let a: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _knot_mask64(a);
        let e: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kandn_mask32() {
        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kandn_mask32(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kandn_mask64() {
        let a: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kandn_mask64(a, b);
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kor_mask32() {
        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kor_mask32(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kor_mask64() {
        let a: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kor_mask64(a, b);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kxor_mask32() {
        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kxor_mask32(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kxor_mask64() {
        let a: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kxor_mask64(a, b);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kxnor_mask32() {
        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kxnor_mask32(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kxnor_mask64() {
        let a: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kxnor_mask64(a, b);
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

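    // kortest ORs two masks and reports two flags: the *z result is 1 when the OR is all
    // zeros and the *c result is 1 when it is all ones. The 32-bit inputs below OR to all
    // ones; the 64-bit variants see a zero upper half, so their carry result stays 0.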
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortest_mask32_u8() {
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let b: __mmask32 = 0b1011011010110110_1011011010110110;
        let mut all_ones: u8 = 0;
        let r = _kortest_mask32_u8(a, b, &mut all_ones);
        assert_eq!(r, 0);
        assert_eq!(all_ones, 1);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortest_mask64_u8() {
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let b: __mmask64 = 0b1011011010110110_1011011010110110;
        let mut all_ones: u8 = 0;
        let r = _kortest_mask64_u8(a, b, &mut all_ones);
        assert_eq!(r, 0);
        assert_eq!(all_ones, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortestc_mask32_u8() {
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let b: __mmask32 = 0b1011011010110110_1011011010110110;
        let r = _kortestc_mask32_u8(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortestc_mask64_u8() {
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let b: __mmask64 = 0b1011011010110110_1011011010110110;
        let r = _kortestc_mask64_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortestz_mask32_u8() {
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let b: __mmask32 = 0b1011011010110110_1011011010110110;
        let r = _kortestz_mask32_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortestz_mask64_u8() {
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let b: __mmask64 = 0b1011011010110110_1011011010110110;
        let r = _kortestz_mask64_u8(a, b);
        assert_eq!(r, 0);
    }

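    // Mask shifts take the shift count as a const generic. Counts at or past
    // the mask width do not wrap: they yield an all-zero mask, which the
    // <32>/<33> and <64>/<65> cases below pin down explicitly.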
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kshiftli_mask32() {
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let r = _kshiftli_mask32::<3>(a);
        let e: __mmask32 = 0b0100101101001011_0100101101001000;
        assert_eq!(r, e);

        let r = _kshiftli_mask32::<31>(a);
        let e: __mmask32 = 0b1000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftli_mask32::<32>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftli_mask32::<33>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kshiftli_mask64() {
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let r = _kshiftli_mask64::<3>(a);
        let e: __mmask64 = 0b0110100101101001011_0100101101001000;
        assert_eq!(r, e);

        let r = _kshiftli_mask64::<63>(a);
        let e: __mmask64 = 0b1000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftli_mask64::<64>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftli_mask64::<65>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kshiftri_mask32() {
        let a: __mmask32 = 0b1010100101101001_0110100101101001;
        let r = _kshiftri_mask32::<3>(a);
        let e: __mmask32 = 0b0001010100101101_0010110100101101;
        assert_eq!(r, e);

        let r = _kshiftri_mask32::<31>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000001;
        assert_eq!(r, e);

        let r = _kshiftri_mask32::<32>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftri_mask32::<33>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kshiftri_mask64() {
        let a: __mmask64 = 0b1010100101101001011_0100101101001000;
        let r = _kshiftri_mask64::<3>(a);
        let e: __mmask64 = 0b1010100101101001_0110100101101001;
        assert_eq!(r, e);

        let r = _kshiftri_mask64::<34>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000001;
        assert_eq!(r, e);

        let r = _kshiftri_mask64::<35>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftri_mask64::<64>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftri_mask64::<65>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);
    }

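    // The _ktest* helpers expose both KTEST flags: the return value (and
    // _ktestz_*) is 1 when `a & b` is all zeros, while `and_not` (and
    // _ktestc_*) is 1 when `!a & b` is all zeros, per Intel's definition. The
    // 32-bit patterns below are exact complements, so the AND is zero while
    // the ANDN is not.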
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktest_mask32_u8() {
        let a: __mmask32 = 0b0110100100111100_0110100100111100;
        let b: __mmask32 = 0b1001011011000011_1001011011000011;
        let mut and_not: u8 = 0;
        let r = _ktest_mask32_u8(a, b, &mut and_not);
        assert_eq!(r, 1);
        assert_eq!(and_not, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktestc_mask32_u8() {
        let a: __mmask32 = 0b0110100100111100_0110100100111100;
        let b: __mmask32 = 0b1001011011000011_1001011011000011;
        let r = _ktestc_mask32_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktestz_mask32_u8() {
        let a: __mmask32 = 0b0110100100111100_0110100100111100;
        let b: __mmask32 = 0b1001011011000011_1001011011000011;
        let r = _ktestz_mask32_u8(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktest_mask64_u8() {
        let a: __mmask64 = 0b0110100100111100_0110100100111100;
        let b: __mmask64 = 0b1001011011000011_1001011011000011;
        let mut and_not: u8 = 0;
        let r = _ktest_mask64_u8(a, b, &mut and_not);
        assert_eq!(r, 1);
        assert_eq!(and_not, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktestc_mask64_u8() {
        let a: __mmask64 = 0b0110100100111100_0110100100111100;
        let b: __mmask64 = 0b1001011011000011_1001011011000011;
        let r = _ktestc_mask64_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktestz_mask64_u8() {
        let a: __mmask64 = 0b0110100100111100_0110100100111100;
        let b: __mmask64 = 0b1001011011000011_1001011011000011;
        let r = _ktestz_mask64_u8(a, b);
        assert_eq!(r, 1);
    }

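    // _mm512_kunpackw concatenates the low words of its operands,
    // `(a & 0xFFFF) << 16 | (b & 0xFFFF)`; _mm512_kunpackd does the same with
    // the low dwords.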
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_kunpackw() {
        let a: __mmask32 = 0x00110011;
        let b: __mmask32 = 0x00001011;
        let r = _mm512_kunpackw(a, b);
        let e: __mmask32 = 0x00111011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_kunpackd() {
        let a: __mmask64 = 0x11001100_00110011;
        let b: __mmask64 = 0x00101110_00001011;
        let r = _mm512_kunpackd(a, b);
        let e: __mmask64 = 0x00110011_00001011;
        assert_eq!(r, e);
    }

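    // _mm512_cvtepi16_epi8 (vpmovwb) narrows by plain truncation: each 16-bit
    // element keeps only its low byte, halving the vector width.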
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtepi16_epi8() {
        let a = _mm512_set1_epi16(2);
        let r = _mm512_cvtepi16_epi8(a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(2);
        let r = _mm512_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtepi16_epi8() {
        let a = _mm512_set1_epi16(2);
        let r = _mm512_maskz_cvtepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cvtepi16_epi8() {
        let a = _mm256_set1_epi16(2);
        let r = _mm256_cvtepi16_epi8(a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(2);
        let r = _mm256_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepi16_epi8() {
        let a = _mm256_set1_epi16(2);
        let r = _mm256_maskz_cvtepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cvtepi16_epi8() {
        let a = _mm_set1_epi16(2);
        let r = _mm_cvtepi16_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(2);
        let r = _mm_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi16_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtepi16_epi8() {
        let a = _mm_set1_epi16(2);
        let r = _mm_maskz_cvtepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi16_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

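    // The cvtsepi16_epi8 variants (vpmovswb) narrow with signed saturation:
    // i16 values outside the i8 range clamp to i8::MIN or i8::MAX instead of
    // being truncated.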
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtsepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_cvtsepi16_epi8(a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtsepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtsepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtsepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtsepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cvtsepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_cvtsepi16_epi8(a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtsepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtsepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtsepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtsepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cvtsepi16_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_cvtsepi16_epi8(a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtsepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtsepi16_epi8(src, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtsepi16_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtsepi16_epi8(0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

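    // The cvtusepi16_epi8 variants (vpmovuswb) narrow with unsigned
    // saturation: i16::MIN reads as 0x8000 = 32768 unsigned, clamps to 0xFF,
    // and compares equal to -1 once reinterpreted as i8.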
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtusepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_cvtusepi16_epi8(a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtusepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtusepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtusepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtusepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cvtusepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_cvtusepi16_epi8(a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtusepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtusepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtusepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtusepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cvtusepi16_epi8() {
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_cvtusepi16_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtusepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtusepi16_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtusepi16_epi8() {
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtusepi16_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

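    // _mm512_cvtepi8_epi16 (vpmovsxbw) widens by sign-extending each byte to
    // 16 bits; the cvtepu8_epi16 tests further below cover the zero-extending
    // counterpart (vpmovzxbw).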
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtepi8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_cvtepi8_epi16(a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepi8_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm512_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepi8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtepi8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_maskz_cvtepi8_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepi8_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi8_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm256_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepi8_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepi8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm256_maskz_cvtepi8_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepi8_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepi8_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi8_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtepi8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_cvtepi8_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi8_epi16(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtepu8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_cvtepu8_epi16(a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepu8_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm512_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepu8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtepu8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_maskz_cvtepu8_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepu8_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepu8_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm256_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepu8_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepu8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm256_maskz_cvtepu8_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepu8_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepu8_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepu8_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtepu8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_cvtepu8_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepu8_epi16(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

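    // bslli/bsrli shift by whole bytes within each 128-bit lane
    // independently, so no byte ever crosses a lane boundary; every 16-byte
    // row of the expected vectors below repeats the same per-lane pattern.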
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_bslli_epi128() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let r = _mm512_bslli_epi128::<9>(a);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_bsrli_epi128() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
            49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        );
        let r = _mm512_bsrli_epi128::<3>(a);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
            0, 0, 0, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
            0, 0, 0, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            0, 0, 0, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
        );
        assert_eq_m512i(r, e);
    }

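    // _mm512_alignr_epi8::<IMM8> concatenates each 128-bit lane of `a` (high)
    // with the corresponding lane of `b` (low) and extracts the 16 bytes
    // starting IMM8 bytes from the bottom, again without crossing lane
    // boundaries.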
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_alignr_epi8::<14>(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_alignr_epi8::<14>(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_alignr_epi8::<14>(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_alignr_epi8::<14>(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_alignr_epi8::<14>(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_alignr_epi8() {
        let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_alignr_epi8::<14>(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_alignr_epi8() {
        let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_alignr_epi8::<14>(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
        assert_eq_m128i(r, e);
    }

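    // The mask_cvt*_storeu_epi8 variants narrow straight to memory, storing
    // only the bytes selected by the mask. The all-ones masks below write the
    // entire narrowed vector, which is what makes reading back the whole of
    // `r` valid; the 128-bit cases write just 8 bytes and leave the
    // zero-initialized upper half of `r` untouched.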
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtsepi16_storeu_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let mut r = _mm256_undefined_si256();
        _mm512_mask_cvtsepi16_storeu_epi8(
            &mut r as *mut _ as *mut i8,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtsepi16_storeu_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let mut r = _mm_undefined_si128();
        _mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtsepi16_storeu_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0, 0, 0, 0, 0,
            i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepi16_storeu_epi8() {
        let a = _mm512_set1_epi16(8);
        let mut r = _mm256_undefined_si256();
        _mm512_mask_cvtepi16_storeu_epi8(
            &mut r as *mut _ as *mut i8,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm256_set1_epi8(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi16_storeu_epi8() {
        let a = _mm256_set1_epi16(8);
        let mut r = _mm_undefined_si128();
        _mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepi16_storeu_epi8() {
        let a = _mm_set1_epi16(8);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtusepi16_storeu_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let mut r = _mm256_undefined_si256();
        _mm512_mask_cvtusepi16_storeu_epi8(
            &mut r as *mut _ as *mut i8,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm256_set1_epi8(u8::MAX as i8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtusepi16_storeu_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let mut r = _mm_undefined_si128();
        _mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtusepi16_storeu_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0, 0, 0, 0, 0,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
        );
        assert_eq_m128i(r, e);
    }
}