core/stdarch/crates/core_arch/src/x86/avx512bw.rs

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    ptr,
};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi16&expand=30)
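///
/// # Examples
///
/// A minimal usage sketch, not taken from the original source: it assumes an
/// x86_64 `std` doc-test environment where `is_x86_feature_detected!` is
/// available for runtime feature detection.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") {
///     // SAFETY: the required avx512bw feature was detected at runtime.
///     unsafe {
///         let r = _mm512_abs_epi16(_mm512_set1_epi16(-5));
///         // Every 16-bit lane of r should now hold 5.
///         let m = _mm512_cmpeq_epi16_mask(r, _mm512_set1_epi16(5));
///         assert_eq!(m, u32::MAX);
///     }
/// }
/// # }
/// ```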
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm512_abs_epi16(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let cmp: i16x32 = simd_gt(a, i16x32::ZERO);
        transmute(simd_select(cmp, a, simd_neg(a)))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi16&expand=31)
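///
/// # Examples
///
/// A hedged sketch of the writemask semantics (same runtime-detection
/// assumptions as the example for `_mm512_abs_epi16`): mask bit `i` selects
/// between the computed result and `src` for lane `i`.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") {
///     // SAFETY: the required avx512bw feature was detected at runtime.
///     unsafe {
///         let src = _mm512_set1_epi16(42);
///         let a = _mm512_set1_epi16(-7);
///         // Lanes 0..16 take abs(a); lanes 16..32 are copied from src.
///         let r = _mm512_mask_abs_epi16(src, 0x0000_FFFF, a);
///         let sevens = _mm512_cmpeq_epi16_mask(r, _mm512_set1_epi16(7));
///         assert_eq!(sevens, 0x0000_FFFF);
///     }
/// }
/// # }
/// ```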
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm512_mask_abs_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, abs, src.as_i16x32()))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi16&expand=32)
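///
/// # Examples
///
/// A hedged sketch of the zeromask semantics (same runtime-detection
/// assumptions as the example for `_mm512_abs_epi16`): lanes whose mask bit
/// is clear are zeroed instead of copied from a source vector.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") {
///     // SAFETY: the required avx512bw feature was detected at runtime.
///     unsafe {
///         let a = _mm512_set1_epi16(-7);
///         // Lanes 16..32 hold abs(a) == 7; lanes 0..16 are zeroed.
///         let r = _mm512_maskz_abs_epi16(0xFFFF_0000, a);
///         let zeroed = _mm512_cmpeq_epi16_mask(r, _mm512_setzero_si512());
///         assert_eq!(zeroed, 0x0000_FFFF);
///     }
/// }
/// # }
/// ```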
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm512_maskz_abs_epi16(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, abs, i16x32::ZERO))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi16&expand=28)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm256_mask_abs_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, abs, src.as_i16x16()))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi16&expand=29)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm256_maskz_abs_epi16(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, abs, i16x16::ZERO))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi16&expand=25)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm_mask_abs_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, abs, src.as_i16x8()))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi16&expand=26)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, abs, i16x8::ZERO))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi8&expand=57)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm512_abs_epi8(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let cmp: i8x64 = simd_gt(a, i8x64::ZERO);
        transmute(simd_select(cmp, a, simd_neg(a)))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi8&expand=58)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm512_mask_abs_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, abs, src.as_i8x64()))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi8&expand=59)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm512_maskz_abs_epi8(k: __mmask64, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, abs, i8x64::ZERO))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi8&expand=55)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm256_mask_abs_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, abs, src.as_i8x32()))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi8&expand=56)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm256_maskz_abs_epi8(k: __mmask32, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, abs, i8x32::ZERO))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi8&expand=52)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm_mask_abs_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, abs, src.as_i8x16()))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi8&expand=53)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm_maskz_abs_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, abs, i8x16::ZERO))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi16&expand=91)
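///
/// # Examples
///
/// A hedged sketch (same runtime-detection assumptions as the example for
/// `_mm512_abs_epi16`) showing that the plain add wraps on overflow, unlike
/// the saturating `_mm512_adds_*` family below.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") {
///     // SAFETY: the required avx512bw feature was detected at runtime.
///     unsafe {
///         let a = _mm512_set1_epi16(i16::MAX);
///         let b = _mm512_set1_epi16(1);
///         // i16::MAX + 1 wraps around to i16::MIN in every lane.
///         let r = _mm512_add_epi16(a, b);
///         let m = _mm512_cmpeq_epi16_mask(r, _mm512_set1_epi16(i16::MIN));
///         assert_eq!(m, u32::MAX);
///     }
/// }
/// # }
/// ```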
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm512_add_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i16x32(), b.as_i16x32())) }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi16&expand=92)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm512_mask_add_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, add, src.as_i16x32()))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi16&expand=93)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm512_maskz_add_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, add, i16x32::ZERO))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi16&expand=89)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm256_mask_add_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, add, src.as_i16x16()))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi16&expand=90)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm256_maskz_add_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, add, i16x16::ZERO))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi16&expand=86)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm_mask_add_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, add, src.as_i16x8()))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi16&expand=87)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm_maskz_add_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, add, i16x8::ZERO))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi8&expand=118)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm512_add_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i8x64(), b.as_i8x64())) }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi8&expand=119)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm512_mask_add_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, add, src.as_i8x64()))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi8&expand=120)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm512_maskz_add_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, add, i8x64::ZERO))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi8&expand=116)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm256_mask_add_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, add, src.as_i8x32()))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi8&expand=117)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm256_maskz_add_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, add, i8x32::ZERO))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi8&expand=113)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm_mask_add_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, add, src.as_i8x16()))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi8&expand=114)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm_maskz_add_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, add, i8x16::ZERO))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu16&expand=197)
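///
/// # Examples
///
/// A hedged sketch (same runtime-detection assumptions as the example for
/// `_mm512_abs_epi16`): unsigned saturating addition clamps at `u16::MAX`
/// instead of wrapping.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") {
///     // SAFETY: the required avx512bw feature was detected at runtime.
///     unsafe {
///         // All bits set: every unsigned 16-bit lane holds u16::MAX.
///         let a = _mm512_set1_epi16(-1);
///         let b = _mm512_set1_epi16(1);
///         // u16::MAX + 1 saturates to u16::MAX rather than wrapping to 0.
///         let r = _mm512_adds_epu16(a, b);
///         let m = _mm512_cmpeq_epi16_mask(r, _mm512_set1_epi16(-1));
///         assert_eq!(m, u32::MAX);
///     }
/// }
/// # }
/// ```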
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm512_adds_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_add(a.as_u16x32(), b.as_u16x32())) }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu16&expand=198)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm512_mask_adds_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, add, src.as_u16x32()))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu16&expand=199)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, add, u16x32::ZERO))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu16&expand=195)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm256_mask_adds_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, add, src.as_u16x16()))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu16&expand=196)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, add, u16x16::ZERO))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu16&expand=192)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, add, src.as_u16x8()))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu16&expand=193)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, add, u16x8::ZERO))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu8&expand=206)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_add(a.as_u8x64(), b.as_u8x64())) }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu8&expand=207)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, add, src.as_u8x64()))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu8&expand=208)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, add, u8x64::ZERO))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu8&expand=204)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, add, src.as_u8x32()))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu8&expand=205)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, add, u8x32::ZERO))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu8&expand=201)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, add, src.as_u8x16()))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu8&expand=202)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, add, u8x16::ZERO))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi16&expand=179)
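///
/// # Examples
///
/// A hedged sketch (same runtime-detection assumptions as the example for
/// `_mm512_abs_epi16`): signed saturating addition clamps at `i16::MAX` and
/// `i16::MIN`.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") {
///     // SAFETY: the required avx512bw feature was detected at runtime.
///     unsafe {
///         let a = _mm512_set1_epi16(i16::MAX);
///         let b = _mm512_set1_epi16(10);
///         // i16::MAX + 10 saturates to i16::MAX instead of wrapping.
///         let r = _mm512_adds_epi16(a, b);
///         let m = _mm512_cmpeq_epi16_mask(r, _mm512_set1_epi16(i16::MAX));
///         assert_eq!(m, u32::MAX);
///     }
/// }
/// # }
/// ```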
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm512_adds_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_add(a.as_i16x32(), b.as_i16x32())) }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi16&expand=180)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm512_mask_adds_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, add, src.as_i16x32()))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi16&expand=181)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, add, i16x32::ZERO))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi16&expand=177)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm256_mask_adds_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, add, src.as_i16x16()))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi16&expand=178)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, add, i16x16::ZERO))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi16&expand=174)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, add, src.as_i16x8()))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi16&expand=175)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, add, i16x8::ZERO))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi8&expand=188)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_add(a.as_i8x64(), b.as_i8x64())) }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi8&expand=189)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, add, src.as_i8x64()))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi8&expand=190)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, add, i8x64::ZERO))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi8&expand=186)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, add, src.as_i8x32()))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi8&expand=187)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, add, i8x32::ZERO))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi8&expand=183)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, add, src.as_i8x16()))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi8&expand=184)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub fn _mm_maskz_adds_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, add, i8x16::ZERO))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi16&expand=5685)
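///
/// # Examples
///
/// A hedged sketch (same runtime-detection assumptions as the example for
/// `_mm512_abs_epi16`): like `_mm512_add_epi16`, the plain subtract wraps on
/// overflow.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") {
///     // SAFETY: the required avx512bw feature was detected at runtime.
///     unsafe {
///         let a = _mm512_set1_epi16(i16::MIN);
///         let b = _mm512_set1_epi16(1);
///         // i16::MIN - 1 wraps around to i16::MAX in every lane.
///         let r = _mm512_sub_epi16(a, b);
///         let m = _mm512_cmpeq_epi16_mask(r, _mm512_set1_epi16(i16::MAX));
///         assert_eq!(m, u32::MAX);
///     }
/// }
/// # }
/// ```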
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub fn _mm512_sub_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_sub(a.as_i16x32(), b.as_i16x32())) }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi16&expand=5683)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub fn _mm512_mask_sub_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi16&expand=5684)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub fn _mm512_maskz_sub_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, sub, i16x32::ZERO))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi16&expand=5680)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub fn _mm256_mask_sub_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi16&expand=5681)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub fn _mm256_maskz_sub_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, sub, i16x16::ZERO))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi16&expand=5677)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub fn _mm_mask_sub_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi16&expand=5678)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub fn _mm_maskz_sub_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, sub, i16x8::ZERO))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi8&expand=5712)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub fn _mm512_sub_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_sub(a.as_i8x64(), b.as_i8x64())) }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi8&expand=5710)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub fn _mm512_mask_sub_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi8&expand=5711)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub fn _mm512_maskz_sub_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, sub, i8x64::ZERO))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi8&expand=5707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub fn _mm256_mask_sub_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi8&expand=5708)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub fn _mm256_maskz_sub_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, sub, i8x32::ZERO))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi8&expand=5704)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub fn _mm_mask_sub_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi8&expand=5705)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub fn _mm_maskz_sub_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, sub, i8x16::ZERO))
    }
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu16&expand=5793)
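///
/// # Examples
///
/// A hedged sketch (same runtime-detection assumptions as the example for
/// `_mm512_abs_epi16`): unsigned saturating subtraction floors at zero.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512bw") {
///     // SAFETY: the required avx512bw feature was detected at runtime.
///     unsafe {
///         let a = _mm512_set1_epi16(1);
///         let b = _mm512_set1_epi16(3);
///         // 1 - 3 saturates to 0 in every unsigned 16-bit lane.
///         let r = _mm512_subs_epu16(a, b);
///         let m = _mm512_cmpeq_epi16_mask(r, _mm512_setzero_si512());
///         assert_eq!(m, u32::MAX);
///     }
/// }
/// # }
/// ```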
971#[inline]
972#[target_feature(enable = "avx512bw")]
973#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
974#[cfg_attr(test, assert_instr(vpsubusw))]
975pub fn _mm512_subs_epu16(a: __m512i, b: __m512i) -> __m512i {
976    unsafe { transmute(simd_saturating_sub(a.as_u16x32(), b.as_u16x32())) }
977}
978
979/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
980///
981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu16&expand=5791)
982#[inline]
983#[target_feature(enable = "avx512bw")]
984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
985#[cfg_attr(test, assert_instr(vpsubusw))]
986pub fn _mm512_mask_subs_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
987    unsafe {
988        let sub = _mm512_subs_epu16(a, b).as_u16x32();
989        transmute(simd_select_bitmask(k, sub, src.as_u16x32()))
990    }
991}
992
993/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
994///
995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu16&expand=5792)
996#[inline]
997#[target_feature(enable = "avx512bw")]
998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
999#[cfg_attr(test, assert_instr(vpsubusw))]
1000pub fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1001    unsafe {
1002        let sub = _mm512_subs_epu16(a, b).as_u16x32();
1003        transmute(simd_select_bitmask(k, sub, u16x32::ZERO))
1004    }
1005}
1006
1007/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1008///
1009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu16&expand=5788)
1010#[inline]
1011#[target_feature(enable = "avx512bw,avx512vl")]
1012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1013#[cfg_attr(test, assert_instr(vpsubusw))]
1014pub fn _mm256_mask_subs_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1015    unsafe {
1016        let sub = _mm256_subs_epu16(a, b).as_u16x16();
1017        transmute(simd_select_bitmask(k, sub, src.as_u16x16()))
1018    }
1019}
1020
1021/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1022///
1023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu16&expand=5789)
1024#[inline]
1025#[target_feature(enable = "avx512bw,avx512vl")]
1026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1027#[cfg_attr(test, assert_instr(vpsubusw))]
1028pub fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1029    unsafe {
1030        let sub = _mm256_subs_epu16(a, b).as_u16x16();
1031        transmute(simd_select_bitmask(k, sub, u16x16::ZERO))
1032    }
1033}
1034
1035/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1036///
1037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu16&expand=5785)
1038#[inline]
1039#[target_feature(enable = "avx512bw,avx512vl")]
1040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1041#[cfg_attr(test, assert_instr(vpsubusw))]
1042pub fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1043    unsafe {
1044        let sub = _mm_subs_epu16(a, b).as_u16x8();
1045        transmute(simd_select_bitmask(k, sub, src.as_u16x8()))
1046    }
1047}
1048
1049/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1050///
1051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu16&expand=5786)
1052#[inline]
1053#[target_feature(enable = "avx512bw,avx512vl")]
1054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1055#[cfg_attr(test, assert_instr(vpsubusw))]
1056pub fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1057    unsafe {
1058        let sub = _mm_subs_epu16(a, b).as_u16x8();
1059        transmute(simd_select_bitmask(k, sub, u16x8::ZERO))
1060    }
1061}
1062
1063/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst.
1064///
1065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu8&expand=5802)
1066#[inline]
1067#[target_feature(enable = "avx512bw")]
1068#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1069#[cfg_attr(test, assert_instr(vpsubusb))]
1070pub fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i {
1071    unsafe { transmute(simd_saturating_sub(a.as_u8x64(), b.as_u8x64())) }
1072}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu8&expand=5800)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, sub, src.as_u8x64()))
    }
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu8&expand=5801)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, sub, u8x64::ZERO))
    }
}
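
// A sketch of the writemask/zeromask distinction, using a hypothetical helper
// and a made-up mask value. With k = 0b01 only lane 0 receives the saturating
// difference: the writemask form copies the remaining lanes from `src`, while
// the zeromask form zeroes them.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn masked_subs_epu8_sketch(src: __m512i, a: __m512i, b: __m512i) -> (__m512i, __m512i) {
    let k: __mmask64 = 0b01;
    let merged = _mm512_mask_subs_epu8(src, k, a, b); // lanes 1..=63 copied from src
    let zeroed = _mm512_maskz_subs_epu8(k, a, b); // lanes 1..=63 set to 0
    (merged, zeroed)
}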

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu8&expand=5797)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, sub, src.as_u8x32()))
    }
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu8&expand=5798)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, sub, u8x32::ZERO))
    }
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu8&expand=5794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, sub, src.as_u8x16()))
    }
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu8&expand=5795)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, sub, u8x16::ZERO))
    }
}

/// Subtract packed signed 16-bit integers in b from packed signed 16-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi16&expand=5775)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub fn _mm512_subs_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_sub(a.as_i16x32(), b.as_i16x32())) }
}
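
// A sketch of signed saturation at the negative end; the helper name is
// invented for illustration only. A plain wrapping subtraction of 1 from
// i16::MIN would produce i16::MAX, but the saturating form clamps.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn subs_epi16_clamp_sketch() -> __m512i {
    let a = _mm512_set1_epi16(i16::MIN);
    let b = _mm512_set1_epi16(1);
    // every lane clamps to i16::MIN (-32768) rather than wrapping to 32767
    _mm512_subs_epi16(a, b)
}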

/// Subtract packed signed 16-bit integers in b from packed signed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi16&expand=5773)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub fn _mm512_mask_subs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
    }
}

/// Subtract packed signed 16-bit integers in b from packed signed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi16&expand=5774)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, sub, i16x32::ZERO))
    }
}

/// Subtract packed signed 16-bit integers in b from packed signed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi16&expand=5770)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub fn _mm256_mask_subs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
    }
}

/// Subtract packed signed 16-bit integers in b from packed signed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi16&expand=5771)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, sub, i16x16::ZERO))
    }
}

/// Subtract packed signed 16-bit integers in b from packed signed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi16&expand=5767)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
    }
}

/// Subtract packed signed 16-bit integers in b from packed signed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi16&expand=5768)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, sub, i16x8::ZERO))
    }
}

/// Subtract packed signed 8-bit integers in b from packed signed 8-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi8&expand=5784)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_sub(a.as_i8x64(), b.as_i8x64())) }
}
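
// A sketch of saturation at the positive end, with invented lane values:
// 100 - (-100) = 200 exceeds i8::MAX, so every lane clamps to 127. The
// helper below is hypothetical, for exposition only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn subs_epi8_clamp_sketch() -> __m512i {
    let a = _mm512_set1_epi8(100);
    let b = _mm512_set1_epi8(-100);
    // plain wrapping subtraction would give -56; the saturating form gives 127
    _mm512_subs_epi8(a, b)
}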

/// Subtract packed signed 8-bit integers in b from packed signed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi8&expand=5782)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
    }
}

/// Subtract packed signed 8-bit integers in b from packed signed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi8&expand=5783)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, sub, i8x64::ZERO))
    }
}

/// Subtract packed signed 8-bit integers in b from packed signed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi8&expand=5779)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
    }
}

/// Subtract packed signed 8-bit integers in b from packed signed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi8&expand=5780)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, sub, i8x32::ZERO))
    }
}

/// Subtract packed signed 8-bit integers in b from packed signed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi8&expand=5776)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
    }
}

/// Subtract packed signed 8-bit integers in b from packed signed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi8&expand=5777)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, sub, i8x16::ZERO))
    }
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epu16&expand=3973)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub fn _mm512_mulhi_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = simd_cast::<_, u32x32>(a.as_u16x32());
        let b = simd_cast::<_, u32x32>(b.as_u16x32());
        let r = simd_shr(simd_mul(a, b), u32x32::splat(16));
        transmute(simd_cast::<u32x32, u16x32>(r))
    }
}
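
// A worked sketch of the high-half semantics, using a hypothetical helper:
// with both inputs holding the bit pattern 0x8000 (32768 as unsigned), the
// full 32-bit product is 0x4000_0000, whose upper 16 bits are 0x4000.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn mulhi_epu16_sketch() -> __m512i {
    let a = _mm512_set1_epi16(i16::MIN); // bit pattern 0x8000 = 32768 unsigned
    // every lane holds 0x4000: (32768 * 32768) >> 16
    _mm512_mulhi_epu16(a, a)
}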

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epu16&expand=3971)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub fn _mm512_mask_mulhi_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, mul, src.as_u16x32()))
    }
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epu16&expand=3972)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub fn _mm512_maskz_mulhi_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, mul, u16x32::ZERO))
    }
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epu16&expand=3968)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub fn _mm256_mask_mulhi_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, mul, src.as_u16x16()))
    }
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epu16&expand=3969)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub fn _mm256_maskz_mulhi_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, mul, u16x16::ZERO))
    }
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epu16&expand=3965)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub fn _mm_mask_mulhi_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhi_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, mul, src.as_u16x8()))
    }
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epu16&expand=3966)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub fn _mm_maskz_mulhi_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhi_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, mul, u16x8::ZERO))
    }
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epi16&expand=3962)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub fn _mm512_mulhi_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = simd_cast::<_, i32x32>(a.as_i16x32());
        let b = simd_cast::<_, i32x32>(b.as_i16x32());
        let r = simd_shr(simd_mul(a, b), i32x32::splat(16));
        transmute(simd_cast::<i32x32, i16x32>(r))
    }
}
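
// A sketch contrasting the signed high half with the unsigned variant above;
// the helper and the lane values are invented for illustration. The same bit
// patterns yield different upper halves under the two interpretations.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn mulhi_epi16_sketch() -> (__m512i, __m512i) {
    let a = _mm512_set1_epi16(-2); // 0xFFFE, i.e. 65534 when viewed unsigned
    let b = _mm512_set1_epi16(3);
    let signed_hi = _mm512_mulhi_epi16(a, b); // -2 * 3 = -6, high half 0xFFFF
    let unsigned_hi = _mm512_mulhi_epu16(a, b); // 65534 * 3 = 196602, high half 0x0002
    (signed_hi, unsigned_hi)
}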

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epi16&expand=3960)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub fn _mm512_mask_mulhi_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
    }
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epi16&expand=3961)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub fn _mm512_maskz_mulhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
    }
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epi16&expand=3957)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub fn _mm256_mask_mulhi_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
    }
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epi16&expand=3958)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub fn _mm256_maskz_mulhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
    }
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epi16&expand=3954)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub fn _mm_mask_mulhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhi_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
    }
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epi16&expand=3955)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub fn _mm_maskz_mulhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhi_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhrs_epi16&expand=3986)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm512_mulhrs_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpmulhrsw(a.as_i16x32(), b.as_i16x32())) }
}
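
// A worked sketch of the rounding multiply viewed as Q15 fixed point; the
// helper and the interpretation are illustrative assumptions, not upstream
// code. Per the description above, each lane computes
// (((a * b) >> 14) + 1) >> 1, so 0.5 * 0.25 rounds to 0.125:
// 0x4000 * 0x2000 = 0x0800_0000, and the stored lane is 0x1000.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn mulhrs_epi16_q15_sketch() -> __m512i {
    let half = _mm512_set1_epi16(0x4000); // 0.5 in Q15
    let quarter = _mm512_set1_epi16(0x2000); // 0.25 in Q15
    // every lane holds 0x1000, i.e. 0.125 in Q15
    _mm512_mulhrs_epi16(half, quarter)
}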

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhrs_epi16&expand=3984)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm512_mask_mulhrs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhrs_epi16&expand=3985)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm512_maskz_mulhrs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhrs_epi16&expand=3981)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm256_mask_mulhrs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhrs_epi16&expand=3982)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm256_maskz_mulhrs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhrs_epi16&expand=3978)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm_mask_mulhrs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhrs_epi16&expand=3979)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm_maskz_mulhrs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
    }
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi16&expand=3996)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub fn _mm512_mullo_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_mul(a.as_i16x32(), b.as_i16x32())) }
}
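
// A sketch of the low-half (wrapping) behavior, with invented lane values;
// the helper is hypothetical. 300 * 300 = 90000 = 0x0001_5F90, so only the
// low 16 bits, 0x5F90 = 24464, are stored.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn mullo_epi16_sketch() -> __m512i {
    let a = _mm512_set1_epi16(300);
    // every lane holds 24464, the low 16 bits of 90000
    _mm512_mullo_epi16(a, a)
}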

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi16&expand=3994)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub fn _mm512_mask_mullo_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mullo_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
    }
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi16&expand=3995)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub fn _mm512_maskz_mullo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mullo_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
    }
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi16&expand=3991)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub fn _mm256_mask_mullo_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mullo_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
    }
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi16&expand=3992)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub fn _mm256_maskz_mullo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mullo_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
    }
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi16&expand=3988)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub fn _mm_mask_mullo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mullo_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
    }
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi16&expand=3989)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mullo_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu16&expand=3609)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_u16x32();
        let b = b.as_u16x32();
        transmute(simd_select::<i16x32, _>(simd_gt(a, b), a, b))
    }
}
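
// A sketch showing why the unsigned comparison matters, with invented lane
// values; the helper is hypothetical. The bit pattern 0xFFFF is -1 as i16
// but 65535 as u16, so the unsigned max keeps it.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn max_epu16_sketch() -> __m512i {
    let a = _mm512_set1_epi16(-1); // 0xFFFF = 65535 unsigned
    let b = _mm512_set1_epi16(1);
    // every lane holds 0xFFFF; a signed max would have chosen 1 instead
    _mm512_max_epu16(a, b)
}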

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu16&expand=3607)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub fn _mm512_mask_max_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, max, src.as_u16x32()))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu16&expand=3608)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub fn _mm512_maskz_max_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, max, u16x32::ZERO))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu16&expand=3604)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub fn _mm256_mask_max_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, max, src.as_u16x16()))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu16&expand=3605)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub fn _mm256_maskz_max_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, max, u16x16::ZERO))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu16&expand=3601)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub fn _mm_mask_max_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, max, src.as_u16x8()))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu16&expand=3602)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, max, u16x8::ZERO))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu8&expand=3636)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        transmute(simd_select::<i8x64, _>(simd_gt(a, b), a, b))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu8&expand=3634)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub fn _mm512_mask_max_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, max, src.as_u8x64()))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu8&expand=3635)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub fn _mm512_maskz_max_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, max, u8x64::ZERO))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu8&expand=3631)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub fn _mm256_mask_max_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, max, src.as_u8x32()))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu8&expand=3632)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub fn _mm256_maskz_max_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, max, u8x32::ZERO))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu8&expand=3628)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub fn _mm_mask_max_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, max, src.as_u8x16()))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu8&expand=3629)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, max, u8x16::ZERO))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi16&expand=3573)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
pub fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        transmute(simd_select::<i16x32, _>(simd_gt(a, b), a, b))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi16&expand=3571)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
pub fn _mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, max, src.as_i16x32()))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi16&expand=3572)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
pub fn _mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, max, i16x32::ZERO))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi16&expand=3568)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
pub fn _mm256_mask_max_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, max, src.as_i16x16()))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi16&expand=3569)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
pub fn _mm256_maskz_max_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, max, i16x16::ZERO))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi16&expand=3565)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
pub fn _mm_mask_max_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, max, src.as_i16x8()))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi16&expand=3566)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
pub fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, max, i16x8::ZERO))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi8&expand=3600)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        transmute(simd_select::<i8x64, _>(simd_gt(a, b), a, b))
    }
}
2049
2050/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2051///
2052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi8&expand=3598)
2053#[inline]
2054#[target_feature(enable = "avx512bw")]
2055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2056#[cfg_attr(test, assert_instr(vpmaxsb))]
2057pub fn _mm512_mask_max_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2058    unsafe {
2059        let max = _mm512_max_epi8(a, b).as_i8x64();
2060        transmute(simd_select_bitmask(k, max, src.as_i8x64()))
2061    }
2062}
2063
2064/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2065///
2066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi8&expand=3599)
2067#[inline]
2068#[target_feature(enable = "avx512bw")]
2069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2070#[cfg_attr(test, assert_instr(vpmaxsb))]
2071pub fn _mm512_maskz_max_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2072    unsafe {
2073        let max = _mm512_max_epi8(a, b).as_i8x64();
2074        transmute(simd_select_bitmask(k, max, i8x64::ZERO))
2075    }
2076}
2077
2078/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2079///
2080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi8&expand=3595)
2081#[inline]
2082#[target_feature(enable = "avx512bw,avx512vl")]
2083#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2084#[cfg_attr(test, assert_instr(vpmaxsb))]
2085pub fn _mm256_mask_max_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2086    unsafe {
2087        let max = _mm256_max_epi8(a, b).as_i8x32();
2088        transmute(simd_select_bitmask(k, max, src.as_i8x32()))
2089    }
2090}
2091
2092/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2093///
2094/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi8&expand=3596)
2095#[inline]
2096#[target_feature(enable = "avx512bw,avx512vl")]
2097#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2098#[cfg_attr(test, assert_instr(vpmaxsb))]
2099pub fn _mm256_maskz_max_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2100    unsafe {
2101        let max = _mm256_max_epi8(a, b).as_i8x32();
2102        transmute(simd_select_bitmask(k, max, i8x32::ZERO))
2103    }
2104}
2105
2106/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2107///
2108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi8&expand=3592)
2109#[inline]
2110#[target_feature(enable = "avx512bw,avx512vl")]
2111#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2112#[cfg_attr(test, assert_instr(vpmaxsb))]
2113pub fn _mm_mask_max_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2114    unsafe {
2115        let max = _mm_max_epi8(a, b).as_i8x16();
2116        transmute(simd_select_bitmask(k, max, src.as_i8x16()))
2117    }
2118}
2119
2120/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2121///
2122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi8&expand=3593)
2123#[inline]
2124#[target_feature(enable = "avx512bw,avx512vl")]
2125#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2126#[cfg_attr(test, assert_instr(vpmaxsb))]
2127pub fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2128    unsafe {
2129        let max = _mm_max_epi8(a, b).as_i8x16();
2130        transmute(simd_select_bitmask(k, max, i8x16::ZERO))
2131    }
2132}
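
// A small sketch contrasting the plain and writemask forms of the byte
// maximum (assumes AVX-512BW support was checked at runtime; the helper is
// hypothetical and not part of the test suite):
//
//     #[target_feature(enable = "avx512bw")]
//     unsafe fn max_epi8_demo() {
//         let a = _mm512_set1_epi8(-5);
//         let b = _mm512_set1_epi8(3);
//         let src = _mm512_set1_epi8(100);
//         // Signed comparison: every lane is max(-5, 3) == 3.
//         let plain = _mm512_max_epi8(a, b);
//         // With an all-zero writemask, every lane is copied from `src`.
//         let masked = _mm512_mask_max_epi8(src, 0, a, b);
//         assert_eq!(_mm512_cmpeq_epi8_mask(plain, b), u64::MAX);
//         assert_eq!(_mm512_cmpeq_epi8_mask(masked, src), u64::MAX);
//     }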

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu16&expand=3723)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_u16x32();
        let b = b.as_u16x32();
        transmute(simd_select::<i16x32, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu16&expand=3721)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub fn _mm512_mask_min_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, min, src.as_u16x32()))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu16&expand=3722)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub fn _mm512_maskz_min_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, min, u16x32::ZERO))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu16&expand=3718)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub fn _mm256_mask_min_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, min, src.as_u16x16()))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu16&expand=3719)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub fn _mm256_maskz_min_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, min, u16x16::ZERO))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu16&expand=3715)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub fn _mm_mask_min_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, min, src.as_u16x8()))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu16&expand=3716)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, min, u16x8::ZERO))
    }
}
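
// A short sketch of why the *unsigned* 16-bit minimum matters (assumes
// AVX-512BW was detected at runtime; illustrative only): the bit pattern
// 0xFFFF is 65535 when read as unsigned, so it loses against 1 here, whereas
// the signed minimum treats the same pattern as -1 and picks it.
//
//     #[target_feature(enable = "avx512bw")]
//     unsafe fn min_epu16_demo() {
//         let a = _mm512_set1_epi16(-1); // bit pattern 0xFFFF == 65535 unsigned
//         let b = _mm512_set1_epi16(1);
//         let unsigned_min = _mm512_min_epu16(a, b); // every lane is 1
//         let signed_min = _mm512_min_epi16(a, b);   // every lane is -1
//         assert_eq!(_mm512_cmpeq_epi16_mask(unsigned_min, b), u32::MAX);
//         assert_eq!(_mm512_cmpeq_epi16_mask(signed_min, a), u32::MAX);
//     }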

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu8&expand=3750)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminub))]
pub fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        transmute(simd_select::<i8x64, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu8&expand=3748)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminub))]
pub fn _mm512_mask_min_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, min, src.as_u8x64()))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu8&expand=3749)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminub))]
pub fn _mm512_maskz_min_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, min, u8x64::ZERO))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu8&expand=3745)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminub))]
pub fn _mm256_mask_min_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, min, src.as_u8x32()))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu8&expand=3746)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminub))]
pub fn _mm256_maskz_min_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, min, u8x32::ZERO))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu8&expand=3742)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminub))]
pub fn _mm_mask_min_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, min, src.as_u8x16()))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu8&expand=3743)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminub))]
pub fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, min, u8x16::ZERO))
    }
}
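
// The same signed/unsigned contrast for bytes (a sketch under the usual
// runtime-detection assumption): 0x80 is 128 unsigned but -128 signed, so the
// unsigned and signed minima disagree on it.
//
//     #[target_feature(enable = "avx512bw,avx512vl")]
//     unsafe fn min_epu8_demo() {
//         let a = _mm_set1_epi8(-128); // bit pattern 0x80 == 128 unsigned
//         let b = _mm_set1_epi8(5);
//         let r = _mm_maskz_min_epu8(0xFFFF, a, b); // unsigned: min(128, 5) == 5
//         assert_eq!(_mm_cmpeq_epi8_mask(r, b), 0xFFFF);
//     }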

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi16&expand=3687)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        transmute(simd_select::<i16x32, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi16&expand=3685)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm512_mask_min_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, min, src.as_i16x32()))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi16&expand=3686)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm512_maskz_min_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, min, i16x32::ZERO))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi16&expand=3682)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm256_mask_min_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, min, src.as_i16x16()))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi16&expand=3683)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm256_maskz_min_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, min, i16x16::ZERO))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi16&expand=3679)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm_mask_min_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, min, src.as_i16x8()))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi16&expand=3680)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, min, i16x8::ZERO))
    }
}
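
// A brief sketch of the writemask form (illustrative; assumes AVX-512BW and
// AVX-512VL were detected): lanes with a clear mask bit keep the value from
// `src`, they are not zeroed.
//
//     #[target_feature(enable = "avx512bw,avx512vl")]
//     unsafe fn mask_min_epi16_demo() {
//         let src = _mm_set1_epi16(99);
//         let a = _mm_set1_epi16(-3);
//         let b = _mm_set1_epi16(8);
//         // Only lane 0 is computed; lanes 1..=7 are copied from `src`.
//         let r = _mm_mask_min_epi16(src, 0b0000_0001, a, b);
//         let expected = _mm_setr_epi16(-3, 99, 99, 99, 99, 99, 99, 99);
//         assert_eq!(_mm_cmpeq_epi16_mask(r, expected), 0xFF);
//     }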

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi8&expand=3714)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        transmute(simd_select::<i8x64, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi8&expand=3712)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm512_mask_min_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, min, src.as_i8x64()))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi8&expand=3713)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm512_maskz_min_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, min, i8x64::ZERO))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi8&expand=3709)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm256_mask_min_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, min, src.as_i8x32()))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi8&expand=3710)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm256_maskz_min_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, min, i8x32::ZERO))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi8&expand=3706)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm_mask_min_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, min, src.as_i8x16()))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi8&expand=3707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm_maskz_min_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, min, i8x16::ZERO))
    }
}
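
// A sketch tying the zeromask form to a computed predicate (assumes runtime
// AVX-512BW detection; the helper is made up for the example): keep
// min(a, b) only where `a` is negative, zero everywhere else.
//
//     #[target_feature(enable = "avx512bw")]
//     unsafe fn maskz_min_epi8_demo() {
//         let a = _mm512_set1_epi8(-2);
//         let b = _mm512_set1_epi8(7);
//         // `_mm512_cmplt_epi8_mask` is defined later in this module.
//         let negative = _mm512_cmplt_epi8_mask(a, _mm512_setzero_si512());
//         let r = _mm512_maskz_min_epi8(negative, a, b);
//         assert_eq!(_mm512_cmpeq_epi8_mask(r, a), u64::MAX);
//     }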

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu16_mask&expand=1050)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmplt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<u16x32, _>(simd_lt(a.as_u16x32(), b.as_u16x32())) }
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu16_mask&expand=1051)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmplt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu16_mask&expand=1050)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmplt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<u16x16, _>(simd_lt(a.as_u16x16(), b.as_u16x16())) }
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu16_mask&expand=1049)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmplt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu16_mask&expand=1018)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmplt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u16x8, _>(simd_lt(a.as_u16x8(), b.as_u16x8())) }
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu16_mask&expand=1019)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmplt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu8_mask&expand=1068)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmplt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<u8x64, _>(simd_lt(a.as_u8x64(), b.as_u8x64())) }
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu8_mask&expand=1069)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmplt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu8_mask&expand=1066)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmplt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<u8x32, _>(simd_lt(a.as_u8x32(), b.as_u8x32())) }
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu8_mask&expand=1067)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmplt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu8_mask&expand=1064)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmplt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<u8x16, _>(simd_lt(a.as_u8x16(), b.as_u8x16())) }
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu8_mask&expand=1065)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmplt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
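
// The compare intrinsics return a plain integer bitmask (bit i corresponds to
// lane i), so ordinary integer operations apply directly. A hedged sketch,
// assuming AVX-512BW/VL support was verified at runtime:
//
//     #[target_feature(enable = "avx512bw,avx512vl")]
//     unsafe fn cmplt_epu8_demo() {
//         let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//         let b = _mm_set1_epi8(4);
//         let k: __mmask16 = _mm_cmplt_epu8_mask(a, b);
//         assert_eq!(k, 0b0000_0000_0000_1111); // lanes 0..=3 are below 4
//         assert_eq!(k.count_ones(), 4);        // popcount of the mask
//     }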

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi16_mask&expand=1022)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmplt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<i16x32, _>(simd_lt(a.as_i16x32(), b.as_i16x32())) }
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi16_mask&expand=1023)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmplt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi16_mask&expand=1020)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmplt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<i16x16, _>(simd_lt(a.as_i16x16(), b.as_i16x16())) }
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi16_mask&expand=1021)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmplt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16_mask&expand=1018)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmplt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi16_mask&expand=1019)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmplt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
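
// The `k1` parameter of the masked compares acts as a pre-filter: the result
// is the AND of `k1` with the full comparison mask. A minimal sketch
// (illustrative only, assuming AVX-512BW/VL support):
//
//     #[target_feature(enable = "avx512bw,avx512vl")]
//     unsafe fn mask_cmplt_epi16_demo() {
//         let a = _mm_set1_epi16(-1);
//         let b = _mm_set1_epi16(0);
//         // Every lane satisfies a < b, but only bits set in k1 survive.
//         let k = _mm_mask_cmplt_epi16_mask(0b1010_1010, a, b);
//         assert_eq!(k, 0b1010_1010);
//     }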

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi8_mask&expand=1044)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmplt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<i8x64, _>(simd_lt(a.as_i8x64(), b.as_i8x64())) }
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi8_mask&expand=1045)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmplt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi8_mask&expand=1042)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmplt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<i8x32, _>(simd_lt(a.as_i8x32(), b.as_i8x32())) }
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi8_mask&expand=1043)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmplt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8_mask&expand=1040)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmplt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi8_mask&expand=1041)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmplt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
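
// Signedness changes the verdict for the same bit pattern; a sketch under the
// usual runtime-detection assumptions:
//
//     #[target_feature(enable = "avx512bw,avx512vl")]
//     unsafe fn signedness_demo() {
//         let a = _mm_set1_epi8(-1); // 0xFF: -1 signed, 255 unsigned
//         let b = _mm_set1_epi8(0);
//         assert_eq!(_mm_cmplt_epi8_mask(a, b), 0xFFFF); // -1 < 0 in every lane
//         assert_eq!(_mm_cmplt_epu8_mask(a, b), 0);      // 255 < 0 never holds
//     }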

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu16_mask&expand=927)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpgt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<u16x32, _>(simd_gt(a.as_u16x32(), b.as_u16x32())) }
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu16_mask&expand=928)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpgt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu16_mask&expand=925)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpgt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<u16x16, _>(simd_gt(a.as_u16x16(), b.as_u16x16())) }
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu16_mask&expand=926)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpgt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu16_mask&expand=923)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpgt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u16x8, _>(simd_gt(a.as_u16x8(), b.as_u16x8())) }
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu16_mask&expand=924)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpgt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu8_mask&expand=945)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpgt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<u8x64, _>(simd_gt(a.as_u8x64(), b.as_u8x64())) }
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu8_mask&expand=946)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpgt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu8_mask&expand=943)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpgt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<u8x32, _>(simd_gt(a.as_u8x32(), b.as_u8x32())) }
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu8_mask&expand=944)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpgt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu8_mask&expand=941)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpgt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<u8x16, _>(simd_gt(a.as_u8x16(), b.as_u8x16())) }
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu8_mask&expand=942)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpgt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
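
// A quick unsigned greater-than sketch (illustrative; assumes AVX-512BW/VL
// support): 0x80 compares as 128, so it beats 0x7F == 127 here, the opposite
// of the signed ordering.
//
//     #[target_feature(enable = "avx512bw,avx512vl")]
//     unsafe fn cmpgt_epu8_demo() {
//         let a = _mm_set1_epi8(-128); // 0x80 == 128 unsigned
//         let b = _mm_set1_epi8(127);  // 0x7F
//         assert_eq!(_mm_cmpgt_epu8_mask(a, b), 0xFFFF);
//         assert_eq!(_mm_cmpgt_epi8_mask(a, b), 0); // signed: -128 > 127 never
//     }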

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi16_mask&expand=897)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpgt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<i16x32, _>(simd_gt(a.as_i16x32(), b.as_i16x32())) }
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi16_mask&expand=898)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpgt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi16_mask&expand=895)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpgt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi16_mask&expand=896)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpgt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16_mask&expand=893)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpgt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi16_mask&expand=894)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpgt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi8_mask&expand=921)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpgt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<i8x64, _>(simd_gt(a.as_i8x64(), b.as_i8x64())) }
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi8_mask&expand=922)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpgt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi8_mask&expand=919)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpgt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi8_mask&expand=920)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpgt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8_mask&expand=917)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpgt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi8_mask&expand=918)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpgt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu16_mask&expand=989)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<u16x32, _>(simd_le(a.as_u16x32(), b.as_u16x32())) }
}

/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu16_mask&expand=990)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu16_mask&expand=987)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmple_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<u16x16, _>(simd_le(a.as_u16x16(), b.as_u16x16())) }
}

/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu16_mask&expand=988)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmple_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu16_mask&expand=985)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmple_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u16x8, _>(simd_le(a.as_u16x8(), b.as_u16x8())) }
}

/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu16_mask&expand=986)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmple_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3123
3124/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.   
3125///
3126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu8_mask&expand=1007)
3127#[inline]
3128#[target_feature(enable = "avx512bw")]
3129#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3130#[cfg_attr(test, assert_instr(vpcmp))]
3131pub fn _mm512_cmple_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3132    unsafe { simd_bitmask::<u8x64, _>(simd_le(a.as_u8x64(), b.as_u8x64())) }
3133}
3134
3135/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3136///
3137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu8_mask&expand=1008)
3138#[inline]
3139#[target_feature(enable = "avx512bw")]
3140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3141#[cfg_attr(test, assert_instr(vpcmp))]
3142pub fn _mm512_mask_cmple_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
3143    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
3144}
3145
3146/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.   
3147///
3148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu8_mask&expand=1005)
3149#[inline]
3150#[target_feature(enable = "avx512bw,avx512vl")]
3151#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3152#[cfg_attr(test, assert_instr(vpcmp))]
3153pub fn _mm256_cmple_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3154    unsafe { simd_bitmask::<u8x32, _>(simd_le(a.as_u8x32(), b.as_u8x32())) }
3155}
3156
3157/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3158///
3159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu8_mask&expand=1006)
3160#[inline]
3161#[target_feature(enable = "avx512bw,avx512vl")]
3162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3163#[cfg_attr(test, assert_instr(vpcmp))]
3164pub fn _mm256_mask_cmple_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
3165    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
3166}
3167
3168/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.   
3169///
3170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu8_mask&expand=1003)
3171#[inline]
3172#[target_feature(enable = "avx512bw,avx512vl")]
3173#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3174#[cfg_attr(test, assert_instr(vpcmp))]
3175pub fn _mm_cmple_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3176    unsafe { simd_bitmask::<u8x16, _>(simd_le(a.as_u8x16(), b.as_u8x16())) }
3177}
3178
3179/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3180///
3181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu8_mask&expand=1004)
3182#[inline]
3183#[target_feature(enable = "avx512bw,avx512vl")]
3184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3185#[cfg_attr(test, assert_instr(vpcmp))]
3186pub fn _mm_mask_cmple_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
3187    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
3188}
3189
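// Illustrative sketch (not part of the original source): an all-ones
// `__mmask16` from `_mm_cmple_epu8_mask` means every byte lane passed, which
// gives a cheap "all bytes within bound" test. Hypothetical helper; assumes
// AVX-512BW/AVX-512VL support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn example_all_bytes_le(a: __m128i, limit: u8) -> bool {
    // Broadcast the bound and compare unsigned: 16 lanes -> 16 mask bits.
    _mm_cmple_epu8_mask(a, _mm_set1_epi8(limit as i8)) == 0xFFFF
}
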
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi16_mask&expand=965)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmple_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<i16x32, _>(simd_le(a.as_i16x32(), b.as_i16x32())) }
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi16_mask&expand=966)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmple_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi16_mask&expand=963)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmple_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<i16x16, _>(simd_le(a.as_i16x16(), b.as_i16x16())) }
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi16_mask&expand=964)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmple_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi16_mask&expand=961)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmple_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i16x8, _>(simd_le(a.as_i16x8(), b.as_i16x8())) }
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi16_mask&expand=962)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmple_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}

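// Illustrative sketch (not part of the original source): signedness changes
// the outcome for the same bit patterns. A lane holding 0xFFFF is -1 to the
// signed compare (so -1 <= 0 holds) but 65535 to the unsigned compare (so
// 65535 <= 0 fails). Hypothetical helper; assumes AVX-512BW/AVX-512VL.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn example_signed_vs_unsigned_le() -> (__mmask8, __mmask8) {
    let a = _mm_set1_epi16(-1); // every 16-bit lane is 0xFFFF
    let b = _mm_setzero_si128();
    // Returns (0xFF, 0x00): all lanes pass signed LE, none pass unsigned LE.
    (_mm_cmple_epi16_mask(a, b), _mm_cmple_epu16_mask(a, b))
}
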
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi8_mask&expand=983)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmple_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<i8x64, _>(simd_le(a.as_i8x64(), b.as_i8x64())) }
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi8_mask&expand=984)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmple_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi8_mask&expand=981)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmple_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<i8x32, _>(simd_le(a.as_i8x32(), b.as_i8x32())) }
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi8_mask&expand=982)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmple_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi8_mask&expand=979)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmple_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<i8x16, _>(simd_le(a.as_i8x16(), b.as_i8x16())) }
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi8_mask&expand=980)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmple_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu16_mask&expand=867)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpge_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<u16x32, _>(simd_ge(a.as_u16x32(), b.as_u16x32())) }
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu16_mask&expand=868)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpge_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu16_mask&expand=865)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpge_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<u16x16, _>(simd_ge(a.as_u16x16(), b.as_u16x16())) }
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu16_mask&expand=866)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpge_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu16_mask&expand=863)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpge_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u16x8, _>(simd_ge(a.as_u16x8(), b.as_u16x8())) }
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu16_mask&expand=864)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpge_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

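// Illustrative sketch (not part of the original source): greater-than-or-equal
// is encoded as `_MM_CMPINT_NLT` ("not less than"), so a GE mask is the
// bitwise complement of the corresponding LT mask. Hypothetical helper;
// assumes AVX-512BW/AVX-512VL support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn example_ge_is_not_lt(a: __m128i, b: __m128i) -> bool {
    // `__mmask8` holds exactly one bit per 16-bit lane here, so complementing
    // the LT mask flips every lane's verdict.
    _mm_cmpge_epu16_mask(a, b) == !_mm_cmplt_epu16_mask(a, b)
}
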
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu8_mask&expand=885)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpge_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<u8x64, _>(simd_ge(a.as_u8x64(), b.as_u8x64())) }
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu8_mask&expand=886)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpge_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu8_mask&expand=883)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpge_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<u8x32, _>(simd_ge(a.as_u8x32(), b.as_u8x32())) }
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu8_mask&expand=884)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpge_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu8_mask&expand=881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpge_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<u8x16, _>(simd_ge(a.as_u8x16(), b.as_u8x16())) }
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu8_mask&expand=882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpge_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi16_mask&expand=843)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpge_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<i16x32, _>(simd_ge(a.as_i16x32(), b.as_i16x32())) }
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi16_mask&expand=844)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpge_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi16_mask&expand=841)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpge_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<i16x16, _>(simd_ge(a.as_i16x16(), b.as_i16x16())) }
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi16_mask&expand=842)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpge_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi16_mask&expand=839)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpge_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i16x8, _>(simd_ge(a.as_i16x8(), b.as_i16x8())) }
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi16_mask&expand=840)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpge_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi8_mask&expand=861)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpge_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<i8x64, _>(simd_ge(a.as_i8x64(), b.as_i8x64())) }
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi8_mask&expand=862)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpge_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi8_mask&expand=859)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpge_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<i8x32, _>(simd_ge(a.as_i8x32(), b.as_i8x32())) }
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi8_mask&expand=860)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpge_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi8_mask&expand=857)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpge_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<i8x16, _>(simd_ge(a.as_i8x16(), b.as_i8x16())) }
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi8_mask&expand=858)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpge_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu16_mask&expand=801)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpeq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<u16x32, _>(simd_eq(a.as_u16x32(), b.as_u16x32())) }
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu16_mask&expand=802)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpeq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu16_mask&expand=799)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpeq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<u16x16, _>(simd_eq(a.as_u16x16(), b.as_u16x16())) }
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu16_mask&expand=800)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpeq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu16_mask&expand=797)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpeq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u16x8, _>(simd_eq(a.as_u16x8(), b.as_u16x8())) }
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu16_mask&expand=798)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpeq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

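// Illustrative sketch (not part of the original source): because the result
// is a plain integer bitmask, lane statistics reduce to integer ops, e.g.
// `count_ones` tallies how many 16-bit lanes compared equal. Hypothetical
// helper; assumes AVX-512BW/AVX-512VL support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn example_count_equal_lanes(a: __m128i, b: __m128i) -> u32 {
    _mm_cmpeq_epu16_mask(a, b).count_ones()
}
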
/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu8_mask&expand=819)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpeq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<u8x64, _>(simd_eq(a.as_u8x64(), b.as_u8x64())) }
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu8_mask&expand=820)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpeq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu8_mask&expand=817)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpeq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<u8x32, _>(simd_eq(a.as_u8x32(), b.as_u8x32())) }
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu8_mask&expand=818)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpeq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu8_mask&expand=815)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpeq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<u8x16, _>(simd_eq(a.as_u8x16(), b.as_u8x16())) }
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu8_mask&expand=816)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpeq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi16_mask&expand=771)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpeq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<i16x32, _>(simd_eq(a.as_i16x32(), b.as_i16x32())) }
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi16_mask&expand=772)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpeq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi16_mask&expand=769)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpeq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi16_mask&expand=770)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpeq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16_mask&expand=767)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpeq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi16_mask&expand=768)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpeq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi8_mask&expand=795)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpeq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<i8x64, _>(simd_eq(a.as_i8x64(), b.as_i8x64())) }
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi8_mask&expand=796)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpeq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi8_mask&expand=793)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpeq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi8_mask&expand=794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpeq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8_mask&expand=791)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpeq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi8_mask&expand=792)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpeq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

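// Illustrative sketch (not part of the original source): equality is a pure
// bit-pattern test, so the signed and unsigned forms always agree; both are
// provided only for API symmetry. Hypothetical helper; assumes
// AVX-512BW/AVX-512VL support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn example_eq_ignores_signedness(a: __m128i, b: __m128i) -> bool {
    _mm_cmpeq_epi8_mask(a, b) == _mm_cmpeq_epu8_mask(a, b)
}
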
/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu16_mask&expand=1106)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpneq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<u16x32, _>(simd_ne(a.as_u16x32(), b.as_u16x32())) }
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu16_mask&expand=1107)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpneq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu16_mask&expand=1104)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpneq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<u16x16, _>(simd_ne(a.as_u16x16(), b.as_u16x16())) }
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu16_mask&expand=1105)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpneq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu16_mask&expand=1102)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpneq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u16x8, _>(simd_ne(a.as_u16x8(), b.as_u16x8())) }
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu16_mask&expand=1103)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpneq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu8_mask&expand=1124)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpneq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<u8x64, _>(simd_ne(a.as_u8x64(), b.as_u8x64())) }
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu8_mask&expand=1125)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpneq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu8_mask&expand=1122)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpneq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<u8x32, _>(simd_ne(a.as_u8x32(), b.as_u8x32())) }
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu8_mask&expand=1123)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpneq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu8_mask&expand=1120)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpneq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<u8x16, _>(simd_ne(a.as_u8x16(), b.as_u8x16())) }
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu8_mask&expand=1121)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpneq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi16_mask&expand=1082)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpneq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<i16x32, _>(simd_ne(a.as_i16x32(), b.as_i16x32())) }
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi16_mask&expand=1083)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpneq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi16_mask&expand=1080)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpneq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<i16x16, _>(simd_ne(a.as_i16x16(), b.as_i16x16())) }
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi16_mask&expand=1081)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpneq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi16_mask&expand=1078)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpneq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i16x8, _>(simd_ne(a.as_i16x8(), b.as_i16x8())) }
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi16_mask&expand=1079)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpneq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi8_mask&expand=1100)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpneq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<i8x64, _>(simd_ne(a.as_i8x64(), b.as_i8x64())) }
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi8_mask&expand=1101)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi8_mask&expand=1098)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpneq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<i8x32, _>(simd_ne(a.as_i8x32(), b.as_i8x32())) }
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi8_mask&expand=1099)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpneq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi8_mask&expand=1096)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpneq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<i8x16, _>(simd_ne(a.as_i8x16(), b.as_i8x16())) }
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi8_mask&expand=1097)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpneq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}

4114/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
4115///
4116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu16_mask&expand=715)
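///
/// # Example
///
/// A minimal illustrative sketch with hypothetical values (marked `ignore`,
/// so it is not compiled as a doctest); it assumes `avx512bw` support at
/// runtime. The `IMM8` operand selects the predicate, here `_MM_CMPINT_LT`.
///
/// ```ignore
/// let a = _mm512_set1_epi16(2);
/// let b = _mm512_set1_epi16(3);
/// // Every unsigned lane satisfies 2 < 3, so all 32 mask bits are set.
/// let m = _mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
/// assert_eq!(m, u32::MAX);
/// ```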
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_cmp_epu16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u16x32();
        let b = b.as_u16x32();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x32::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x32::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu16_mask&expand=716)
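///
/// # Example
///
/// A minimal illustrative sketch with hypothetical values (marked `ignore`,
/// so it is not compiled as a doctest); it assumes `avx512bw` support at
/// runtime. Bits cleared in `k1` can never appear in the result.
///
/// ```ignore
/// let a = _mm512_set1_epi16(7);
/// let b = _mm512_set1_epi16(7);
/// // All lanes compare equal, but the zeromask keeps only bits 1 and 3.
/// let m = _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(0b1010, a, b);
/// assert_eq!(m, 0b1010);
/// ```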
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_mask_cmp_epu16_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u16x32();
        let b = b.as_u16x32();
        let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x32::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu16_mask&expand=713)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_cmp_epu16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u16x16();
        let b = b.as_u16x16();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x16::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x16::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu16_mask&expand=714)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_mask_cmp_epu16_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u16x16();
        let b = b.as_u16x16();
        let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x16::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu16_mask&expand=711)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_cmp_epu16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u16x8();
        let b = b.as_u16x8();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x8::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu16_mask&expand=712)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_mask_cmp_epu16_mask<const IMM8: i32>(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u16x8();
        let b = b.as_u16x8();
        let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu8_mask&expand=733)
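///
/// # Example
///
/// A minimal illustrative sketch with hypothetical values (marked `ignore`,
/// so it is not compiled as a doctest); it assumes `avx512bw` support at
/// runtime and highlights that the lanes are compared as unsigned.
///
/// ```ignore
/// let a = _mm512_set1_epi8(-1); // every lane is 0xFF, i.e. 255 unsigned
/// let b = _mm512_set1_epi8(1);
/// // 255 > 1 in every lane, so all 64 mask bits are set.
/// let m = _mm512_cmp_epu8_mask::<_MM_CMPINT_NLE>(a, b);
/// assert_eq!(m, u64::MAX);
/// ```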
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_cmp_epu8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x64::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x64::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu8_mask&expand=734)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_mask_cmp_epu8_mask<const IMM8: i32>(
    k1: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __mmask64 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x64::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu8_mask&expand=731)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_cmp_epu8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x32::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x32::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu8_mask&expand=732)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_mask_cmp_epu8_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x32::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu8_mask&expand=729)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_cmp_epu8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x16::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x16::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu8_mask&expand=730)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_mask_cmp_epu8_mask<const IMM8: i32>(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x16::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi16_mask&expand=691)
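///
/// # Example
///
/// A minimal illustrative sketch with hypothetical values (marked `ignore`,
/// so it is not compiled as a doctest); it assumes `avx512bw` support at
/// runtime. Unlike the `epu16` variant, lanes are compared as signed.
///
/// ```ignore
/// let a = _mm512_set1_epi16(-1);
/// let b = _mm512_set1_epi16(0);
/// // Signed compare: -1 < 0 in every lane, so all 32 mask bits are set.
/// let m = _mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
/// assert_eq!(m, u32::MAX);
/// ```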
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_cmp_epi16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x32::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x32::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi16_mask&expand=692)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_mask_cmp_epi16_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x32::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi16_mask&expand=689)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_cmp_epi16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i16x16();
        let b = b.as_i16x16();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x16::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x16::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi16_mask&expand=690)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_mask_cmp_epi16_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i16x16();
        let b = b.as_i16x16();
        let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x16::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi16_mask&expand=687)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_cmp_epi16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i16x8();
        let b = b.as_i16x8();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x8::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi16_mask&expand=688)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_mask_cmp_epi16_mask<const IMM8: i32>(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i16x8();
        let b = b.as_i16x8();
        let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi8_mask&expand=709)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_cmp_epi8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x64::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x64::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi8_mask&expand=710)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_mask_cmp_epi8_mask<const IMM8: i32>(
    k1: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __mmask64 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x64::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi8_mask&expand=707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_cmp_epi8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x32::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x32::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi8_mask&expand=708)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_mask_cmp_epi8_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x32::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi8_mask&expand=705)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_cmp_epi8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i8x16();
        let b = b.as_i8x16();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x16::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x16::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi8_mask&expand=706)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_mask_cmp_epi8_mask<const IMM8: i32>(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i8x16();
        let b = b.as_i8x16();
        let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x16::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi16)
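///
/// # Example
///
/// A minimal illustrative sketch with hypothetical values (marked `ignore`,
/// so it is not compiled as a doctest); it assumes `avx512bw` and `avx512vl`
/// support at runtime. The addition is unordered and wraps on overflow.
///
/// ```ignore
/// let a = _mm256_set1_epi16(3);
/// // 16 lanes of 3 sum to 48.
/// assert_eq!(_mm256_reduce_add_epi16(a), 48);
/// ```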
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_add_epi16(a: __m256i) -> i16 {
    unsafe { simd_reduce_add_unordered(a.as_i16x16()) }
}

/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi16)
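///
/// # Example
///
/// A minimal illustrative sketch with hypothetical values (marked `ignore`,
/// so it is not compiled as a doctest); it assumes `avx512bw` and `avx512vl`
/// support at runtime. Inactive lanes contribute 0 to the sum.
///
/// ```ignore
/// let a = _mm256_set1_epi16(3);
/// // Only the four lanes selected by the mask contribute: 4 * 3 = 12.
/// assert_eq!(_mm256_mask_reduce_add_epi16(0b1111, a), 12);
/// ```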
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 {
    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) }
}

/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_add_epi16(a: __m128i) -> i16 {
    unsafe { simd_reduce_add_unordered(a.as_i16x8()) }
}

/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 {
    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) }
}

/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_add_epi8(a: __m256i) -> i8 {
    unsafe { simd_reduce_add_unordered(a.as_i8x32()) }
}

/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 {
    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) }
}

/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_add_epi8(a: __m128i) -> i8 {
    unsafe { simd_reduce_add_unordered(a.as_i8x16()) }
}

/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_add_epi8(k: __mmask16, a: __m128i) -> i8 {
    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) }
}

/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_and_epi16(a: __m256i) -> i16 {
    unsafe { simd_reduce_and(a.as_i16x16()) }
}

/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi16)
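///
/// # Example
///
/// A minimal illustrative sketch with hypothetical values (marked `ignore`,
/// so it is not compiled as a doctest); it assumes `avx512bw` and `avx512vl`
/// support at runtime. Inactive lanes behave as all-ones, the identity for
/// bitwise AND, so masking lanes out never clears result bits.
///
/// ```ignore
/// let a = _mm256_set1_epi16(0b0110);
/// // One active lane; the 15 inactive lanes AND in as -1 (all ones).
/// assert_eq!(_mm256_mask_reduce_and_epi16(0b1, a), 0b0110);
/// ```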
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_and_epi16(k: __mmask16, a: __m256i) -> i16 {
    unsafe {
        simd_reduce_and(simd_select_bitmask(
            k,
            a.as_i16x16(),
            _mm256_set1_epi64x(-1).as_i16x16(),
        ))
    }
}

/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_and_epi16(a: __m128i) -> i16 {
    unsafe { simd_reduce_and(a.as_i16x8()) }
}

/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_and_epi16(k: __mmask8, a: __m128i) -> i16 {
    unsafe {
        simd_reduce_and(simd_select_bitmask(
            k,
            a.as_i16x8(),
            _mm_set1_epi64x(-1).as_i16x8(),
        ))
    }
}

/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_and_epi8(a: __m256i) -> i8 {
    unsafe { simd_reduce_and(a.as_i8x32()) }
}

/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_and_epi8(k: __mmask32, a: __m256i) -> i8 {
    unsafe {
        simd_reduce_and(simd_select_bitmask(
            k,
            a.as_i8x32(),
            _mm256_set1_epi64x(-1).as_i8x32(),
        ))
    }
}

/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_and_epi8(a: __m128i) -> i8 {
    unsafe { simd_reduce_and(a.as_i8x16()) }
}

/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_and_epi8(k: __mmask16, a: __m128i) -> i8 {
    unsafe {
        simd_reduce_and(simd_select_bitmask(
            k,
            a.as_i8x16(),
            _mm_set1_epi64x(-1).as_i8x16(),
        ))
    }
}

/// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_max_epi16(a: __m256i) -> i16 {
    unsafe { simd_reduce_max(a.as_i16x16()) }
}

/// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi16)
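///
/// # Example
///
/// A minimal illustrative sketch with hypothetical values (marked `ignore`,
/// so it is not compiled as a doctest); it assumes `avx512bw` and `avx512vl`
/// support at runtime. Inactive lanes act as `i16::MIN`, the identity for a
/// signed maximum, so they never win.
///
/// ```ignore
/// let a = _mm256_set1_epi16(-5);
/// // Two active lanes, both -5; the inactive lanes cannot exceed them.
/// assert_eq!(_mm256_mask_reduce_max_epi16(0b11, a), -5);
/// ```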
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_max_epi16(k: __mmask16, a: __m256i) -> i16 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(-32768))) }
}

/// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_max_epi16(a: __m128i) -> i16 {
    unsafe { simd_reduce_max(a.as_i16x8()) }
}

/// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_max_epi16(k: __mmask8, a: __m128i) -> i16 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(-32768))) }
}

/// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_max_epi8(a: __m256i) -> i8 {
    unsafe { simd_reduce_max(a.as_i8x32()) }
}

/// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_max_epi8(k: __mmask32, a: __m256i) -> i8 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(-128))) }
}

/// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_max_epi8(a: __m128i) -> i8 {
    unsafe { simd_reduce_max(a.as_i8x16()) }
}

/// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_max_epi8(k: __mmask16, a: __m128i) -> i8 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(-128))) }
}

/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_max_epu16(a: __m256i) -> u16 {
    unsafe { simd_reduce_max(a.as_u16x16()) }
}

/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_max_epu16(k: __mmask16, a: __m256i) -> u16 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x16(), u16x16::ZERO)) }
}

/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_max_epu16(a: __m128i) -> u16 {
    unsafe { simd_reduce_max(a.as_u16x8()) }
}

/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_max_epu16(k: __mmask8, a: __m128i) -> u16 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x8(), u16x8::ZERO)) }
}

/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_max_epu8(a: __m256i) -> u8 {
    unsafe { simd_reduce_max(a.as_u8x32()) }
}

/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_max_epu8(k: __mmask32, a: __m256i) -> u8 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x32(), u8x32::ZERO)) }
}

/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_max_epu8(a: __m128i) -> u8 {
    unsafe { simd_reduce_max(a.as_u8x16()) }
}

/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_max_epu8(k: __mmask16, a: __m128i) -> u8 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x16(), u8x16::ZERO)) }
}

/// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_min_epi16(a: __m256i) -> i16 {
    unsafe { simd_reduce_min(a.as_i16x16()) }
}

/// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_min_epi16(k: __mmask16, a: __m256i) -> i16 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(0x7fff))) }
}

/// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_min_epi16(a: __m128i) -> i16 {
    unsafe { simd_reduce_min(a.as_i16x8()) }
}

/// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_min_epi16(k: __mmask8, a: __m128i) -> i16 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(0x7fff))) }
}

/// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_min_epi8(a: __m256i) -> i8 {
    unsafe { simd_reduce_min(a.as_i8x32()) }
}

/// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_min_epi8(k: __mmask32, a: __m256i) -> i8 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(0x7f))) }
}

/// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_min_epi8(a: __m128i) -> i8 {
    unsafe { simd_reduce_min(a.as_i8x16()) }
}

/// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_min_epi8(k: __mmask16, a: __m128i) -> i8 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(0x7f))) }
}

/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_min_epu16(a: __m256i) -> u16 {
    unsafe { simd_reduce_min(a.as_u16x16()) }
}

/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu16)
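///
/// # Example
///
/// A minimal illustrative sketch with hypothetical values (marked `ignore`,
/// so it is not compiled as a doctest); it assumes `avx512bw` and `avx512vl`
/// support at runtime. Inactive lanes act as `u16::MAX`, the identity for an
/// unsigned minimum.
///
/// ```ignore
/// let a = _mm256_set1_epi16(9);
/// // One active lane of 9; the inactive lanes cannot undercut it.
/// assert_eq!(_mm256_mask_reduce_min_epu16(0b1, a), 9);
/// ```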
5243#[inline]
5244#[target_feature(enable = "avx512bw,avx512vl")]
5245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5246pub fn _mm256_mask_reduce_min_epu16(k: __mmask16, a: __m256i) -> u16 {
5247    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x16(), u16x16::splat(0xffff))) }
5248}
5249
5250/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a.
5251///
5252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu16)
5253#[inline]
5254#[target_feature(enable = "avx512bw,avx512vl")]
5255#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5256pub fn _mm_reduce_min_epu16(a: __m128i) -> u16 {
5257    unsafe { simd_reduce_min(a.as_u16x8()) }
5258}
5259
5260/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5261///
5262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu16)
5263#[inline]
5264#[target_feature(enable = "avx512bw,avx512vl")]
5265#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5266pub fn _mm_mask_reduce_min_epu16(k: __mmask8, a: __m128i) -> u16 {
5267    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x8(), u16x8::splat(0xffff))) }
5268}
5269
5270/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a.
5271///
5272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu8)
5273#[inline]
5274#[target_feature(enable = "avx512bw,avx512vl")]
5275#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5276pub fn _mm256_reduce_min_epu8(a: __m256i) -> u8 {
5277    unsafe { simd_reduce_min(a.as_u8x32()) }
5278}
5279
5280/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5281///
5282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu8)
5283#[inline]
5284#[target_feature(enable = "avx512bw,avx512vl")]
5285#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5286pub fn _mm256_mask_reduce_min_epu8(k: __mmask32, a: __m256i) -> u8 {
5287    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x32(), u8x32::splat(0xff))) }
5288}
5289
5290/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a.
5291///
5292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu8)
5293#[inline]
5294#[target_feature(enable = "avx512bw,avx512vl")]
5295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5296pub fn _mm_reduce_min_epu8(a: __m128i) -> u8 {
5297    unsafe { simd_reduce_min(a.as_u8x16()) }
5298}
5299
5300/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5301///
5302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu8)
5303#[inline]
5304#[target_feature(enable = "avx512bw,avx512vl")]
5305#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5306pub fn _mm_mask_reduce_min_epu8(k: __mmask16, a: __m128i) -> u8 {
5307    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x16(), u8x16::splat(0xff))) }
5308}
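
// Illustrative usage sketch (added for exposition; not part of the upstream
// source). The masked reductions above substitute the identity element of the
// operation for every inactive lane before reducing: for a minimum that is
// the type's maximum value (0x7f for i8, 0xff for u8, 0xffff for u16), so
// masked-off lanes can never win. This helper assumes the caller has already
// verified AVX512BW and AVX512VL support at runtime.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn example_masked_reduce_min(a: __m128i) -> i8 {
    // 0b0101_0101_0101_0101 keeps only the even-indexed byte lanes; the odd
    // lanes behave as i8::MAX and are ignored by the minimum.
    _mm_mask_reduce_min_epi8(0b0101_0101_0101_0101, a)
}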

/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 {
    unsafe { simd_reduce_mul_unordered(a.as_i16x16()) }
}

/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 {
    unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(1))) }
}

/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_mul_epi16(a: __m128i) -> i16 {
    unsafe { simd_reduce_mul_unordered(a.as_i16x8()) }
}

/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 {
    unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(1))) }
}

/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 {
    unsafe { simd_reduce_mul_unordered(a.as_i8x32()) }
}

/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 {
    unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(1))) }
}

/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_mul_epi8(a: __m128i) -> i8 {
    unsafe { simd_reduce_mul_unordered(a.as_i8x16()) }
}

/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_mul_epi8(k: __mmask16, a: __m128i) -> i8 {
    unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(1))) }
}
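
// Illustrative sketch (added for exposition; not upstream code). For the
// masked products the inactive lanes are replaced by 1, the multiplicative
// identity. Note that the reduction multiplies with ordinary wrapping
// integer semantics, so products that exceed the i16/i8 range wrap around.
// Assumes AVX512BW and AVX512VL were detected by the caller.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn example_masked_reduce_mul(a: __m128i) -> i16 {
    // Multiply only the low four 16-bit lanes; the upper four each
    // contribute a factor of 1.
    _mm_mask_reduce_mul_epi16(0b0000_1111, a)
}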

/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_or_epi16(a: __m256i) -> i16 {
    unsafe { simd_reduce_or(a.as_i16x16()) }
}

/// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_or_epi16(k: __mmask16, a: __m256i) -> i16 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) }
}

/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_or_epi16(a: __m128i) -> i16 {
    unsafe { simd_reduce_or(a.as_i16x8()) }
}

/// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_or_epi16(k: __mmask8, a: __m128i) -> i16 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) }
}

/// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_or_epi8(a: __m256i) -> i8 {
    unsafe { simd_reduce_or(a.as_i8x32()) }
}

/// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_or_epi8(k: __mmask32, a: __m256i) -> i8 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) }
}

/// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_or_epi8(a: __m128i) -> i8 {
    unsafe { simd_reduce_or(a.as_i8x16()) }
}

/// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_or_epi8(k: __mmask16, a: __m128i) -> i8 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) }
}
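
// Illustrative sketch (added for exposition; not upstream code). Because the
// masked OR reductions fold inactive lanes in as 0 (the identity for OR), a
// masked OR doubles as a cheap "is any active lane non-zero?" test. Assumes
// AVX512BW/AVX512VL support has been checked by the caller.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn example_any_active_byte_nonzero(k: __mmask16, a: __m128i) -> bool {
    _mm_mask_reduce_or_epi8(k, a) != 0
}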

/// Load 512-bits (composed of 32 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi16&expand=3368)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu16
pub unsafe fn _mm512_loadu_epi16(mem_addr: *const i16) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 16 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi16&expand=3365)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu16
pub unsafe fn _mm256_loadu_epi16(mem_addr: *const i16) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 8 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi16&expand=3362)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu16
pub unsafe fn _mm_loadu_epi16(mem_addr: *const i16) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
}

/// Load 512-bits (composed of 64 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi8&expand=3395)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu8
pub unsafe fn _mm512_loadu_epi8(mem_addr: *const i8) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 32 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi8&expand=3392)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu8
pub unsafe fn _mm256_loadu_epi8(mem_addr: *const i8) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 16 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi8&expand=3389)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu8
pub unsafe fn _mm_loadu_epi8(mem_addr: *const i8) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
}
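
// Illustrative sketch (added for exposition; not upstream code). The
// unaligned loads above are plain `read_unaligned`s, so the only safety
// requirement is that `mem_addr` is valid for reads of the full vector width
// (64 bytes for the 512-bit form); no alignment is needed.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_loadu_from_array(bytes: &[i8; 64]) -> __m512i {
    // Safety: the array is exactly 64 bytes, so the 512-bit read stays in
    // bounds regardless of the array's alignment.
    _mm512_loadu_epi8(bytes.as_ptr())
}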

/// Store 512-bits (composed of 32 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi16&expand=5622)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu16
pub unsafe fn _mm512_storeu_epi16(mem_addr: *mut i16, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 16 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi16&expand=5620)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu16
pub unsafe fn _mm256_storeu_epi16(mem_addr: *mut i16, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 8 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi16&expand=5618)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu16
pub unsafe fn _mm_storeu_epi16(mem_addr: *mut i16, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}

/// Store 512-bits (composed of 64 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi8&expand=5640)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu8
pub unsafe fn _mm512_storeu_epi8(mem_addr: *mut i8, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 32 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi8&expand=5638)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu8
pub unsafe fn _mm256_storeu_epi8(mem_addr: *mut i8, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 16 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi8&expand=5636)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu8
pub unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}
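
// Illustrative sketch (added for exposition; not upstream code). The store
// side mirrors the loads with `write_unaligned`: `mem_addr` must be valid
// for writes of the full vector width but needs no particular alignment.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_storeu_to_array(out: &mut [i16; 32], a: __m512i) {
    // Safety: 32 x i16 = 64 bytes, exactly the width of one 512-bit store.
    _mm512_storeu_epi16(out.as_mut_ptr(), a);
}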

/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *const i16) -> __m512i {
    transmute(loaddqu16_512(mem_addr, src.as_i16x32(), k))
}

/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
    _mm512_mask_loadu_epi16(_mm512_setzero_si512(), k, mem_addr)
}

/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *const i8) -> __m512i {
    transmute(loaddqu8_512(mem_addr, src.as_i8x64(), k))
}

/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
    _mm512_mask_loadu_epi8(_mm512_setzero_si512(), k, mem_addr)
}

/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *const i16) -> __m256i {
    transmute(loaddqu16_256(mem_addr, src.as_i16x16(), k))
}

/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
    _mm256_mask_loadu_epi16(_mm256_setzero_si256(), k, mem_addr)
}

/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *const i8) -> __m256i {
    transmute(loaddqu8_256(mem_addr, src.as_i8x32(), k))
}

/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
    _mm256_mask_loadu_epi8(_mm256_setzero_si256(), k, mem_addr)
}

/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i16) -> __m128i {
    transmute(loaddqu16_128(mem_addr, src.as_i16x8(), k))
}

/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
    _mm_mask_loadu_epi16(_mm_setzero_si128(), k, mem_addr)
}

/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i8) -> __m128i {
    transmute(loaddqu8_128(mem_addr, src.as_i8x16(), k))
}

/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
    _mm_mask_loadu_epi8(_mm_setzero_si128(), k, mem_addr)
}
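
// Illustrative sketch (added for exposition; not upstream code). Unlike the
// plain unaligned loads, the masked loads only access memory for lanes whose
// mask bit is set (masked-off lanes do not fault), which makes them suitable
// for reading the partial tail of a buffer without a scalar loop. Assumes
// AVX512BW and AVX512VL support has been verified by the caller.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_load_tail(ptr: *const i8, len: usize) -> __m128i {
    debug_assert!(len <= 16);
    // Build a mask with the low `len` bits set; lanes past the end of the
    // buffer stay inactive and are zeroed in the result.
    let k: __mmask16 = if len >= 16 { !0 } else { (1u16 << len) - 1 };
    _mm_maskz_loadu_epi8(k, ptr)
}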

/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) {
    storedqu16_512(mem_addr, a.as_i16x32(), mask)
}

/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) {
    storedqu8_512(mem_addr, a.as_i8x64(), mask)
}

/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) {
    storedqu16_256(mem_addr, a.as_i16x16(), mask)
}

/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) {
    storedqu8_256(mem_addr, a.as_i8x32(), mask)
}

/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) {
    storedqu16_128(mem_addr, a.as_i16x8(), mask)
}

/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) {
    storedqu8_128(mem_addr, a.as_i8x16(), mask)
}
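
// Illustrative sketch (added for exposition; not upstream code). The masked
// stores are the natural counterpart for writing a partial tail: only lanes
// with their mask bit set are written back to memory. Same runtime feature
// assumptions as the load-tail sketch above.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_store_tail(ptr: *mut i8, len: usize, a: __m128i) {
    debug_assert!(len <= 16);
    // Low `len` bits set: bytes past the end of the buffer are untouched.
    let k: __mmask16 = if len >= 16 { !0 } else { (1u16 << len) - 1 };
    _mm_mask_storeu_epi8(ptr, k, a);
}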

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_madd_epi16&expand=3511)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpmaddwd(a.as_i16x32(), b.as_i16x32())) }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_madd_epi16&expand=3512)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm512_mask_madd_epi16(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let madd = _mm512_madd_epi16(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, madd, src.as_i32x16()))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_madd_epi16&expand=3513)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm512_maskz_madd_epi16(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let madd = _mm512_madd_epi16(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, madd, i32x16::ZERO))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_madd_epi16&expand=3509)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm256_mask_madd_epi16(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let madd = _mm256_madd_epi16(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, madd, src.as_i32x8()))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_madd_epi16&expand=3510)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm256_maskz_madd_epi16(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let madd = _mm256_madd_epi16(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, madd, i32x8::ZERO))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_madd_epi16&expand=3506)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm_mask_madd_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let madd = _mm_madd_epi16(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, madd, src.as_i32x4()))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_madd_epi16&expand=3507)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let madd = _mm_madd_epi16(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, madd, i32x4::ZERO))
    }
}
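
// Illustrative sketch (added for exposition; not upstream code). vpmaddwd is
// the classic building block for widening dot products: each 32-bit output
// lane is a[2i]*b[2i] + a[2i+1]*b[2i+1], computed in 32-bit arithmetic, so a
// full i16 dot product can be finished with a 32-bit add-reduction.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512f")]
fn example_dot_product_i16(a: __m512i, b: __m512i) -> i32 {
    // 32 i16 products pair-summed into 16 i32 lanes, then reduced.
    _mm512_reduce_add_epi32(_mm512_madd_epi16(a, b))
}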

/// Vertically multiply each unsigned 8-bit integer from a with the corresponding signed 8-bit integer from b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maddubs_epi16&expand=3539)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpmaddubsw(a.as_i8x64(), b.as_i8x64())) }
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_maddubs_epi16&expand=3540)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm512_mask_maddubs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, madd, src.as_i16x32()))
    }
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_maddubs_epi16&expand=3541)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm512_maskz_maddubs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, madd, i16x32::ZERO))
    }
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_maddubs_epi16&expand=3537)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm256_mask_maddubs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, madd, src.as_i16x16()))
    }
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_maddubs_epi16&expand=3538)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm256_maskz_maddubs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, madd, i16x16::ZERO))
    }
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_maddubs_epi16&expand=3534)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm_mask_maddubs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let madd = _mm_maddubs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, madd, src.as_i16x8()))
    }
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_maddubs_epi16&expand=3535)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm_maskz_maddubs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let madd = _mm_maddubs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, madd, i16x8::ZERO))
    }
}
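
// Illustrative sketch (added for exposition; not upstream code). vpmaddubsw
// mixes an unsigned and a signed operand, and the pairwise sums saturate to
// the i16 range. A common pattern for u8 x i8 dot products is to follow it
// with vpmaddwd against all-ones, widening the i16 pairs into i32 lanes that
// can then be accumulated without saturation.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn example_dot_u8i8_step(a_u8: __m512i, b_i8: __m512i) -> __m512i {
    // i16 pair sums (saturating), then widened to i32 pair-of-pair sums.
    let pairs = _mm512_maddubs_epi16(a_u8, b_i8);
    _mm512_madd_epi16(pairs, _mm512_set1_epi16(1))
}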

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi32&expand=4091)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpackssdw(a.as_i32x16(), b.as_i32x16())) }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi32&expand=4089)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm512_mask_packs_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packs_epi32(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi32&expand=4090)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packs_epi32(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi32&expand=4086)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm256_mask_packs_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packs_epi32(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packs_epi32&expand=4087)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm256_maskz_packs_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packs_epi32(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, pack, i16x16::ZERO))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi32&expand=4083)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packs_epi32(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi32&expand=4084)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packs_epi32(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
    }
}
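
// Illustrative sketch (added for exposition; not upstream code). The
// saturating packs operate per 128-bit lane: each lane of the result holds
// values packed from the corresponding lane of `a` followed by the
// corresponding lane of `b`, so the output interleaves a/b at 128-bit
// granularity rather than concatenating them. Values outside the i16 range
// clamp to i16::MIN/i16::MAX, as the constants below demonstrate.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512f")]
fn example_packs_saturates() -> __m512i {
    // 100_000 exceeds i16::MAX and packs to 32767 in every a-derived lane;
    // -100_000 packs to -32768 in every b-derived lane.
    let a = _mm512_set1_epi32(100_000);
    let b = _mm512_set1_epi32(-100_000);
    _mm512_packs_epi32(a, b)
}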

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi16&expand=4082)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpacksswb(a.as_i16x32(), b.as_i16x32())) }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi16&expand=4080)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm512_mask_packs_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packs_epi16(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi16&expand=4081)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packs_epi16(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi16&expand=4077)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm256_mask_packs_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packs_epi16(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packs_epi16&expand=4078)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packs_epi16(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi16&expand=4074)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packs_epi16(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi16&expand=4075)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packs_epi16(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
    }
}
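
// Illustrative sketch (added for exposition; not upstream code). The
// 16-to-8-bit pack follows the same lane-wise layout as the 32-to-16-bit
// form, and the zeromask applies to the packed bytes of the result.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn example_masked_pack_epi16(a: __m512i, b: __m512i) -> __m512i {
    // Keep only the even output bytes; odd bytes are zeroed by the zeromask.
    _mm512_maskz_packs_epi16(0x5555_5555_5555_5555, a, b)
}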

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi32&expand=4130)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpackusdw(a.as_i32x16(), b.as_i32x16())) }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi32&expand=4128)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm512_mask_packus_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packus_epi32(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi32&expand=4129)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packus_epi32(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi32&expand=4125)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm256_mask_packus_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packus_epi32(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi32&expand=4126)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packus_epi32(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, pack, i16x16::ZERO))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi32&expand=4122)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packus_epi32(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi32&expand=4123)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packus_epi32(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
    }
}
6304
6305/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst.
6306///
6307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi16&expand=4121)
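///
/// # Example
///
/// A minimal usage sketch (illustrative only; the `demo` function is
/// hypothetical, and the block is not run as a doctest because it requires
/// AVX-512 hardware). 16-bit inputs outside the `u8` range saturate: 300
/// becomes 255 and any negative input becomes 0.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// fn demo() -> __m512i {
///     let a = _mm512_set1_epi16(300); // saturates to 255
///     let b = _mm512_set1_epi16(-7); // saturates to 0
///     _mm512_packus_epi16(a, b)
/// }
/// ```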
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpackuswb(a.as_i16x32(), b.as_i16x32())) }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi16&expand=4119)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm512_mask_packus_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packus_epi16(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi16&expand=4120)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packus_epi16(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi16&expand=4116)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm256_mask_packus_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packus_epi16(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi16&expand=4117)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packus_epi16(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi16&expand=4113)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packus_epi16(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi16&expand=4114)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packus_epi16(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu16&expand=388)
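///
/// # Example
///
/// A minimal usage sketch (illustrative only; the `demo` function is
/// hypothetical, and the block is not run as a doctest because it requires
/// AVX-512 hardware). Each lane computes `(a + b + 1) >> 1` in widened
/// arithmetic, i.e. a rounding average that cannot overflow:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// fn demo() -> __m512i {
///     let a = _mm512_set1_epi16(10);
///     let b = _mm512_set1_epi16(13);
///     // Every 16-bit lane holds (10 + 13 + 1) >> 1 == 12.
///     _mm512_avg_epu16(a, b)
/// }
/// ```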
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub fn _mm512_avg_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = simd_cast::<_, u32x32>(a.as_u16x32());
        let b = simd_cast::<_, u32x32>(b.as_u16x32());
        let r = simd_shr(simd_add(simd_add(a, b), u32x32::splat(1)), u32x32::splat(1));
        transmute(simd_cast::<_, u16x32>(r))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu16&expand=389)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub fn _mm512_mask_avg_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let avg = _mm512_avg_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, avg, src.as_u16x32()))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu16&expand=390)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub fn _mm512_maskz_avg_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let avg = _mm512_avg_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, avg, u16x32::ZERO))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu16&expand=386)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub fn _mm256_mask_avg_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let avg = _mm256_avg_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, avg, src.as_u16x16()))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu16&expand=387)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub fn _mm256_maskz_avg_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let avg = _mm256_avg_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, avg, u16x16::ZERO))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu16&expand=383)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub fn _mm_mask_avg_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let avg = _mm_avg_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, avg, src.as_u16x8()))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu16&expand=384)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub fn _mm_maskz_avg_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let avg = _mm_avg_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, avg, u16x8::ZERO))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu8&expand=397)
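///
/// # Example
///
/// A minimal usage sketch (illustrative only; the `demo` function is
/// hypothetical, and the block is not run as a doctest because it requires
/// AVX-512 hardware). As with the 16-bit variant, each 8-bit lane is the
/// rounding average `(a + b + 1) >> 1`, computed without overflow:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// fn demo() -> __m512i {
///     let a = _mm512_set1_epi8(200u8 as i8);
///     let b = _mm512_set1_epi8(255u8 as i8);
///     // Every 8-bit lane holds (200 + 255 + 1) >> 1 == 228.
///     _mm512_avg_epu8(a, b)
/// }
/// ```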
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub fn _mm512_avg_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = simd_cast::<_, u16x64>(a.as_u8x64());
        let b = simd_cast::<_, u16x64>(b.as_u8x64());
        let r = simd_shr(simd_add(simd_add(a, b), u16x64::splat(1)), u16x64::splat(1));
        transmute(simd_cast::<_, u8x64>(r))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu8&expand=398)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub fn _mm512_mask_avg_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let avg = _mm512_avg_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, avg, src.as_u8x64()))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu8&expand=399)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub fn _mm512_maskz_avg_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let avg = _mm512_avg_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, avg, u8x64::ZERO))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu8&expand=395)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub fn _mm256_mask_avg_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let avg = _mm256_avg_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, avg, src.as_u8x32()))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu8&expand=396)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub fn _mm256_maskz_avg_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let avg = _mm256_avg_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, avg, u8x32::ZERO))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu8&expand=392)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub fn _mm_mask_avg_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let avg = _mm_avg_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, avg, src.as_u8x16()))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu8&expand=393)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let avg = _mm_avg_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, avg, u8x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi16&expand=5271)
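///
/// # Example
///
/// A minimal usage sketch (illustrative only; the `demo` function is
/// hypothetical, and the block is not run as a doctest because it requires
/// AVX-512 hardware). All lanes are shifted by the same amount, taken from
/// the low 64 bits of `count`; a count of 16 or more zeroes the result:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// fn demo(a: __m512i) -> __m512i {
///     let count = _mm_set_epi64x(0, 3);
///     // Every 16-bit lane of `a` is shifted left by 3.
///     _mm512_sll_epi16(a, count)
/// }
/// ```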
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsllw(a.as_i16x32(), count.as_i16x8())) }
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi16&expand=5269)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm512_mask_sll_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sll_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi16&expand=5270)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sll_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi16&expand=5266)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm256_mask_sll_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sll_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi16&expand=5267)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sll_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi16&expand=5263)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sll_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi16&expand=5264)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sll_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi16&expand=5301)
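///
/// # Example
///
/// A minimal usage sketch (illustrative only; the `demo` function is
/// hypothetical, and the block is not run as a doctest because it requires
/// AVX-512 hardware). The shift amount is a const generic; amounts of 16 or
/// more yield an all-zero vector, mirroring the branch in the body below:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// fn demo() -> __m512i {
///     let a = _mm512_set1_epi16(1);
///     // Every 16-bit lane holds 1 << 5 == 32.
///     _mm512_slli_epi16::<5>(a)
/// }
/// ```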
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_slli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            transmute(simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
        }
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi16&expand=5299)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_slli_epi16<const IMM8: u32>(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = if IMM8 >= 16 {
            u16x32::ZERO
        } else {
            simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi16&expand=5300)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_slli_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            let shf = simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16));
            transmute(simd_select_bitmask(k, shf, u16x32::ZERO))
        }
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi16&expand=5296)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_slli_epi16<const IMM8: u32>(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = if IMM8 >= 16 {
            u16x16::ZERO
        } else {
            simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u16x16()))
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi16&expand=5297)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_slli_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 16 {
            _mm256_setzero_si256()
        } else {
            let shf = simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16));
            transmute(simd_select_bitmask(k, shf, u16x16::ZERO))
        }
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi16&expand=5293)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_slli_epi16<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = if IMM8 >= 16 {
            u16x8::ZERO
        } else {
            simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u16x8()))
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi16&expand=5294)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_slli_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 16 {
            _mm_setzero_si128()
        } else {
            let shf = simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16));
            transmute(simd_select_bitmask(k, shf, u16x8::ZERO))
        }
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi16&expand=5333)
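///
/// # Example
///
/// A minimal usage sketch (illustrative only; the `demo` function is
/// hypothetical, and the block is not run as a doctest because it requires
/// AVX-512 hardware). Unlike `_mm512_sll_epi16`, each lane uses its own shift
/// amount; counts of 16 or more zero the corresponding lane:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// fn demo(a: __m512i) -> __m512i {
///     // Lane i of `a` is shifted left by lane i of `counts`.
///     let counts = _mm512_set1_epi16(2);
///     _mm512_sllv_epi16(a, counts)
/// }
/// ```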
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsllvw(a.as_i16x32(), count.as_i16x32())) }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi16&expand=5331)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm512_mask_sllv_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_sllv_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi16&expand=5332)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_sllv_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi16&expand=5330)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe { transmute(vpsllvw256(a.as_i16x16(), count.as_i16x16())) }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi16&expand=5328)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm256_mask_sllv_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_sllv_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi16&expand=5329)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_sllv_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi16&expand=5327)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(vpsllvw128(a.as_i16x8(), count.as_i16x8())) }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi16&expand=5325)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm_mask_sllv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sllv_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi16&expand=5326)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sllv_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi16&expand=5483)
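///
/// # Example
///
/// A minimal usage sketch (illustrative only; the `demo` function is
/// hypothetical, and the block is not run as a doctest because it requires
/// AVX-512 hardware). The logical counterpart of `_mm512_sll_epi16`: all
/// lanes shift right by the low 64 bits of `count`, filling with zeros:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// fn demo(a: __m512i) -> __m512i {
///     let count = _mm_set_epi64x(0, 4);
///     // Every 16-bit lane of `a` is shifted right by 4, zero-filled.
///     _mm512_srl_epi16(a, count)
/// }
/// ```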
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsrlw(a.as_i16x32(), count.as_i16x8())) }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi16&expand=5481)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm512_mask_srl_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_srl_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi16&expand=5482)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_srl_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi16&expand=5478)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm256_mask_srl_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_srl_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi16&expand=5479)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_srl_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi16&expand=5475)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srl_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi16&expand=5476)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srl_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi16&expand=5513)
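///
/// # Example
///
/// A minimal usage sketch (illustrative only; the `demo` function is
/// hypothetical, and the block is not run as a doctest because it requires
/// AVX-512 hardware). The immediate form of the logical right shift;
/// immediates of 16 or more produce an all-zero vector:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// fn demo() -> __m512i {
///     let a = _mm512_set1_epi16(-1);
///     // Logical shift ignores the sign: every lane holds 0xFFFF >> 8 == 0xFF.
///     _mm512_srli_epi16::<8>(a)
/// }
/// ```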
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_srli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            transmute(simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
        }
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi16&expand=5511)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_srli_epi16<const IMM8: u32>(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = if IMM8 >= 16 {
            u16x32::ZERO
        } else {
            simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi16&expand=5512)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_srli_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Note: IMM8 is typed `i32` to match the existing stable signature,
        // but it is conceptually an unsigned 8-bit immediate (as the assert
        // above enforces); Intel's documentation is inconsistent on the type.
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            let shf = simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16));
            transmute(simd_select_bitmask(k, shf, u16x32::ZERO))
        }
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi16&expand=5508)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_srli_epi16<const IMM8: i32>(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm256_srli_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shf.as_i16x16(), src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi16&expand=5509)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_srli_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm256_srli_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shf.as_i16x16(), i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi16&expand=5505)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_srli_epi16<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm_srli_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shf.as_i16x8(), src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi16&expand=5506)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_srli_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm_srli_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shf.as_i16x8(), i16x8::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi16&expand=5545)
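///
/// # Example
///
/// A minimal usage sketch (illustrative only; the `demo` function is
/// hypothetical, and the block is not run as a doctest because it requires
/// AVX-512 hardware). Per-lane logical right shift; counts of 16 or more
/// zero the corresponding lane:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// fn demo(a: __m512i) -> __m512i {
///     // Lane i of `a` is shifted right by lane i of `counts`, zero-filled.
///     let counts = _mm512_set1_epi16(1);
///     _mm512_srlv_epi16(a, counts)
/// }
/// ```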
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsrlvw(a.as_i16x32(), count.as_i16x32())) }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi16&expand=5543)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm512_mask_srlv_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srlv_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi16&expand=5544)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srlv_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi16&expand=5542)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe { transmute(vpsrlvw256(a.as_i16x16(), count.as_i16x16())) }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi16&expand=5540)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm256_mask_srlv_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srlv_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi16&expand=5541)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srlv_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi16&expand=5539)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(vpsrlvw128(a.as_i16x8(), count.as_i16x8())) }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi16&expand=5537)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm_mask_srlv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srlv_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi16&expand=5538)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srlv_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi16&expand=5398)
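///
/// # Example
///
/// A minimal usage sketch (illustrative only; the `demo` function is
/// hypothetical, and the block is not run as a doctest because it requires
/// AVX-512 hardware). Arithmetic shifts replicate the sign bit, so negative
/// lanes stay negative; counts of 16 or more fill each lane entirely with
/// its sign bit (0 or -1):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// fn demo() -> __m512i {
///     let a = _mm512_set1_epi16(-32);
///     let count = _mm_set_epi64x(0, 3);
///     // Every 16-bit lane holds -32 >> 3 == -4, sign-extended.
///     _mm512_sra_epi16(a, count)
/// }
/// ```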
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsraw(a.as_i16x32(), count.as_i16x8())) }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi16&expand=5396)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm512_mask_sra_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sra_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi16&expand=5397)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sra_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi16&expand=5393)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm256_mask_sra_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sra_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi16&expand=5394)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sra_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi16&expand=5390)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sra_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi16&expand=5391)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7376    unsafe {
7377        let shf = _mm_sra_epi16(a, count).as_i16x8();
7378        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
7379    }
7380}
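
// A minimal usage sketch for the `sra` family, assuming `avx512bw` has been
// detected at runtime. Unlike `srav`, every lane is shifted by the same
// count, taken from the low 64 bits of the `count` vector, and the sign bit
// is shifted in:
//
//     unsafe {
//         let a = _mm512_set1_epi16(-32);
//         let count = _mm_set_epi64x(0, 4); // shift all lanes right by 4
//         let r = _mm512_sra_epi16(a, count); // every lane = -2
//         // writemask variant: lanes with a clear bit keep `src` (here `a`).
//         let m = _mm512_mask_sra_epi16(a, 0xFFFF_0000, a, count);
//     }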

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi16&expand=5427)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_srai_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16)))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi16&expand=5425)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_srai_epi16<const IMM8: u32>(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi16&expand=5426)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_srai_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi16&expand=5422)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_srai_epi16<const IMM8: u32>(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, r, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi16&expand=5423)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_srai_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, r, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi16&expand=5419)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_srai_epi16<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, r, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi16&expand=5420)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_srai_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, r, i16x8::ZERO))
    }
}
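
// A minimal usage sketch for the `srai` family, assuming `avx512bw` and
// `avx512vl` support. The shift count is a const generic, and counts above
// 15 behave like 15, collapsing every lane to its sign:
//
//     unsafe {
//         let a = _mm_set1_epi16(-256);
//         let r = _mm_srai_epi16::<4>(a); // every lane = -16
//         let z = _mm_maskz_srai_epi16::<4>(0b1111_0000, a); // lanes 0-3 zeroed
//     }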

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi16&expand=5456)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsravw(a.as_i16x32(), count.as_i16x32())) }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi16&expand=5454)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm512_mask_srav_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srav_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi16&expand=5455)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srav_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi16&expand=5453)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe { transmute(vpsravw256(a.as_i16x16(), count.as_i16x16())) }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi16&expand=5451)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm256_mask_srav_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srav_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi16&expand=5452)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srav_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi16&expand=5450)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(vpsravw128(a.as_i16x8(), count.as_i16x8())) }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi16&expand=5448)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm_mask_srav_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srav_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi16&expand=5449)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm_maskz_srav_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srav_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}
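
// A minimal usage sketch for the `srav` (per-element arithmetic shift)
// family, assuming `avx512bw` has been detected at runtime:
//
//     unsafe {
//         let a = _mm512_set1_epi16(-64);
//         let n = _mm512_set1_epi16(3); // independent count for each lane
//         let r = _mm512_srav_epi16(a, n); // every lane = -8 (sign shifted in)
//         let z = _mm512_maskz_srav_epi16(0x0000_FFFF, a, n); // lanes 16-31 zeroed
//     }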

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi16&expand=4226)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm512_permutex2var_epi16(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpermi2w(a.as_i16x32(), idx.as_i16x32(), b.as_i16x32())) }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi16&expand=4223)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2w))]
pub fn _mm512_mask_permutex2var_epi16(
    a: __m512i,
    k: __mmask32,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
        transmute(simd_select_bitmask(k, permute, a.as_i16x32()))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi16&expand=4225)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm512_maskz_permutex2var_epi16(
    k: __mmask32,
    a: __m512i,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
        transmute(simd_select_bitmask(k, permute, i16x32::ZERO))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi16&expand=4224)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermi2w))]
pub fn _mm512_mask2_permutex2var_epi16(
    a: __m512i,
    idx: __m512i,
    k: __mmask32,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
        transmute(simd_select_bitmask(k, permute, idx.as_i16x32()))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi16&expand=4222)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm256_permutex2var_epi16(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpermi2w256(a.as_i16x16(), idx.as_i16x16(), b.as_i16x16())) }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi16&expand=4219)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2w))]
pub fn _mm256_mask_permutex2var_epi16(
    a: __m256i,
    k: __mmask16,
    idx: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
        transmute(simd_select_bitmask(k, permute, a.as_i16x16()))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi16&expand=4221)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm256_maskz_permutex2var_epi16(
    k: __mmask16,
    a: __m256i,
    idx: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
        transmute(simd_select_bitmask(k, permute, i16x16::ZERO))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi16&expand=4220)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermi2w))]
pub fn _mm256_mask2_permutex2var_epi16(
    a: __m256i,
    idx: __m256i,
    k: __mmask16,
    b: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
        transmute(simd_select_bitmask(k, permute, idx.as_i16x16()))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi16&expand=4218)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm_permutex2var_epi16(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpermi2w128(a.as_i16x8(), idx.as_i16x8(), b.as_i16x8())) }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi16&expand=4215)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2w))]
pub fn _mm_mask_permutex2var_epi16(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
        transmute(simd_select_bitmask(k, permute, a.as_i16x8()))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi16&expand=4217)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm_maskz_permutex2var_epi16(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
        transmute(simd_select_bitmask(k, permute, i16x8::ZERO))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi16&expand=4216)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermi2w))]
pub fn _mm_mask2_permutex2var_epi16(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
        transmute(simd_select_bitmask(k, permute, idx.as_i16x8()))
    }
}
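
// A minimal usage sketch for the two-source permutes, assuming `avx512bw`
// and `avx512vl` support. For the 128-bit variant each index uses bits 2:0
// to select a lane and bit 3 to select the source (0 = a, 1 = b):
//
//     unsafe {
//         let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
//         let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
//         let idx = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
//         // Interleaves the low halves: 0, 8, 1, 9, 2, 10, 3, 11.
//         let r = _mm_permutex2var_epi16(a, idx, b);
//     }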

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi16&expand=4295)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm512_permutexvar_epi16(idx: __m512i, a: __m512i) -> __m512i {
    unsafe { transmute(vpermw(a.as_i16x32(), idx.as_i16x32())) }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi16&expand=4293)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm512_mask_permutexvar_epi16(
    src: __m512i,
    k: __mmask32,
    idx: __m512i,
    a: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
        transmute(simd_select_bitmask(k, permute, src.as_i16x32()))
    }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi16&expand=4294)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm512_maskz_permutexvar_epi16(k: __mmask32, idx: __m512i, a: __m512i) -> __m512i {
    unsafe {
        let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
        transmute(simd_select_bitmask(k, permute, i16x32::ZERO))
    }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi16&expand=4292)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm256_permutexvar_epi16(idx: __m256i, a: __m256i) -> __m256i {
    unsafe { transmute(vpermw256(a.as_i16x16(), idx.as_i16x16())) }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi16&expand=4290)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm256_mask_permutexvar_epi16(
    src: __m256i,
    k: __mmask16,
    idx: __m256i,
    a: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
        transmute(simd_select_bitmask(k, permute, src.as_i16x16()))
    }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi16&expand=4291)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm256_maskz_permutexvar_epi16(k: __mmask16, idx: __m256i, a: __m256i) -> __m256i {
    unsafe {
        let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
        transmute(simd_select_bitmask(k, permute, i16x16::ZERO))
    }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutexvar_epi16&expand=4289)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm_permutexvar_epi16(idx: __m128i, a: __m128i) -> __m128i {
    unsafe { transmute(vpermw128(a.as_i16x8(), idx.as_i16x8())) }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutexvar_epi16&expand=4287)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm_mask_permutexvar_epi16(src: __m128i, k: __mmask8, idx: __m128i, a: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
        transmute(simd_select_bitmask(k, permute, src.as_i16x8()))
    }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutexvar_epi16&expand=4288)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm_maskz_permutexvar_epi16(k: __mmask8, idx: __m128i, a: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
        transmute(simd_select_bitmask(k, permute, i16x8::ZERO))
    }
}
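
// A minimal usage sketch for the single-source cross-lane permutes,
// assuming `avx512bw` and `avx512vl` support; each index selects a lane
// of `a`:
//
//     unsafe {
//         let a = _mm_setr_epi16(10, 11, 12, 13, 14, 15, 16, 17);
//         let idx = _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0);
//         let r = _mm_permutexvar_epi16(idx, a); // lanes reversed: 17 .. 10
//     }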

/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi16&expand=430)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
pub fn _mm512_mask_blend_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i16x32(), a.as_i16x32())) }
}

/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi16&expand=429)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
pub fn _mm256_mask_blend_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i16x16(), a.as_i16x16())) }
}

/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi16&expand=427)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
pub fn _mm_mask_blend_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i16x8(), a.as_i16x8())) }
}

/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi8&expand=441)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
pub fn _mm512_mask_blend_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i8x64(), a.as_i8x64())) }
}

/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi8&expand=440)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
pub fn _mm256_mask_blend_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i8x32(), a.as_i8x32())) }
}

/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi8&expand=439)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
pub fn _mm_mask_blend_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i8x16(), a.as_i8x16())) }
}
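
// A minimal usage sketch for the mask blends, assuming `avx512bw` and
// `avx512vl` support. Bit i of k selects b (1) or a (0) for element i:
//
//     unsafe {
//         let a = _mm_set1_epi8(1);
//         let b = _mm_set1_epi8(2);
//         let r = _mm_mask_blend_epi8(0b1111_1111_0000_0000, a, b);
//         // bytes 0-7 come from a, bytes 8-15 from b
//     }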

/// Broadcast the low packed 16-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastw_epi16&expand=587)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i {
    unsafe {
        let a = _mm512_castsi128_si512(a).as_i16x32();
        let ret: i16x32 = simd_shuffle!(
            a,
            a,
            [
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0,
            ],
        );
        transmute(ret)
    }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastw_epi16&expand=588)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm512_mask_broadcastw_epi16(src: __m512i, k: __mmask32, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, broadcast, src.as_i16x32()))
    }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastw_epi16&expand=589)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm512_maskz_broadcastw_epi16(k: __mmask32, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, broadcast, i16x32::ZERO))
    }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastw_epi16&expand=585)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm256_mask_broadcastw_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_i16x16()))
    }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastw_epi16&expand=586)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm256_maskz_broadcastw_epi16(k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, broadcast, i16x16::ZERO))
    }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastw_epi16&expand=582)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm_mask_broadcastw_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i16x8()))
    }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastw_epi16&expand=583)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, broadcast, i16x8::ZERO))
    }
}
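
// A minimal usage sketch for the word broadcasts, assuming `avx512bw` has
// been detected at runtime; only the low 16-bit element of `a` is used:
//
//     unsafe {
//         let a = _mm_setr_epi16(42, 1, 2, 3, 4, 5, 6, 7);
//         let r = _mm512_broadcastw_epi16(a); // all 32 lanes = 42
//         let src = _mm512_set1_epi16(-1);
//         // writemask variant: lanes 8-31 keep src's value (-1).
//         let m = _mm512_mask_broadcastw_epi16(src, 0x0000_00FF, a);
//     }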

/// Broadcast the low packed 8-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastb_epi8&expand=536)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i {
    unsafe {
        let a = _mm512_castsi128_si512(a).as_i8x64();
        let ret: i8x64 = simd_shuffle!(
            a,
            a,
            [
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );
        transmute(ret)
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastb_epi8&expand=537)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm512_mask_broadcastb_epi8(src: __m512i, k: __mmask64, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, broadcast, src.as_i8x64()))
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastb_epi8&expand=538)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm512_maskz_broadcastb_epi8(k: __mmask64, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, broadcast, i8x64::ZERO))
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastb_epi8&expand=534)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm256_mask_broadcastb_epi8(src: __m256i, k: __mmask32, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, broadcast, src.as_i8x32()))
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastb_epi8&expand=535)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm256_maskz_broadcastb_epi8(k: __mmask32, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, broadcast, i8x32::ZERO))
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastb_epi8&expand=531)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm_mask_broadcastb_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_i8x16()))
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastb_epi8&expand=532)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm_maskz_broadcastb_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, broadcast, i8x16::ZERO))
    }
}
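
// A minimal usage sketch for the byte broadcasts, assuming `avx512bw` has
// been detected at runtime; only the low byte of `a` is used:
//
//     unsafe {
//         let a = _mm_set1_epi8(7);
//         let r = _mm512_broadcastb_epi8(a); // all 64 bytes = 7
//         let z = _mm512_maskz_broadcastb_epi8(u64::MAX << 32, a); // bytes 0-31 zeroed
//     }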

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi16&expand=6012)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        #[rustfmt::skip]
        let r: i16x32 = simd_shuffle!(
            a,
            b,
            [
                4, 32 + 4, 5, 32 + 5,
                6, 32 + 6, 7, 32 + 7,
                12, 32 + 12, 13, 32 + 13,
                14, 32 + 14, 15, 32 + 15,
                20, 32 + 20, 21, 32 + 21,
                22, 32 + 22, 23, 32 + 23,
                28, 32 + 28, 29, 32 + 29,
                30, 32 + 30, 31, 32 + 31,
            ],
        );
        transmute(r)
    }
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi16&expand=6010)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub fn _mm512_mask_unpackhi_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x32()))
    }
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi16&expand=6011)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub fn _mm512_maskz_unpackhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, unpackhi, i16x32::ZERO))
    }
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi16&expand=6007)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub fn _mm256_mask_unpackhi_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x16()))
    }
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi16&expand=6008)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub fn _mm256_maskz_unpackhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, unpackhi, i16x16::ZERO))
    }
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi16&expand=6004)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub fn _mm_mask_unpackhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x8()))
    }
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi16&expand=6005)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub fn _mm_maskz_unpackhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, unpackhi, i16x8::ZERO))
    }
}
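
// A minimal usage sketch for the 16-bit unpack-high family, assuming
// `avx512bw` and `avx512vl` support; the upper four lanes of each 128-bit
// input are interleaved:
//
//     unsafe {
//         let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
//         let b = _mm_setr_epi16(10, 11, 12, 13, 14, 15, 16, 17);
//         let r = _mm_unpackhi_epi16(a, b); // 4, 14, 5, 15, 6, 16, 7, 17
//         let z = _mm_maskz_unpackhi_epi16(0b0101_0101, a, b); // 4, 0, 5, 0, 6, 0, 7, 0
//     }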
8318
8319/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
8320///
8321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi8&expand=6039)
8322#[inline]
8323#[target_feature(enable = "avx512bw")]
8324#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8325#[cfg_attr(test, assert_instr(vpunpckhbw))]
8326pub fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i {
8327    unsafe {
8328        let a = a.as_i8x64();
8329        let b = b.as_i8x64();
8330        #[rustfmt::skip]
8331        let r: i8x64 = simd_shuffle!(
8332            a,
8333            b,
8334            [
8335                8, 64 + 8, 9, 64 + 9,
8336                10, 64 + 10, 11, 64 + 11,
8337                12, 64 + 12, 13, 64 + 13,
8338                14, 64 + 14, 15, 64 + 15,
8339                24, 64 + 24, 25, 64 + 25,
8340                26, 64 + 26, 27, 64 + 27,
8341                28, 64 + 28, 29, 64 + 29,
8342                30, 64 + 30, 31, 64 + 31,
8343                40, 64 + 40, 41, 64 + 41,
8344                42, 64 + 42, 43, 64 + 43,
8345                44, 64 + 44, 45, 64 + 45,
8346                46, 64 + 46, 47, 64 + 47,
8347                56, 64 + 56, 57, 64 + 57,
8348                58, 64 + 58, 59, 64 + 59,
8349                60, 64 + 60, 61, 64 + 61,
8350                62, 64 + 62, 63, 64 + 63,
8351            ],
8352        );
8353        transmute(r)
8354    }
8355}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi8&expand=6037)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub fn _mm512_mask_unpackhi_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x64()))
    }
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi8&expand=6038)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub fn _mm512_maskz_unpackhi_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, unpackhi, i8x64::ZERO))
    }
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi8&expand=6034)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub fn _mm256_mask_unpackhi_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x32()))
    }
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi8&expand=6035)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub fn _mm256_maskz_unpackhi_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, unpackhi, i8x32::ZERO))
    }
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi8&expand=6031)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub fn _mm_mask_unpackhi_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x16()))
    }
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi8&expand=6032)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub fn _mm_maskz_unpackhi_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, unpackhi, i8x16::ZERO))
    }
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi16&expand=6069)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        #[rustfmt::skip]
        let r: i16x32 = simd_shuffle!(
            a,
            b,
            [
                0, 32 + 0, 1, 32 + 1,
                2, 32 + 2, 3, 32 + 3,
                8, 32 + 8, 9, 32 + 9,
                10, 32 + 10, 11, 32 + 11,
                16, 32 + 16, 17, 32 + 17,
                18, 32 + 18, 19, 32 + 19,
                24, 32 + 24, 25, 32 + 25,
                26, 32 + 26, 27, 32 + 27,
            ],
        );
        transmute(r)
    }
}
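
// Editorial sketch (hypothetical test-only helper, not in the original
// source): within each 128-bit lane, the output interleaves words 0..4 of a
// with words 0..4 of b.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn _sketch_unpacklo_epi16_order() {
    let mut av = [0i16; 32];
    let mut bv = [0i16; 32];
    for i in 0..32 {
        av[i] = i as i16; // a holds 0, 1, ..., 31
        bv[i] = 100 + i as i16; // b holds 100, 101, ..., 131
    }
    let (a, b): (__m512i, __m512i) = unsafe { (transmute(av), transmute(bv)) };
    let r: [i16; 32] = unsafe { transmute(_mm512_unpacklo_epi16(a, b)) };
    // Lane 0: a0, b0, a1, b1, a2, b2, a3, b3.
    assert_eq!(&r[..8], &[0, 100, 1, 101, 2, 102, 3, 103]);
}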

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi16&expand=6067)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub fn _mm512_mask_unpacklo_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x32()))
    }
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi16&expand=6068)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub fn _mm512_maskz_unpacklo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, unpacklo, i16x32::ZERO))
    }
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi16&expand=6064)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub fn _mm256_mask_unpacklo_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x16()))
    }
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi16&expand=6065)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub fn _mm256_maskz_unpacklo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, unpacklo, i16x16::ZERO))
    }
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi16&expand=6061)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub fn _mm_mask_unpacklo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x8()))
    }
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi16&expand=6062)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub fn _mm_maskz_unpacklo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, unpacklo, i16x8::ZERO))
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi8&expand=6096)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        #[rustfmt::skip]
        let r: i8x64 = simd_shuffle!(
            a,
            b,
            [
                0, 64 + 0, 1, 64 + 1,
                2, 64 + 2, 3, 64 + 3,
                4, 64 + 4, 5, 64 + 5,
                6, 64 + 6, 7, 64 + 7,
                16, 64 + 16, 17, 64 + 17,
                18, 64 + 18, 19, 64 + 19,
                20, 64 + 20, 21, 64 + 21,
                22, 64 + 22, 23, 64 + 23,
                32, 64 + 32, 33, 64 + 33,
                34, 64 + 34, 35, 64 + 35,
                36, 64 + 36, 37, 64 + 37,
                38, 64 + 38, 39, 64 + 39,
                48, 64 + 48, 49, 64 + 49,
                50, 64 + 50, 51, 64 + 51,
                52, 64 + 52, 53, 64 + 53,
                54, 64 + 54, 55, 64 + 55,
            ],
        );
        transmute(r)
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi8&expand=6094)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub fn _mm512_mask_unpacklo_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x64()))
    }
}
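
// Editorial sketch (hypothetical test-only helper, not in the original
// source): demonstrates writemask semantics. Where a mask bit is clear, the
// corresponding byte is copied from src rather than taken from the
// interleaved result.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn _sketch_mask_unpacklo_epi8() {
    let src = _mm512_set1_epi8(-1);
    let a = _mm512_set1_epi8(3);
    let b = _mm512_set1_epi8(5);
    // Even mask bits set: bytes drawn from a survive; the odd positions,
    // which would come from b, are copied from src instead.
    let k: __mmask64 = 0x5555_5555_5555_5555;
    let r: [i8; 64] = unsafe { transmute(_mm512_mask_unpacklo_epi8(src, k, a, b)) };
    assert!(r.chunks(2).all(|p| p == [3, -1]));
}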

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi8&expand=6095)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub fn _mm512_maskz_unpacklo_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, unpacklo, i8x64::ZERO))
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi8&expand=6091)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub fn _mm256_mask_unpacklo_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x32()))
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi8&expand=6092)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub fn _mm256_maskz_unpacklo_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, unpacklo, i8x32::ZERO))
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi8&expand=6088)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub fn _mm_mask_unpacklo_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x16()))
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi8&expand=6089)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub fn _mm_maskz_unpacklo_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, unpacklo, i8x16::ZERO))
    }
}

/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi16&expand=3795)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub fn _mm512_mask_mov_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i16x32();
        transmute(simd_select_bitmask(k, mov, src.as_i16x32()))
    }
}
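
// Editorial sketch (hypothetical test-only helper, not in the original
// source): mask_mov is a pure per-element blend, taking elements from a
// where the mask bit is set and keeping src elsewhere.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn _sketch_mask_mov_epi16_blend() {
    let src = _mm512_set1_epi16(1);
    let a = _mm512_set1_epi16(2);
    // Low 16 mask bits set: first 16 words come from a, last 16 from src.
    let r: [i16; 32] = unsafe { transmute(_mm512_mask_mov_epi16(src, 0x0000_FFFF, a)) };
    for (i, w) in r.iter().enumerate() {
        assert_eq!(*w, if i < 16 { 2 } else { 1 });
    }
}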

/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi16&expand=3796)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub fn _mm512_maskz_mov_epi16(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i16x32();
        transmute(simd_select_bitmask(k, mov, i16x32::ZERO))
    }
}

/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi16&expand=3793)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub fn _mm256_mask_mov_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i16x16();
        transmute(simd_select_bitmask(k, mov, src.as_i16x16()))
    }
}

/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi16&expand=3794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub fn _mm256_maskz_mov_epi16(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i16x16();
        transmute(simd_select_bitmask(k, mov, i16x16::ZERO))
    }
}

/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi16&expand=3791)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub fn _mm_mask_mov_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i16x8();
        transmute(simd_select_bitmask(k, mov, src.as_i16x8()))
    }
}

/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi16&expand=3792)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub fn _mm_maskz_mov_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i16x8();
        transmute(simd_select_bitmask(k, mov, i16x8::ZERO))
    }
}

/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi8&expand=3813)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub fn _mm512_mask_mov_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i8x64();
        transmute(simd_select_bitmask(k, mov, src.as_i8x64()))
    }
}

/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi8&expand=3814)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub fn _mm512_maskz_mov_epi8(k: __mmask64, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i8x64();
        transmute(simd_select_bitmask(k, mov, i8x64::ZERO))
    }
}

/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi8&expand=3811)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub fn _mm256_mask_mov_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i8x32();
        transmute(simd_select_bitmask(k, mov, src.as_i8x32()))
    }
}

/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi8&expand=3812)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub fn _mm256_maskz_mov_epi8(k: __mmask32, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i8x32();
        transmute(simd_select_bitmask(k, mov, i8x32::ZERO))
    }
}

/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi8&expand=3809)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub fn _mm_mask_mov_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i8x16();
        transmute(simd_select_bitmask(k, mov, src.as_i8x16()))
    }
}

/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi8&expand=3810)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub fn _mm_maskz_mov_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i8x16();
        transmute(simd_select_bitmask(k, mov, i8x16::ZERO))
    }
}

/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi16&expand=4942)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm512_mask_set1_epi16(src: __m512i, k: __mmask32, a: i16) -> __m512i {
    unsafe {
        let r = _mm512_set1_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, r, src.as_i16x32()))
    }
}

/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi16&expand=4943)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm512_maskz_set1_epi16(k: __mmask32, a: i16) -> __m512i {
    unsafe {
        let r = _mm512_set1_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, r, i16x32::ZERO))
    }
}
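
// Editorial sketch (hypothetical test-only helper, not in the original
// source): broadcast the scalar everywhere, then zero each element whose
// mask bit is clear.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn _sketch_maskz_set1_epi16() {
    // k = 0b101 keeps elements 0 and 2 only.
    let r: [i16; 32] = unsafe { transmute(_mm512_maskz_set1_epi16(0b101, 9)) };
    assert_eq!(&r[..4], &[9, 0, 9, 0]);
}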

/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi16&expand=4939)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm256_mask_set1_epi16(src: __m256i, k: __mmask16, a: i16) -> __m256i {
    unsafe {
        let r = _mm256_set1_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, r, src.as_i16x16()))
    }
}

/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi16&expand=4940)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm256_maskz_set1_epi16(k: __mmask16, a: i16) -> __m256i {
    unsafe {
        let r = _mm256_set1_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, r, i16x16::ZERO))
    }
}

/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi16&expand=4936)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm_mask_set1_epi16(src: __m128i, k: __mmask8, a: i16) -> __m128i {
    unsafe {
        let r = _mm_set1_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, r, src.as_i16x8()))
    }
}

/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi16&expand=4937)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm_maskz_set1_epi16(k: __mmask8, a: i16) -> __m128i {
    unsafe {
        let r = _mm_set1_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, r, i16x8::ZERO))
    }
}

/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi8&expand=4970)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub fn _mm512_mask_set1_epi8(src: __m512i, k: __mmask64, a: i8) -> __m512i {
    unsafe {
        let r = _mm512_set1_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, r, src.as_i8x64()))
    }
}

/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi8&expand=4971)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub fn _mm512_maskz_set1_epi8(k: __mmask64, a: i8) -> __m512i {
    unsafe {
        let r = _mm512_set1_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, r, i8x64::ZERO))
    }
}

/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi8&expand=4967)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub fn _mm256_mask_set1_epi8(src: __m256i, k: __mmask32, a: i8) -> __m256i {
    unsafe {
        let r = _mm256_set1_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, r, src.as_i8x32()))
    }
}

/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi8&expand=4968)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub fn _mm256_maskz_set1_epi8(k: __mmask32, a: i8) -> __m256i {
    unsafe {
        let r = _mm256_set1_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, r, i8x32::ZERO))
    }
}

/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi8&expand=4964)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub fn _mm_mask_set1_epi8(src: __m128i, k: __mmask16, a: i8) -> __m128i {
    unsafe {
        let r = _mm_set1_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, r, src.as_i8x16()))
    }
}

/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi8&expand=4965)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i {
    unsafe {
        let r = _mm_set1_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, r, i8x16::ZERO))
    }
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflelo_epi16&expand=5221)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_shufflelo_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i16x32();
        let r: i16x32 = simd_shuffle!(
            a,
            a,
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
                4,
                5,
                6,
                7,
                (IMM8 as u32 & 0b11) + 8,
                ((IMM8 as u32 >> 2) & 0b11) + 8,
                ((IMM8 as u32 >> 4) & 0b11) + 8,
                ((IMM8 as u32 >> 6) & 0b11) + 8,
                12,
                13,
                14,
                15,
                (IMM8 as u32 & 0b11) + 16,
                ((IMM8 as u32 >> 2) & 0b11) + 16,
                ((IMM8 as u32 >> 4) & 0b11) + 16,
                ((IMM8 as u32 >> 6) & 0b11) + 16,
                20,
                21,
                22,
                23,
                (IMM8 as u32 & 0b11) + 24,
                ((IMM8 as u32 >> 2) & 0b11) + 24,
                ((IMM8 as u32 >> 4) & 0b11) + 24,
                ((IMM8 as u32 >> 6) & 0b11) + 24,
                28,
                29,
                30,
                31,
            ],
        );
        transmute(r)
    }
}
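
// Editorial sketch (hypothetical test-only helper, not in the original
// source): the four 2-bit fields of IMM8 pick which of a lane's low four
// words lands in each low-word slot; _MM_SHUFFLE composes the fields from
// highest slot to lowest.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn _sketch_shufflelo_imm8() {
    let mut av = [0i16; 32];
    for i in 0..32 {
        av[i] = i as i16;
    }
    let a: __m512i = unsafe { transmute(av) };
    // Reverse the low four words of every lane; the high four pass through.
    let r: [i16; 32] =
        unsafe { transmute(_mm512_shufflelo_epi16::<{ _MM_SHUFFLE(0, 1, 2, 3) }>(a)) };
    assert_eq!(&r[..8], &[3, 2, 1, 0, 4, 5, 6, 7]);
}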

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflelo_epi16&expand=5219)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_shufflelo_epi16<const IMM8: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_shufflelo_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
    }
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflelo_epi16&expand=5220)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_shufflelo_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO))
    }
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflelo_epi16&expand=5216)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_shufflelo_epi16<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
    }
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflelo_epi16&expand=5217)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO))
    }
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflelo_epi16&expand=5213)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_shufflelo_epi16<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
    }
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflelo_epi16&expand=5214)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO))
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflehi_epi16&expand=5212)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_shufflehi_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i16x32();
        let r: i16x32 = simd_shuffle!(
            a,
            a,
            [
                0,
                1,
                2,
                3,
                (IMM8 as u32 & 0b11) + 4,
                ((IMM8 as u32 >> 2) & 0b11) + 4,
                ((IMM8 as u32 >> 4) & 0b11) + 4,
                ((IMM8 as u32 >> 6) & 0b11) + 4,
                8,
                9,
                10,
                11,
                (IMM8 as u32 & 0b11) + 12,
                ((IMM8 as u32 >> 2) & 0b11) + 12,
                ((IMM8 as u32 >> 4) & 0b11) + 12,
                ((IMM8 as u32 >> 6) & 0b11) + 12,
                16,
                17,
                18,
                19,
                (IMM8 as u32 & 0b11) + 20,
                ((IMM8 as u32 >> 2) & 0b11) + 20,
                ((IMM8 as u32 >> 4) & 0b11) + 20,
                ((IMM8 as u32 >> 6) & 0b11) + 20,
                24,
                25,
                26,
                27,
                (IMM8 as u32 & 0b11) + 28,
                ((IMM8 as u32 >> 2) & 0b11) + 28,
                ((IMM8 as u32 >> 4) & 0b11) + 28,
                ((IMM8 as u32 >> 6) & 0b11) + 28,
            ],
        );
        transmute(r)
    }
}
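
// Editorial sketch (hypothetical test-only helper, not in the original
// source): same IMM8 encoding as shufflelo, but the fields select among
// words 4..8 of each lane while the low four words pass through.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn _sketch_shufflehi_imm8() {
    let mut av = [0i16; 32];
    for i in 0..32 {
        av[i] = i as i16;
    }
    let a: __m512i = unsafe { transmute(av) };
    // Reverse the high four words of every lane.
    let r: [i16; 32] =
        unsafe { transmute(_mm512_shufflehi_epi16::<{ _MM_SHUFFLE(0, 1, 2, 3) }>(a)) };
    assert_eq!(&r[..8], &[0, 1, 2, 3, 7, 6, 5, 4]);
}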

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflehi_epi16&expand=5210)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_shufflehi_epi16<const IMM8: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_shufflehi_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflehi_epi16&expand=5211)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_shufflehi_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO))
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflehi_epi16&expand=5207)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_shufflehi_epi16<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflehi_epi16&expand=5208)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO))
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflehi_epi16&expand=5204)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_shufflehi_epi16<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflehi_epi16&expand=5205)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO))
    }
}

/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi8&expand=5159)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm512_shuffle_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpshufb(a.as_i8x64(), b.as_i8x64())) }
}
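
// Editorial sketch (hypothetical test-only helper, not in the original
// source): each control byte selects within its own 128-bit lane. The low
// four bits index one of the lane's 16 bytes, and a set sign bit zeroes the
// output byte.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn _sketch_shuffle_epi8() {
    let mut av = [0i8; 64];
    for i in 0..64 {
        av[i] = i as i8;
    }
    let a: __m512i = unsafe { transmute(av) };
    // Control 15 picks byte 15 of each lane, i.e. 16 * lane + 15.
    let r: [i8; 64] = unsafe { transmute(_mm512_shuffle_epi8(a, _mm512_set1_epi8(15))) };
    assert_eq!((r[0], r[16], r[32], r[48]), (15, 31, 47, 63));
    // Control -1 has the sign bit set, so every output byte is zeroed.
    let z: [i8; 64] = unsafe { transmute(_mm512_shuffle_epi8(a, _mm512_set1_epi8(-1))) };
    assert_eq!(z, [0i8; 64]);
}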

/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi8&expand=5157)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm512_mask_shuffle_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, shuffle, src.as_i8x64()))
    }
}

/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi8&expand=5158)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm512_maskz_shuffle_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, shuffle, i8x64::ZERO))
    }
}

/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi8&expand=5154)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm256_mask_shuffle_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, shuffle, src.as_i8x32()))
    }
}

/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi8&expand=5155)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm256_maskz_shuffle_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, shuffle, i8x32::ZERO))
    }
}

/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi8&expand=5151)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm_mask_shuffle_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, shuffle, src.as_i8x16()))
    }
}

/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi8&expand=5152)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm_maskz_shuffle_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, shuffle, i8x16::ZERO))
    }
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi16_mask&expand=5884)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub fn _mm512_test_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpneq_epi16_mask(and, zero)
}
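
// Editorial sketch (hypothetical test-only helper, not in the original
// source): the mask bit for an element is set iff (a & b) is non-zero in
// that 16-bit element, i.e. a per-element "any bits overlap" test.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn _sketch_test_epi16_mask() {
    let a = _mm512_set1_epi16(0b0110);
    // 0b0110 & 0b0010 != 0 in every element, so all 32 mask bits are set.
    assert_eq!(_mm512_test_epi16_mask(a, _mm512_set1_epi16(0b0010)), u32::MAX);
    // 0b0110 & 0b1001 == 0, so no mask bit is set.
    assert_eq!(_mm512_test_epi16_mask(a, _mm512_set1_epi16(0b1001)), 0);
}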

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi16_mask&expand=5883)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub fn _mm512_mask_test_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi16_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi16_mask&expand=5882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub fn _mm256_test_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpneq_epi16_mask(and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi16_mask&expand=5881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub fn _mm256_mask_test_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi16_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi16_mask&expand=5880)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub fn _mm_test_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpneq_epi16_mask(and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi16_mask&expand=5879)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub fn _mm_mask_test_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpneq_epi16_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi8_mask&expand=5902)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmb))]
pub fn _mm512_test_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpneq_epi8_mask(and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi8_mask&expand=5901)
9518#[inline]
9519#[target_feature(enable = "avx512bw")]
9520#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9521#[cfg_attr(test, assert_instr(vptestmb))]
9522pub fn _mm512_mask_test_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
9523    let and = _mm512_and_si512(a, b);
9524    let zero = _mm512_setzero_si512();
9525    _mm512_mask_cmpneq_epi8_mask(k, and, zero)
9526}
9527
9528/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
9529///
9530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi8_mask&expand=5900)
9531#[inline]
9532#[target_feature(enable = "avx512bw,avx512vl")]
9533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9534#[cfg_attr(test, assert_instr(vptestmb))]
9535pub fn _mm256_test_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
9536    let and = _mm256_and_si256(a, b);
9537    let zero = _mm256_setzero_si256();
9538    _mm256_cmpneq_epi8_mask(and, zero)
9539}
9540
9541/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
9542///
9543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi8_mask&expand=5899)
9544#[inline]
9545#[target_feature(enable = "avx512bw,avx512vl")]
9546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9547#[cfg_attr(test, assert_instr(vptestmb))]
9548pub fn _mm256_mask_test_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
9549    let and = _mm256_and_si256(a, b);
9550    let zero = _mm256_setzero_si256();
9551    _mm256_mask_cmpneq_epi8_mask(k, and, zero)
9552}
9553
9554/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
9555///
9556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi8_mask&expand=5898)
9557#[inline]
9558#[target_feature(enable = "avx512bw,avx512vl")]
9559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9560#[cfg_attr(test, assert_instr(vptestmb))]
9561pub fn _mm_test_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
9562    let and = _mm_and_si128(a, b);
9563    let zero = _mm_setzero_si128();
9564    _mm_cmpneq_epi8_mask(and, zero)
9565}
9566
9567/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
9568///
9569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi8_mask&expand=5897)
9570#[inline]
9571#[target_feature(enable = "avx512bw,avx512vl")]
9572#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9573#[cfg_attr(test, assert_instr(vptestmb))]
9574pub fn _mm_mask_test_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
9575    let and = _mm_and_si128(a, b);
9576    let zero = _mm_setzero_si128();
9577    _mm_mask_cmpneq_epi8_mask(k, and, zero)
9578}
9579
9580/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
9581///
9582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi16_mask&expand=5915)
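///
/// A hypothetical helper showing the complement of the `test` family
/// (illustrative sketch, not from Intel's documentation; AVX-512BW assumed):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// unsafe fn lanes_disjoint_from(v: __m512i, bits: i16) -> __mmask32 {
///     // A mask bit is set when `lane & bits` is zero, i.e. the lane
///     // shares no set bits with `bits`.
///     _mm512_testn_epi16_mask(v, _mm512_set1_epi16(bits))
/// }
/// ```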
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub fn _mm512_testn_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpeq_epi16_mask(and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi16_mask&expand=5914)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub fn _mm512_mask_testn_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi16_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi16_mask&expand=5913)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub fn _mm256_testn_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpeq_epi16_mask(and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi16_mask&expand=5912)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub fn _mm256_mask_testn_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpeq_epi16_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi16_mask&expand=5911)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub fn _mm_testn_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpeq_epi16_mask(and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi16_mask&expand=5910)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub fn _mm_mask_testn_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpeq_epi16_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi8_mask&expand=5933)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmb))]
pub fn _mm512_testn_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpeq_epi8_mask(and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi8_mask&expand=5932)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmb))]
pub fn _mm512_mask_testn_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi8_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi8_mask&expand=5931)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmb))]
pub fn _mm256_testn_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpeq_epi8_mask(and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi8_mask&expand=5930)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmb))]
pub fn _mm256_mask_testn_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpeq_epi8_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi8_mask&expand=5929)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmb))]
pub fn _mm_testn_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpeq_epi8_mask(and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi8_mask&expand=5928)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmb))]
pub fn _mm_mask_testn_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpeq_epi8_mask(k, and, zero)
}

/// Store 64-bit mask from a into memory.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask64&expand=5578)
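///
/// A minimal store/load round-trip sketch (illustrative, not from Intel's
/// documentation; the helper name is hypothetical, AVX-512BW assumed):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// unsafe fn roundtrip(k: __mmask64) -> __mmask64 {
///     let mut slot: __mmask64 = 0;
///     _store_mask64(&mut slot, k);
///     _load_mask64(&slot)
/// }
/// ```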
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // should be kmovq
pub unsafe fn _store_mask64(mem_addr: *mut __mmask64, a: __mmask64) {
    ptr::write(mem_addr, a);
}

/// Store 32-bit mask from a into memory.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask32&expand=5577)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // should be kmovd
pub unsafe fn _store_mask32(mem_addr: *mut __mmask32, a: __mmask32) {
    ptr::write(mem_addr, a);
}

/// Load 64-bit mask from memory into k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask64&expand=3318)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // should be kmovq
pub unsafe fn _load_mask64(mem_addr: *const __mmask64) -> __mmask64 {
    ptr::read(mem_addr)
}

/// Load 32-bit mask from memory into k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask32&expand=3317)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // should be kmovd
pub unsafe fn _load_mask32(mem_addr: *const __mmask32) -> __mmask32 {
    ptr::read(mem_addr)
}

/// Compute the absolute differences of packed unsigned 8-bit integers in a and b, then horizontally sum each consecutive 8 differences to produce eight unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sad_epu8&expand=4855)
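///
/// A hypothetical helper showing how the per-lane sums come out (illustrative
/// sketch, not from Intel's documentation; AVX-512BW assumed):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// unsafe fn byte_diff_sums(a: __m512i, b: __m512i) -> [u64; 8] {
///     // Each 64-bit lane holds the sum of |a[i] - b[i]| over its 8 bytes.
///     let sums = _mm512_sad_epu8(a, b);
///     unsafe { core::mem::transmute(sums) }
/// }
/// ```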
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsadbw))]
pub fn _mm512_sad_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpsadbw(a.as_u8x64(), b.as_u8x64())) }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dbsad_epu8&expand=2114)
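///
/// A sketch of the IMM8 selection (illustrative only; per Intel's pseudocode
/// each two-bit field of IMM8 picks one 32-bit quadruplet of `b` within its
/// 128-bit lane, and the helper below is hypothetical):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// unsafe fn dbsad_in_place(a: __m512i, b: __m512i) -> __m512i {
///     // 0b11_10_01_00 selects quadruplets 0, 1, 2, 3 in order, i.e. it
///     // leaves the quadruplets of `b` in their original positions.
///     _mm512_dbsad_epu8::<0b11_10_01_00>(a, b)
/// }
/// ```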
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm512_dbsad_epu8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        let r = vdbpsadbw(a, b, IMM8);
        transmute(r)
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dbsad_epu8&expand=2115)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm512_mask_dbsad_epu8<const IMM8: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        let r = vdbpsadbw(a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_u16x32()))
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dbsad_epu8&expand=2116)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm512_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        let r = vdbpsadbw(a, b, IMM8);
        transmute(simd_select_bitmask(k, r, u16x32::ZERO))
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dbsad_epu8&expand=2111)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm256_dbsad_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        let r = vdbpsadbw256(a, b, IMM8);
        transmute(r)
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dbsad_epu8&expand=2112)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm256_mask_dbsad_epu8<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        let r = vdbpsadbw256(a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_u16x16()))
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dbsad_epu8&expand=2113)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm256_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        let r = vdbpsadbw256(a, b, IMM8);
        transmute(simd_select_bitmask(k, r, u16x16::ZERO))
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dbsad_epu8&expand=2108)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm_dbsad_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        let r = vdbpsadbw128(a, b, IMM8);
        transmute(r)
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dbsad_epu8&expand=2109)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm_mask_dbsad_epu8<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        let r = vdbpsadbw128(a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_u16x8()))
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dbsad_epu8&expand=2110)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        let r = vdbpsadbw128(a, b, IMM8);
        transmute(simd_select_bitmask(k, r, u16x8::ZERO))
    }
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi16_mask&expand=3873)
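///
/// A hypothetical helper treating this as a wide `movemask` (illustrative
/// sketch, not from Intel's documentation; AVX-512BW assumed):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// unsafe fn count_negative_lanes(v: __m512i) -> u32 {
///     // One mask bit per 16-bit lane, taken from the lane's sign bit.
///     _mm512_movepi16_mask(v).count_ones()
/// }
/// ```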
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovw2m))]
pub fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 {
    let filter = _mm512_set1_epi16(1 << 15);
    let a = _mm512_and_si512(a, filter);
    _mm512_cmpeq_epi16_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi16_mask&expand=3872)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovw2m))]
pub fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 {
    let filter = _mm256_set1_epi16(1 << 15);
    let a = _mm256_and_si256(a, filter);
    _mm256_cmpeq_epi16_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi16_mask&expand=3871)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovw2m))]
pub fn _mm_movepi16_mask(a: __m128i) -> __mmask8 {
    let filter = _mm_set1_epi16(1 << 15);
    let a = _mm_and_si128(a, filter);
    _mm_cmpeq_epi16_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi8_mask&expand=3883)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovb2m))]
pub fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 {
    let filter = _mm512_set1_epi8(1 << 7);
    let a = _mm512_and_si512(a, filter);
    _mm512_cmpeq_epi8_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi8_mask&expand=3882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes fewer cycles than
// using vpmovb2m plus converting the mask register to a standard register.
pub fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 {
    let filter = _mm256_set1_epi8(1 << 7);
    let a = _mm256_and_si256(a, filter);
    _mm256_cmpeq_epi8_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi8_mask&expand=3881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes fewer cycles than
// using vpmovb2m plus converting the mask register to a standard register.
pub fn _mm_movepi8_mask(a: __m128i) -> __mmask16 {
    let filter = _mm_set1_epi8(1 << 7);
    let a = _mm_and_si128(a, filter);
    _mm_cmpeq_epi8_mask(a, filter)
}

/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi16&expand=3886)
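///
/// A hypothetical blend built on the expanded mask (illustrative sketch, not
/// from Intel's documentation; AVX-512BW assumed):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// unsafe fn mask_to_blend(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
///     // Expand the mask to all-ones / all-zeros 16-bit lanes, then blend
///     // with plain bitwise operations.
///     let m = _mm512_movm_epi16(k);
///     _mm512_or_si512(_mm512_and_si512(m, a), _mm512_andnot_si512(m, b))
/// }
/// ```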
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovm2w))]
pub fn _mm512_movm_epi16(k: __mmask32) -> __m512i {
    unsafe {
        // -1 as i16 has all bits set, so selected lanes become all ones.
        let one = _mm512_set1_epi16(-1).as_i16x32();
        transmute(simd_select_bitmask(k, one, i16x32::ZERO))
    }
}

/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi16&expand=3885)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovm2w))]
pub fn _mm256_movm_epi16(k: __mmask16) -> __m256i {
    unsafe {
        // -1 as i16 has all bits set, so selected lanes become all ones.
        let one = _mm256_set1_epi16(-1).as_i16x16();
        transmute(simd_select_bitmask(k, one, i16x16::ZERO))
    }
}

/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi16&expand=3884)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovm2w))]
pub fn _mm_movm_epi16(k: __mmask8) -> __m128i {
    unsafe {
        // -1 as i16 has all bits set, so selected lanes become all ones.
        let one = _mm_set1_epi16(-1).as_i16x8();
        transmute(simd_select_bitmask(k, one, i16x8::ZERO))
    }
}

/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi8&expand=3895)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovm2b))]
pub fn _mm512_movm_epi8(k: __mmask64) -> __m512i {
    unsafe {
        // -1 as i8 has all bits set, so selected lanes become all ones.
        let one = _mm512_set1_epi8(-1).as_i8x64();
        transmute(simd_select_bitmask(k, one, i8x64::ZERO))
    }
}

/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi8&expand=3894)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovm2b))]
pub fn _mm256_movm_epi8(k: __mmask32) -> __m256i {
    unsafe {
        // -1 as i8 has all bits set, so selected lanes become all ones.
        let one = _mm256_set1_epi8(-1).as_i8x32();
        transmute(simd_select_bitmask(k, one, i8x32::ZERO))
    }
}

/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi8&expand=3893)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovm2b))]
pub fn _mm_movm_epi8(k: __mmask16) -> __m128i {
    unsafe {
        // -1 as i8 has all bits set, so selected lanes become all ones.
        let one = _mm_set1_epi8(-1).as_i8x16();
        transmute(simd_select_bitmask(k, one, i8x16::ZERO))
    }
}

/// Convert 32-bit mask a into an integer value, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#_cvtmask32_u32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _cvtmask32_u32(a: __mmask32) -> u32 {
    a
}

/// Convert integer value a into a 32-bit mask, and store the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _cvtu32_mask32(a: u32) -> __mmask32 {
    a
}

/// Add 32-bit masks in a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask32&expand=3207)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    // kaddd wraps on overflow; wrapping_add keeps debug builds from panicking.
    a.wrapping_add(b)
}

/// Add 64-bit masks in a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask64&expand=3208)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    // kaddq wraps on overflow; wrapping_add keeps debug builds from panicking.
    a.wrapping_add(b)
}

/// Compute the bitwise AND of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask32&expand=3213)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    a & b
}

/// Compute the bitwise AND of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask64&expand=3214)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    a & b
}

/// Compute the bitwise NOT of 32-bit mask a, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask32&expand=3234)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _knot_mask32(a: __mmask32) -> __mmask32 {
    !a
}

/// Compute the bitwise NOT of 64-bit mask a, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask64&expand=3235)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _knot_mask64(a: __mmask64) -> __mmask64 {
    !a
}

/// Compute the bitwise NOT of 32-bit masks a and then AND with b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask32&expand=3219)
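///
/// A hypothetical helper combining two comparison masks (illustrative sketch,
/// not from Intel's documentation; AVX-512BW assumed):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// unsafe fn lanes_in_b_only(a: __m512i, b: __m512i, probe: __m512i) -> __mmask32 {
///     let ka = _mm512_test_epi16_mask(a, probe);
///     let kb = _mm512_test_epi16_mask(b, probe);
///     // Keep the mask bits that are set for `b` but not for `a`.
///     _kandn_mask32(ka, kb)
/// }
/// ```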
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    _knot_mask32(a) & b
}

/// Compute the bitwise NOT of 64-bit masks a and then AND with b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask64&expand=3220)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    _knot_mask64(a) & b
}

/// Compute the bitwise OR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask32&expand=3240)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    a | b
}

/// Compute the bitwise OR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask64&expand=3241)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    a | b
}

/// Compute the bitwise XOR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask32&expand=3292)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    a ^ b
}

/// Compute the bitwise XOR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask64&expand=3293)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    a ^ b
}

/// Compute the bitwise XNOR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask32&expand=3286)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    _knot_mask32(a ^ b)
}

/// Compute the bitwise XNOR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask64&expand=3287)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    _knot_mask64(a ^ b)
}

/// Compute the bitwise OR of 32-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask32_u8)
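///
/// A hypothetical helper decoding both outputs (illustrative sketch, not from
/// Intel's documentation; AVX-512BW assumed):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// unsafe fn classify(a: __mmask32, b: __mmask32) -> (bool, bool) {
///     let mut all_ones = 0u8;
///     let all_zeros = _kortest_mask32_u8(a, b, &mut all_ones);
///     // (a | b == 0, a | b == all ones)
///     (all_zeros != 0, all_ones != 0)
/// }
/// ```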
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _kortest_mask32_u8(a: __mmask32, b: __mmask32, all_ones: *mut u8) -> u8 {
    let tmp = _kor_mask32(a, b);
    *all_ones = (tmp == 0xffffffff) as u8;
    (tmp == 0) as u8
}

/// Compute the bitwise OR of 64-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _kortest_mask64_u8(a: __mmask64, b: __mmask64, all_ones: *mut u8) -> u8 {
    let tmp = _kor_mask64(a, b);
    *all_ones = (tmp == 0xffffffff_ffffffff) as u8;
    (tmp == 0) as u8
}

/// Compute the bitwise OR of 32-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kortestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
    (_kor_mask32(a, b) == 0xffffffff) as u8
}

/// Compute the bitwise OR of 64-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kortestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
    (_kor_mask64(a, b) == 0xffffffff_ffffffff) as u8
}

/// Compute the bitwise OR of 32-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kortestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
    (_kor_mask32(a, b) == 0) as u8
}

/// Compute the bitwise OR of 64-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kortestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
    (_kor_mask64(a, b) == 0) as u8
}

/// Shift the bits of 32-bit mask a left by count while shifting in zeros, and store the least significant 32 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask32)
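///
/// A minimal sketch (illustrative, not from Intel's documentation; the helper
/// is hypothetical and AVX-512BW is assumed):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512bw")]
/// unsafe fn shift_up_by_four(k: __mmask32) -> __mmask32 {
///     // Moves every mask bit up by 4 positions; the top 4 bits fall off
///     // and the bottom 4 bits become zero.
///     _kshiftli_mask32::<4>(k)
/// }
/// ```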
10425#[inline]
10426#[target_feature(enable = "avx512bw")]
10427#[rustc_legacy_const_generics(1)]
10428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10429pub fn _kshiftli_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
10430    a << COUNT
10431}
10432
10433/// Shift the bits of 64-bit mask a left by count while shifting in zeros, and store the least significant 32 bits of the result in k.
10434///
10435/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask64)
10436#[inline]
10437#[target_feature(enable = "avx512bw")]
10438#[rustc_legacy_const_generics(1)]
10439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10440pub fn _kshiftli_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
10441    a << COUNT
10442}
10443
10444/// Shift the bits of 32-bit mask a right by count while shifting in zeros, and store the least significant 32 bits of the result in k.
10445///
10446/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask32)
10447#[inline]
10448#[target_feature(enable = "avx512bw")]
10449#[rustc_legacy_const_generics(1)]
10450#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10451pub fn _kshiftri_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
10452    a >> COUNT
10453}
10454
10455/// Shift the bits of 64-bit mask a right by count while shifting in zeros, and store the least significant 32 bits of the result in k.
10456///
10457/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask64)
10458#[inline]
10459#[target_feature(enable = "avx512bw")]
10460#[rustc_legacy_const_generics(1)]
10461#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10462pub fn _kshiftri_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
10463    a >> COUNT
10464}
10465
10466/// Compute the bitwise AND of 32-bit masks a and b, and if the result is all zeros, store 1 in dst,
10467/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
10468/// zeros, store 1 in and_not, otherwise store 0 in and_not.
10469///
10470/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask32_u8)
10471#[inline]
10472#[target_feature(enable = "avx512bw")]
10473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10474pub unsafe fn _ktest_mask32_u8(a: __mmask32, b: __mmask32, and_not: *mut u8) -> u8 {
10475    *and_not = (_kandn_mask32(a, b) == 0) as u8;
10476    (_kand_mask32(a, b) == 0) as u8
10477}
10478
10479/// Compute the bitwise AND of 64-bit masks a and b, and if the result is all zeros, store 1 in dst,
10480/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
10481/// zeros, store 1 in and_not, otherwise store 0 in and_not.
10482///
10483/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask64_u8)
10484#[inline]
10485#[target_feature(enable = "avx512bw")]
10486#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10487pub unsafe fn _ktest_mask64_u8(a: __mmask64, b: __mmask64, and_not: *mut u8) -> u8 {
10488    *and_not = (_kandn_mask64(a, b) == 0) as u8;
10489    (_kand_mask64(a, b) == 0) as u8
10490}
10491
10492/// Compute the bitwise NOT of 32-bit mask a and then AND with 16-bit mask b, if the result is all
10493/// zeros, store 1 in dst, otherwise store 0 in dst.
10494///
10495/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask32_u8)
10496#[inline]
10497#[target_feature(enable = "avx512bw")]
10498#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10499pub fn _ktestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
10500    (_kandn_mask32(a, b) == 0) as u8
10501}
10502
10503/// Compute the bitwise NOT of 64-bit mask a and then AND with 8-bit mask b, if the result is all
10504/// zeros, store 1 in dst, otherwise store 0 in dst.
10505///
10506/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask64_u8)
10507#[inline]
10508#[target_feature(enable = "avx512bw")]
10509#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10510pub fn _ktestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
10511    (_kandn_mask64(a, b) == 0) as u8
10512}

/// Compute the bitwise AND of 32-bit masks a and b; if the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _ktestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
    (_kand_mask32(a, b) == 0) as u8
}

/// Compute the bitwise AND of 64-bit masks a and b; if the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _ktestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
    (_kand_mask64(a, b) == 0) as u8
}

/// Unpack and interleave 16 bits from masks a and b, and store the 32-bit result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackw)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generates normal and/shift/or code instead of kunpckwd
pub fn _mm512_kunpackw(a: __mmask32, b: __mmask32) -> __mmask32 {
    ((a & 0xffff) << 16) | (b & 0xffff)
}
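
// A sketch (hypothetical test helper, assuming avx512bw): `_mm512_kunpackw`
// keeps only the low 16 bits of each operand; `a`'s low half becomes the
// high half of the result and `b`'s low half becomes the low half.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn kunpackw_sketch() {
    let a: __mmask32 = 0xFFFF_AAAA;
    let b: __mmask32 = 0xFFFF_5555;
    assert_eq!(_mm512_kunpackw(a, b), 0xAAAA_5555);
}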

/// Unpack and interleave 32 bits from masks a and b, and store the 64-bit result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackd)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generates normal and/shift/or code instead of kunpckdq
pub fn _mm512_kunpackd(a: __mmask64, b: __mmask64) -> __mmask64 {
    ((a & 0xffffffff) << 32) | (b & 0xffffffff)
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi8&expand=1407)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm512_cvtepi16_epi8(a: __m512i) -> __m256i {
    unsafe {
        let a = a.as_i16x32();
        transmute::<i8x32, _>(simd_cast(a))
    }
}
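
// A sketch (hypothetical test helper, assuming avx512bw): truncation drops
// the high byte of every 16-bit lane, so 0x0134, which does not fit in an
// i8, comes out as 0x34.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn cvtepi16_epi8_truncation_sketch() {
    let a = _mm512_set1_epi16(0x0134);
    let r = _mm512_cvtepi16_epi8(a);
    let mut out = [0i8; 32];
    unsafe { _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r) };
    assert!(out.iter().all(|&b| b == 0x34));
}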

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi8&expand=1408)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm512_mask_cvtepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    unsafe {
        let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, convert, src.as_i8x32()))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi8&expand=1409)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm512_maskz_cvtepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    unsafe {
        let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, convert, i8x32::ZERO))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi8&expand=1404)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm256_cvtepi16_epi8(a: __m256i) -> __m128i {
    unsafe {
        let a = a.as_i16x16();
        transmute::<i8x16, _>(simd_cast(a))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi8&expand=1405)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm256_mask_cvtepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
    unsafe {
        let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi8&expand=1406)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
    unsafe {
        let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi8&expand=1401)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i16x8();
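        // Widen the 8 input lanes to 16 by shuffling in lanes of
        // `i16x8::ZERO` (indices 8..16 select the zero vector), so the cast
        // below produces a full 16-byte vector whose upper half is zero.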
        let v256: i16x16 = simd_shuffle!(
            a,
            i16x8::ZERO,
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]
        );
        transmute::<i8x16, _>(simd_cast(v256))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi8&expand=1402)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi16_epi8(a).as_i8x16();
        // Force the upper 8 mask bits on so those lanes take `convert`'s
        // upper half (which is zero), matching the requirement that
        // dst[127:64] is zeroed rather than copied from src.
        let k: __mmask16 = 0b11111111_00000000 | k as __mmask16;
        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi8&expand=1403)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi16_epi8(a).as_i8x16();
        let k: __mmask16 = 0b11111111_11111111 & k as __mmask16;
        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
    }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi16_epi8&expand=1807)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i {
    unsafe {
        transmute(vpmovswb(
            a.as_i16x32(),
            i8x32::ZERO,
            0b11111111_11111111_11111111_11111111,
        ))
    }
}
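
// A sketch (hypothetical test helper, assuming avx512bw): signed saturation
// clamps out-of-range lanes instead of truncating them, so 1000 (above
// i8::MAX) becomes 127 in every output byte.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn cvtsepi16_epi8_saturation_sketch() {
    let a = _mm512_set1_epi16(1000);
    let r = _mm512_cvtsepi16_epi8(a);
    let mut out = [0i8; 32];
    unsafe { _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r) };
    assert!(out.iter().all(|&b| b == i8::MAX));
}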

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_epi8&expand=1808)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovswb(a.as_i16x32(), src.as_i8x32(), k)) }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi16_epi8&expand=1809)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovswb(a.as_i16x32(), i8x32::ZERO, k)) }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi16_epi8&expand=1804)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, 0b11111111_11111111)) }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_epi8&expand=1805)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovswb256(a.as_i16x16(), src.as_i8x16(), k)) }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi16_epi8&expand=1806)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, k)) }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi16_epi8&expand=1801)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm_cvtsepi16_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_epi8&expand=1802)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm_mask_cvtsepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovswb128(a.as_i16x8(), src.as_i8x16(), k)) }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi16_epi8&expand=1803)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, k)) }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi16_epi8&expand=2042)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i {
    unsafe {
        transmute(vpmovuswb(
            a.as_u16x32(),
            u8x32::ZERO,
            0b11111111_11111111_11111111_11111111,
        ))
    }
}
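
// A sketch (hypothetical test helper, assuming avx512bw): unsigned saturation
// clamps to the u8 range, so 0x0300 (768, above u8::MAX) becomes 0xFF in
// every output byte.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn cvtusepi16_epi8_saturation_sketch() {
    let a = _mm512_set1_epi16(0x0300);
    let r = _mm512_cvtusepi16_epi8(a);
    let mut out = [0u8; 32];
    unsafe { _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r) };
    assert!(out.iter().all(|&b| b == 0xFF));
}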

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_epi8&expand=2043)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovuswb(a.as_u16x32(), src.as_u8x32(), k)) }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi16_epi8&expand=2044)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovuswb(a.as_u16x32(), u8x32::ZERO, k)) }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi16_epi8&expand=2039)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i {
    unsafe {
        transmute(vpmovuswb256(
            a.as_u16x16(),
            u8x16::ZERO,
            0b11111111_11111111,
        ))
    }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_epi8&expand=2040)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovuswb256(a.as_u16x16(), src.as_u8x16(), k)) }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi16_epi8&expand=2041)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovuswb256(a.as_u16x16(), u8x16::ZERO, k)) }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi16_epi8&expand=2036)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm_cvtusepi16_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_epi8&expand=2037)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm_mask_cvtusepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovuswb128(a.as_u16x8(), src.as_u8x16(), k)) }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi16_epi8&expand=2038)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm_maskz_cvtusepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, k)) }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi16&expand=1526)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub fn _mm512_cvtepi8_epi16(a: __m256i) -> __m512i {
    unsafe {
        let a = a.as_i8x32();
        transmute::<i16x32, _>(simd_cast(a))
    }
}
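
// A sketch (hypothetical test helper, assuming avx512bw): sign extension
// preserves the numeric value, so -5i8 widens to -5i16 in every lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn cvtepi8_epi16_sketch() {
    let a = _mm256_set1_epi8(-5);
    let r = _mm512_cvtepi8_epi16(a);
    let mut out = [0i16; 32];
    unsafe { _mm512_storeu_epi16(out.as_mut_ptr(), r) };
    assert!(out.iter().all(|&w| w == -5));
}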

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi16&expand=1527)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub fn _mm512_mask_cvtepi8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
    }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi16&expand=1528)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub fn _mm512_maskz_cvtepi8_epi16(k: __mmask32, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, convert, i16x32::ZERO))
    }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi16&expand=1524)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub fn _mm256_mask_cvtepi8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
    }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi16&expand=1525)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub fn _mm256_maskz_cvtepi8_epi16(k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
    }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi16&expand=1521)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub fn _mm_mask_cvtepi8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi8_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
    }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi16&expand=1522)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub fn _mm_maskz_cvtepi8_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi8_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi16&expand=1612)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub fn _mm512_cvtepu8_epi16(a: __m256i) -> __m512i {
    unsafe {
        let a = a.as_u8x32();
        transmute::<i16x32, _>(simd_cast(a))
    }
}
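
// A sketch (hypothetical test helper, assuming avx512bw): zero extension
// treats each byte as unsigned, so 0xFF widens to 0x00FF (255), not -1.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn cvtepu8_epi16_sketch() {
    let a = _mm256_set1_epi8(-1); // every byte is 0xFF
    let r = _mm512_cvtepu8_epi16(a);
    let mut out = [0i16; 32];
    unsafe { _mm512_storeu_epi16(out.as_mut_ptr(), r) };
    assert!(out.iter().all(|&w| w == 255));
}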

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi16&expand=1613)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub fn _mm512_mask_cvtepu8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi16&expand=1614)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub fn _mm512_maskz_cvtepu8_epi16(k: __mmask32, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, convert, i16x32::ZERO))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi16&expand=1610)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub fn _mm256_mask_cvtepu8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi16&expand=1611)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub fn _mm256_maskz_cvtepu8_epi16(k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi16&expand=1607)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub fn _mm_mask_cvtepu8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu8_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi16&expand=1608)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu8_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
    }
}

/// Shift 128-bit lanes in a left by imm8 bytes while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bslli_epi128&expand=591)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_bslli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        const fn mask(shift: i32, i: u32) -> u32 {
            let shift = shift as u32 & 0xff;
            if shift > 15 || i % 16 < shift {
                0
            } else {
                64 + (i - shift)
            }
        }
        let a = a.as_i8x64();
        let zero = i8x64::ZERO;
        let r: i8x64 = simd_shuffle!(
            zero,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
                mask(IMM8, 32),
                mask(IMM8, 33),
                mask(IMM8, 34),
                mask(IMM8, 35),
                mask(IMM8, 36),
                mask(IMM8, 37),
                mask(IMM8, 38),
                mask(IMM8, 39),
                mask(IMM8, 40),
                mask(IMM8, 41),
                mask(IMM8, 42),
                mask(IMM8, 43),
                mask(IMM8, 44),
                mask(IMM8, 45),
                mask(IMM8, 46),
                mask(IMM8, 47),
                mask(IMM8, 48),
                mask(IMM8, 49),
                mask(IMM8, 50),
                mask(IMM8, 51),
                mask(IMM8, 52),
                mask(IMM8, 53),
                mask(IMM8, 54),
                mask(IMM8, 55),
                mask(IMM8, 56),
                mask(IMM8, 57),
                mask(IMM8, 58),
                mask(IMM8, 59),
                mask(IMM8, 60),
                mask(IMM8, 61),
                mask(IMM8, 62),
                mask(IMM8, 63),
            ],
        );
        transmute(r)
    }
}
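
// A sketch (hypothetical test helper, assuming avx512bw): the byte shift is
// per 128-bit lane, not across the whole vector, so after a 1-byte left
// shift byte 0 of each of the four lanes is zero.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn bslli_epi128_sketch() {
    let a = _mm512_set1_epi8(7);
    let r = _mm512_bslli_epi128::<1>(a);
    let mut out = [0i8; 64];
    unsafe { _mm512_storeu_epi8(out.as_mut_ptr(), r) };
    for (i, &b) in out.iter().enumerate() {
        assert_eq!(b, if i % 16 == 0 { 0 } else { 7 });
    }
}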

/// Shift 128-bit lanes in a right by imm8 bytes while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bsrli_epi128&expand=594)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        const fn mask(shift: i32, i: u32) -> u32 {
            let shift = shift as u32 & 0xff;
            if shift > 15 || (15 - (i % 16)) < shift {
                0
            } else {
                64 + (i + shift)
            }
        }
        let a = a.as_i8x64();
        let zero = i8x64::ZERO;
        let r: i8x64 = simd_shuffle!(
            zero,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
                mask(IMM8, 32),
                mask(IMM8, 33),
                mask(IMM8, 34),
                mask(IMM8, 35),
                mask(IMM8, 36),
                mask(IMM8, 37),
                mask(IMM8, 38),
                mask(IMM8, 39),
                mask(IMM8, 40),
                mask(IMM8, 41),
                mask(IMM8, 42),
                mask(IMM8, 43),
                mask(IMM8, 44),
                mask(IMM8, 45),
                mask(IMM8, 46),
                mask(IMM8, 47),
                mask(IMM8, 48),
                mask(IMM8, 49),
                mask(IMM8, 50),
                mask(IMM8, 51),
                mask(IMM8, 52),
                mask(IMM8, 53),
                mask(IMM8, 54),
                mask(IMM8, 55),
                mask(IMM8, 56),
                mask(IMM8, 57),
                mask(IMM8, 58),
                mask(IMM8, 59),
                mask(IMM8, 60),
                mask(IMM8, 61),
                mask(IMM8, 62),
                mask(IMM8, 63),
            ],
        );
        transmute(r)
    }
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst.
/// Unlike the [`_mm_alignr_epi8`] and [`_mm256_alignr_epi8`] functions, where the entire input vectors are concatenated into the temporary result,
/// this concatenation happens in 4 steps, where each step builds a 32-byte temporary result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi8&expand=263)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    const fn mask(shift: u32, i: u32) -> u32 {
        let shift = shift % 16;
        let mod_i = i % 16;
        if mod_i < (16 - shift) {
            i + shift
        } else {
            i + 48 + shift
        }
    }

    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if IMM8 >= 32 {
        return _mm512_setzero_si512();
    }
    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b) = if IMM8 > 16 {
        (_mm512_setzero_si512(), a)
    } else {
        (a, b)
    };
    unsafe {
        if IMM8 == 16 {
            return transmute(a);
        }

        let r: i8x64 = simd_shuffle!(
            b.as_i8x64(),
            a.as_i8x64(),
            [
                mask(IMM8 as u32, 0),
                mask(IMM8 as u32, 1),
                mask(IMM8 as u32, 2),
                mask(IMM8 as u32, 3),
                mask(IMM8 as u32, 4),
                mask(IMM8 as u32, 5),
                mask(IMM8 as u32, 6),
                mask(IMM8 as u32, 7),
                mask(IMM8 as u32, 8),
                mask(IMM8 as u32, 9),
                mask(IMM8 as u32, 10),
                mask(IMM8 as u32, 11),
                mask(IMM8 as u32, 12),
                mask(IMM8 as u32, 13),
                mask(IMM8 as u32, 14),
                mask(IMM8 as u32, 15),
                mask(IMM8 as u32, 16),
                mask(IMM8 as u32, 17),
                mask(IMM8 as u32, 18),
                mask(IMM8 as u32, 19),
                mask(IMM8 as u32, 20),
                mask(IMM8 as u32, 21),
                mask(IMM8 as u32, 22),
                mask(IMM8 as u32, 23),
                mask(IMM8 as u32, 24),
                mask(IMM8 as u32, 25),
                mask(IMM8 as u32, 26),
                mask(IMM8 as u32, 27),
                mask(IMM8 as u32, 28),
                mask(IMM8 as u32, 29),
                mask(IMM8 as u32, 30),
                mask(IMM8 as u32, 31),
                mask(IMM8 as u32, 32),
                mask(IMM8 as u32, 33),
                mask(IMM8 as u32, 34),
                mask(IMM8 as u32, 35),
                mask(IMM8 as u32, 36),
                mask(IMM8 as u32, 37),
                mask(IMM8 as u32, 38),
                mask(IMM8 as u32, 39),
                mask(IMM8 as u32, 40),
                mask(IMM8 as u32, 41),
                mask(IMM8 as u32, 42),
                mask(IMM8 as u32, 43),
                mask(IMM8 as u32, 44),
                mask(IMM8 as u32, 45),
                mask(IMM8 as u32, 46),
                mask(IMM8 as u32, 47),
                mask(IMM8 as u32, 48),
                mask(IMM8 as u32, 49),
                mask(IMM8 as u32, 50),
                mask(IMM8 as u32, 51),
                mask(IMM8 as u32, 52),
                mask(IMM8 as u32, 53),
                mask(IMM8 as u32, 54),
                mask(IMM8 as u32, 55),
                mask(IMM8 as u32, 56),
                mask(IMM8 as u32, 57),
                mask(IMM8 as u32, 58),
                mask(IMM8 as u32, 59),
                mask(IMM8 as u32, 60),
                mask(IMM8 as u32, 61),
                mask(IMM8 as u32, 62),
                mask(IMM8 as u32, 63),
            ],
        );
        transmute(r)
    }
}
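
// A sketch (hypothetical test helper, assuming avx512bw): with IMM8 = 1 each
// 128-bit lane of the result holds bytes 1..16 of the matching lane of `b`
// followed by byte 0 of the matching lane of `a`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn alignr_epi8_sketch() {
    let a = _mm512_set1_epi8(0x11);
    let b = _mm512_set1_epi8(0x22);
    let r = _mm512_alignr_epi8::<1>(a, b);
    let mut out = [0i8; 64];
    unsafe { _mm512_storeu_epi8(out.as_mut_ptr(), r) };
    for (i, &byte) in out.iter().enumerate() {
        assert_eq!(byte, if i % 16 == 15 { 0x11 } else { 0x22 });
    }
}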

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi8&expand=264)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_alignr_epi8<const IMM8: i32>(
    src: __m512i,
    k: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_alignr_epi8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i8x64(), src.as_i8x64()))
    }
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi8&expand=265)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_alignr_epi8<const IMM8: i32>(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_alignr_epi8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i8x64(), i8x64::ZERO))
    }
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi8&expand=261)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
pub fn _mm256_mask_alignr_epi8<const IMM8: i32>(
    src: __m256i,
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm256_alignr_epi8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i8x32(), src.as_i8x32()))
    }
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi8&expand=262)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
pub fn _mm256_maskz_alignr_epi8<const IMM8: i32>(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm256_alignr_epi8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i8x32(), i8x32::ZERO))
    }
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi8&expand=258)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
pub fn _mm_mask_alignr_epi8<const IMM8: i32>(
    src: __m128i,
    k: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm_alignr_epi8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i8x16(), src.as_i8x16()))
    }
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi8&expand=259)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
pub fn _mm_maskz_alignr_epi8<const IMM8: i32>(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm_alignr_epi8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i8x16(), i8x16::ZERO))
    }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_storeu_epi8&expand=1812)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm512_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    vpmovswbmem(mem_addr, a.as_i16x32(), k);
}
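
// A sketch (hypothetical test helper, assuming avx512bw): elements are stored
// at their own byte offsets (not compacted), and bytes whose mask bit is
// clear are left untouched in memory.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn mask_cvtsepi16_storeu_epi8_sketch() {
    let a = _mm512_set1_epi16(1000); // saturates to 127 per lane
    let mut out = [0i8; 32];
    // Only lane 0 is active, so only out[0] is written.
    unsafe { _mm512_mask_cvtsepi16_storeu_epi8(out.as_mut_ptr(), 0b1, a) };
    assert_eq!(out[0], 127);
    assert!(out[1..].iter().all(|&b| b == 0));
}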
11526
11527/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
11528///
11529/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_storeu_epi8&expand=1811)
11530#[inline]
11531#[target_feature(enable = "avx512bw,avx512vl")]
11532#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11533#[cfg_attr(test, assert_instr(vpmovswb))]
11534pub unsafe fn _mm256_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
11535    vpmovswbmem256(mem_addr, a.as_i16x16(), k);
11536}
11537
11538/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
11539///
11540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_storeu_epi8&expand=1810)
11541#[inline]
11542#[target_feature(enable = "avx512bw,avx512vl")]
11543#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11544#[cfg_attr(test, assert_instr(vpmovswb))]
11545pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
11546    vpmovswbmem128(mem_addr, a.as_i16x8(), k);
11547}
11548
11549/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
11550///
11551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_storeu_epi8&expand=1412)
11552#[inline]
11553#[target_feature(enable = "avx512bw")]
11554#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11555#[cfg_attr(test, assert_instr(vpmovwb))]
11556pub unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
11557    vpmovwbmem(mem_addr, a.as_i16x32(), k);
11558}
11559
11560/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
11561///
11562/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_storeu_epi8&expand=1411)
11563#[inline]
11564#[target_feature(enable = "avx512bw,avx512vl")]
11565#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11566#[cfg_attr(test, assert_instr(vpmovwb))]
11567pub unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
11568    vpmovwbmem256(mem_addr, a.as_i16x16(), k);
11569}
11570
11571/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
11572///
11573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_storeu_epi8&expand=1410)
11574#[inline]
11575#[target_feature(enable = "avx512bw,avx512vl")]
11576#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11577#[cfg_attr(test, assert_instr(vpmovwb))]
11578pub unsafe fn _mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
11579    vpmovwbmem128(mem_addr, a.as_i16x8(), k);
11580}
11581
11582/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
11583///
11584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_storeu_epi8&expand=2047)
11585#[inline]
11586#[target_feature(enable = "avx512bw")]
11587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11588#[cfg_attr(test, assert_instr(vpmovuswb))]
11589pub unsafe fn _mm512_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
11590    vpmovuswbmem(mem_addr, a.as_i16x32(), k);
11591}
11592
11593/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
11594///
11595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_storeu_epi8&expand=2046)
11596#[inline]
11597#[target_feature(enable = "avx512bw,avx512vl")]
11598#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11599#[cfg_attr(test, assert_instr(vpmovuswb))]
11600pub unsafe fn _mm256_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
11601    vpmovuswbmem256(mem_addr, a.as_i16x16(), k);
11602}
11603
11604/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
11605///
11606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_storeu_epi8&expand=2045)
11607#[inline]
11608#[target_feature(enable = "avx512bw,avx512vl")]
11609#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11610#[cfg_attr(test, assert_instr(vpmovuswb))]
11611pub unsafe fn _mm_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
11612    vpmovuswbmem128(mem_addr, a.as_i16x8(), k);
11613}
11614
11615#[allow(improper_ctypes)]
11616unsafe extern "C" {
    #[link_name = "llvm.x86.avx512.pmul.hr.sw.512"]
    fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;

    #[link_name = "llvm.x86.avx512.pmaddw.d.512"]
    fn vpmaddwd(a: i16x32, b: i16x32) -> i32x16;
    #[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
    fn vpmaddubsw(a: i8x64, b: i8x64) -> i16x32;

    #[link_name = "llvm.x86.avx512.packssdw.512"]
    fn vpackssdw(a: i32x16, b: i32x16) -> i16x32;
    #[link_name = "llvm.x86.avx512.packsswb.512"]
    fn vpacksswb(a: i16x32, b: i16x32) -> i8x64;
    #[link_name = "llvm.x86.avx512.packusdw.512"]
    fn vpackusdw(a: i32x16, b: i32x16) -> u16x32;
    #[link_name = "llvm.x86.avx512.packuswb.512"]
    fn vpackuswb(a: i16x32, b: i16x32) -> u8x64;

    #[link_name = "llvm.x86.avx512.psll.w.512"]
    fn vpsllw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.psllv.w.512"]
    fn vpsllvw(a: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.psllv.w.256"]
    fn vpsllvw256(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.psllv.w.128"]
    fn vpsllvw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.psrl.w.512"]
    fn vpsrlw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.psrlv.w.512"]
    fn vpsrlvw(a: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.psrlv.w.256"]
    fn vpsrlvw256(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.psrlv.w.128"]
    fn vpsrlvw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.psra.w.512"]
    fn vpsraw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.psrav.w.512"]
    fn vpsravw(a: i16x32, count: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.psrav.w.256"]
    fn vpsravw256(a: i16x16, count: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.psrav.w.128"]
    fn vpsravw128(a: i16x8, count: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.vpermi2var.hi.512"]
    fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.256"]
    fn vpermi2w256(a: i16x16, idx: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.128"]
    fn vpermi2w128(a: i16x8, idx: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.permvar.hi.512"]
    fn vpermw(a: i16x32, idx: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.permvar.hi.256"]
    fn vpermw256(a: i16x16, idx: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.permvar.hi.128"]
    fn vpermw128(a: i16x8, idx: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.pshuf.b.512"]
    fn vpshufb(a: i8x64, b: i8x64) -> i8x64;

    #[link_name = "llvm.x86.avx512.psad.bw.512"]
    fn vpsadbw(a: u8x64, b: u8x64) -> u64x8;

    #[link_name = "llvm.x86.avx512.dbpsadbw.512"]
    fn vdbpsadbw(a: u8x64, b: u8x64, imm8: i32) -> u16x32;
    #[link_name = "llvm.x86.avx512.dbpsadbw.256"]
    fn vdbpsadbw256(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
    #[link_name = "llvm.x86.avx512.dbpsadbw.128"]
    fn vdbpsadbw128(a: u8x16, b: u8x16, imm8: i32) -> u16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.512"]
    fn vpmovswb(a: i16x32, src: i8x32, mask: u32) -> i8x32;
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.256"]
    fn vpmovswb256(a: i16x16, src: i8x16, mask: u16) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.128"]
    fn vpmovswb128(a: i16x8, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.512"]
    fn vpmovuswb(a: u16x32, src: u8x32, mask: u32) -> u8x32;
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.256"]
    fn vpmovuswb256(a: u16x16, src: u8x16, mask: u16) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.128"]
    fn vpmovuswb128(a: u16x8, src: u8x16, mask: u8) -> u8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.512"]
    fn vpmovswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.256"]
    fn vpmovswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.128"]
    fn vpmovswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.512"]
    fn vpmovwbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.256"]
    fn vpmovwbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.128"]
    fn vpmovwbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.512"]
    fn vpmovuswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.256"]
    fn vpmovuswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.128"]
    fn vpmovuswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.loadu.b.128"]
    fn loaddqu8_128(mem_addr: *const i8, a: i8x16, mask: u16) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.loadu.w.128"]
    fn loaddqu16_128(mem_addr: *const i16, a: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.loadu.b.256"]
    fn loaddqu8_256(mem_addr: *const i8, a: i8x32, mask: u32) -> i8x32;
    #[link_name = "llvm.x86.avx512.mask.loadu.w.256"]
    fn loaddqu16_256(mem_addr: *const i16, a: i16x16, mask: u16) -> i16x16;
    #[link_name = "llvm.x86.avx512.mask.loadu.b.512"]
    fn loaddqu8_512(mem_addr: *const i8, a: i8x64, mask: u64) -> i8x64;
    #[link_name = "llvm.x86.avx512.mask.loadu.w.512"]
    fn loaddqu16_512(mem_addr: *const i16, a: i16x32, mask: u32) -> i16x32;

    #[link_name = "llvm.x86.avx512.mask.storeu.b.128"]
    fn storedqu8_128(mem_addr: *mut i8, a: i8x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.storeu.w.128"]
    fn storedqu16_128(mem_addr: *mut i16, a: i16x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.b.256"]
    fn storedqu8_256(mem_addr: *mut i8, a: i8x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.storeu.w.256"]
    fn storedqu16_256(mem_addr: *mut i16, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.storeu.b.512"]
    fn storedqu8_512(mem_addr: *mut i8, a: i8x64, mask: u64);
    #[link_name = "llvm.x86.avx512.mask.storeu.w.512"]
    fn storedqu16_512(mem_addr: *mut i16, a: i16x32, mask: u32);
}

#[cfg(test)]
mod tests {

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;
    use crate::hint::black_box;
    use crate::mem;

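    // The assert_eq_m128i/assert_eq_m256i/assert_eq_m512i helpers compare the
    // raw bits of two vectors, so expected values can be written with the
    // ordinary _mm*_set_* constructors.
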
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_abs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let r = _mm512_abs_epi16(a);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_abs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let r = _mm512_mask_abs_epi16(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_abs_epi16(a, 0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

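    // Note on the expected vectors throughout this module: mask bit 0 governs
    // the least significant lane, which the _mm*_set_* constructors list
    // *last*, so a mask whose low bits are set flips the trailing arguments.
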
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_abs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let r = _mm512_maskz_abs_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_abs_epi16(0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_abs_epi16() {
        let a = _mm256_set1_epi16(-1);
        let r = _mm256_mask_abs_epi16(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_abs_epi16(a, 0b00000000_11111111, a);
        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_abs_epi16() {
        let a = _mm256_set1_epi16(-1);
        let r = _mm256_maskz_abs_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_abs_epi16(0b00000000_11111111, a);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_abs_epi16() {
        let a = _mm_set1_epi16(-1);
        let r = _mm_mask_abs_epi16(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_abs_epi16(a, 0b00001111, a);
        let e = _mm_set_epi16(-1, -1, -1, -1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_abs_epi16() {
        let a = _mm_set1_epi16(-1);
        let r = _mm_maskz_abs_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_abs_epi16(0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_abs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let r = _mm512_abs_epi8(a);
        let e = _mm512_set1_epi8(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_abs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let r = _mm512_mask_abs_epi8(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_abs_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_abs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let r = _mm512_maskz_abs_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_abs_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_abs_epi8() {
        let a = _mm256_set1_epi8(-1);
        let r = _mm256_mask_abs_epi8(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_abs_epi8(a, 0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_abs_epi8() {
        let a = _mm256_set1_epi8(-1);
        let r = _mm256_maskz_abs_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_abs_epi8(0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_abs_epi8() {
        let a = _mm_set1_epi8(-1);
        let r = _mm_mask_abs_epi8(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_abs_epi8(a, 0b00000000_11111111, a);
        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_abs_epi8() {
        let a = _mm_set1_epi8(-1);
        let r = _mm_maskz_abs_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_abs_epi8(0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_add_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_add_epi16(a, b);
        let e = _mm512_set1_epi16(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_add_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_add_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_add_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_add_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_maskz_add_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_add_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_add_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_add_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_maskz_add_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_add_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_add_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_add_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_add_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_add_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_maskz_add_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_add_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_add_epi8(a, b);
        let e = _mm512_set1_epi8(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_add_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_add_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_maskz_add_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_add_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_add_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_add_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_maskz_add_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_add_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_add_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_mask_add_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_add_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_add_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_maskz_add_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_add_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_adds_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_adds_epu16(a, b);
        let e = _mm512_set1_epi16(u16::MAX as i16);
        assert_eq_m512i(r, e);
    }

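    // Worked example of the saturation exercised by these tests: 1 + 0xFFFF
    // would wrap to 0 under _mm512_add_epi16, but adds_epu16 clamps each lane
    // to u16::MAX.
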
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_adds_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_mask_adds_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_adds_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_maskz_adds_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_adds_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_mask_adds_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epu16(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_adds_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_maskz_adds_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epu16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_adds_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_mask_adds_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epu16(a, 0b00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi16(1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_adds_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_maskz_adds_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epu16(0b00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi16(0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_adds_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_adds_epu8(a, b);
        let e = _mm512_set1_epi8(u8::MAX as i8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_adds_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_mask_adds_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epu8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_adds_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_maskz_adds_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epu8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_adds_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_mask_adds_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_adds_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_maskz_adds_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epu8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_adds_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_mask_adds_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epu8(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_adds_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_maskz_adds_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epu8(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_adds_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_adds_epi16(a, b);
        let e = _mm512_set1_epi16(i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_adds_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_mask_adds_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_adds_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_maskz_adds_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_adds_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_mask_adds_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epi16(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_adds_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_maskz_adds_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epi16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_adds_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(i16::MAX);
        let r = _mm_mask_adds_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_adds_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(i16::MAX);
        let r = _mm_maskz_adds_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_adds_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_adds_epi8(a, b);
        let e = _mm512_set1_epi8(i8::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_adds_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_mask_adds_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epi8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_adds_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_maskz_adds_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epi8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_adds_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(i8::MAX);
        let r = _mm256_mask_adds_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_adds_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(i8::MAX);
        let r = _mm256_maskz_adds_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epi8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_adds_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(i8::MAX);
        let r = _mm_mask_adds_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epi8(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_adds_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(i8::MAX);
        let r = _mm_maskz_adds_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epi8(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_sub_epi16(a, b);
        let e = _mm512_set1_epi16(-1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_sub_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sub_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_maskz_sub_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sub_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_sub_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_sub_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sub_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_sub_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_maskz_sub_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sub_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_sub_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_sub_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sub_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_sub_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_maskz_sub_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sub_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_sub_epi8(a, b);
        let e = _mm512_set1_epi8(-1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_sub_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sub_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_maskz_sub_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sub_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_sub_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_sub_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sub_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_sub_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_maskz_sub_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sub_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_sub_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_mask_sub_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sub_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_sub_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_maskz_sub_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sub_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_subs_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_subs_epu16(a, b);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

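    // Worked example: as unsigned values, 1 - 0xFFFF underflows, and
    // subs_epu16 clamps the result to 0, where a wrapping sub would give 2.
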
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_subs_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_mask_subs_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_subs_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_subs_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_maskz_subs_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_subs_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_subs_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_mask_subs_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_subs_epu16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_subs_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_maskz_subs_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_subs_epu16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_subs_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_mask_subs_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_subs_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_subs_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_maskz_subs_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_subs_epu16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_subs_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_subs_epu8(a, b);
        let e = _mm512_set1_epi8(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_subs_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_mask_subs_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_subs_epu8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_subs_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_maskz_subs_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_subs_epu8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_subs_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_mask_subs_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_subs_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_subs_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_maskz_subs_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_subs_epu8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_subs_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_mask_subs_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_subs_epu8(a, 0b00000000_00001111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_subs_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_maskz_subs_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_subs_epu8(0b00000000_00001111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_subs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_subs_epi16(a, b);
        let e = _mm512_set1_epi16(i16::MIN);
        assert_eq_m512i(r, e);
    }

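    // Worked example: -1 - i16::MAX = -32768, which is exactly i16::MIN, the
    // floor that the signed saturating subtraction can never go below.
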
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_subs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_mask_subs_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_subs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_subs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_maskz_subs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_subs_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_subs_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_mask_subs_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_subs_epi16(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_subs_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_maskz_subs_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_subs_epi16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_subs_epi16() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(i16::MAX);
        let r = _mm_mask_subs_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_subs_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(-1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_subs_epi16() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(i16::MAX);
        let r = _mm_maskz_subs_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_subs_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_subs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_subs_epi8(a, b);
        let e = _mm512_set1_epi8(i8::MIN);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_subs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_mask_subs_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_subs_epi8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_subs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_maskz_subs_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
12924        let r = _mm512_maskz_subs_epi8(
12925            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
12926            a,
12927            b,
12928        );
12929        #[rustfmt::skip]
12930        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12931                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12932                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12933                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
12934        assert_eq_m512i(r, e);
12935    }
12936
12937    #[simd_test(enable = "avx512bw,avx512vl")]
12938    unsafe fn test_mm256_mask_subs_epi8() {
12939        let a = _mm256_set1_epi8(-1);
12940        let b = _mm256_set1_epi8(i8::MAX);
12941        let r = _mm256_mask_subs_epi8(a, 0, a, b);
12942        assert_eq_m256i(r, a);
12943        let r = _mm256_mask_subs_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
12944        #[rustfmt::skip]
12945        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
12946                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
12947        assert_eq_m256i(r, e);
12948    }
12949
12950    #[simd_test(enable = "avx512bw,avx512vl")]
12951    unsafe fn test_mm256_maskz_subs_epi8() {
12952        let a = _mm256_set1_epi8(-1);
12953        let b = _mm256_set1_epi8(i8::MAX);
12954        let r = _mm256_maskz_subs_epi8(0, a, b);
12955        assert_eq_m256i(r, _mm256_setzero_si256());
12956        let r = _mm256_maskz_subs_epi8(0b00000000_00000000_00000000_00001111, a, b);
12957        #[rustfmt::skip]
12958        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12959                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
12960        assert_eq_m256i(r, e);
12961    }
12962
12963    #[simd_test(enable = "avx512bw,avx512vl")]
12964    unsafe fn test_mm_mask_subs_epi8() {
12965        let a = _mm_set1_epi8(-1);
12966        let b = _mm_set1_epi8(i8::MAX);
12967        let r = _mm_mask_subs_epi8(a, 0, a, b);
12968        assert_eq_m128i(r, a);
12969        let r = _mm_mask_subs_epi8(a, 0b00000000_00001111, a, b);
12970        #[rustfmt::skip]
12971        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
12972        assert_eq_m128i(r, e);
12973    }
12974
12975    #[simd_test(enable = "avx512bw,avx512vl")]
12976    unsafe fn test_mm_maskz_subs_epi8() {
12977        let a = _mm_set1_epi8(-1);
12978        let b = _mm_set1_epi8(i8::MAX);
12979        let r = _mm_maskz_subs_epi8(0, a, b);
12980        assert_eq_m128i(r, _mm_setzero_si128());
12981        let r = _mm_maskz_subs_epi8(0b00000000_00001111, a, b);
12982        #[rustfmt::skip]
12983        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
12984        assert_eq_m128i(r, e);
12985    }
12986
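    // mulhi keeps the upper 16 bits of the widened 32-bit product, so the
    // 1 * 1 inputs below (product 1, high half 0) give all-zero lanes. A
    // scalar sketch of one unsigned lane, added here for reference:
    //     fn mulhi_epu16(a: u16, b: u16) -> u16 {
    //         ((a as u32 * b as u32) >> 16) as u16
    //     }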
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mulhi_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mulhi_epu16(a, b);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mulhi_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_mulhi_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_mulhi_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mulhi_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_mulhi_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mulhi_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_mulhi_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_mulhi_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_mulhi_epu16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_mulhi_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_mulhi_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mulhi_epu16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_mulhi_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_mulhi_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_mulhi_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_mulhi_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_mulhi_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mulhi_epu16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mulhi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mulhi_epi16(a, b);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mulhi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_mulhi_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_mulhi_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mulhi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_mulhi_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mulhi_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_mulhi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_mulhi_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_mulhi_epi16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_mulhi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_mulhi_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mulhi_epi16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_mulhi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_mulhi_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_mulhi_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_mulhi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_mulhi_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mulhi_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

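    // vpmulhrsw widens each lane pair to 32 bits, multiplies, shifts right
    // by 14, adds 1, then shifts right by 1 (rounding the high half of a
    // Q15 product). For the 1 * 1 inputs below: ((1 >> 14) + 1) >> 1 == 0,
    // hence the all-zero expectations.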
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mulhrs_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mulhrs_epi16(a, b);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mulhrs_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_mulhrs_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_mulhrs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mulhrs_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_mulhrs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mulhrs_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_mulhrs_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_mulhrs_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_mulhrs_epi16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_mulhrs_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_mulhrs_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mulhrs_epi16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_mulhrs_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_mulhrs_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_mulhrs_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_mulhrs_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_mulhrs_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mulhrs_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

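    // mullo keeps the low 16 bits of the product (the wrapping_mul analogue
    // of mulhi above), so the 1 * 1 inputs below leave every selected lane
    // at 1 rather than 0.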
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mullo_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mullo_epi16(a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mullo_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_mullo_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_mullo_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mullo_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_mullo_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mullo_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_mullo_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_mullo_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_mullo_epi16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_mullo_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_mullo_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mullo_epi16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_mullo_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_mullo_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_mullo_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_mullo_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_mullo_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mullo_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

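    // Reading the expected vectors in the max/min tests below: the set
    // intrinsics list elements from the highest lane down to lane 0, while
    // mask bit i always selects lane i, so a mask like 0b..._11111111
    // affects the *rightmost* arguments of the corresponding set call.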
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_max_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epu16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_max_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_max_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epu16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_max_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_max_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_max_epu16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_max_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_max_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_max_epu16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_max_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_max_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_max_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_max_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_max_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_max_epu16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_max_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_max_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epu8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_max_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epu8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_max_epu8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_max_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_max_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_max_epu8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_max_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_max_epu8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_max_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_max_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_max_epu8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_max_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_max_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_max_epu8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

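    // With inputs 0..=15 every lane value is non-negative and in range for
    // the element type, so the signed max tests below expect the same
    // vectors as the unsigned ones above.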
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_max_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_max_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_max_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_max_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_max_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_max_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_max_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_max_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_max_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_max_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_max_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_max_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_max_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_max_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_max_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_max_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_max_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_max_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_max_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_max_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_max_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_max_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_max_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_max_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_max_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_max_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_max_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_max_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_max_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_max_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

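    // The min tests mirror the max tests with the same ascending/descending
    // inputs; where the two rows cross, the selected half of each expected
    // row is the descending run 7, 6, 5, 4, 3, 2, 1, 0 instead of 8..=15.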
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_min_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epu16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_min_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_min_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_min_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epu16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_min_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_min_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epu16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_min_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epu16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_min_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_min_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_min_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_min_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_min_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_min_epu16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_min_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_min_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_min_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epu8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_min_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epu8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_min_epu8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_min_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_min_epu8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epu8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_min_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_min_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_min_epu8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_min_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_min_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_min_epu8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

13978    #[simd_test(enable = "avx512bw")]
13979    unsafe fn test_mm512_min_epi16() {
13980        #[rustfmt::skip]
13981        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13982                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13983        #[rustfmt::skip]
13984        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13985                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13986        let r = _mm512_min_epi16(a, b);
13987        #[rustfmt::skip]
13988        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
13989                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
13990        assert_eq_m512i(r, e);
13991    }
13992
13993    #[simd_test(enable = "avx512f")]
13994    unsafe fn test_mm512_mask_min_epi16() {
13995        #[rustfmt::skip]
13996        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13997                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13998        #[rustfmt::skip]
13999        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14000                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14001        let r = _mm512_mask_min_epi16(a, 0, a, b);
14002        assert_eq_m512i(r, a);
14003        let r = _mm512_mask_min_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
14004        #[rustfmt::skip]
14005        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14006                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14007        assert_eq_m512i(r, e);
14008    }
14009
14010    #[simd_test(enable = "avx512f")]
14011    unsafe fn test_mm512_maskz_min_epi16() {
14012        #[rustfmt::skip]
14013        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14014                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14015        #[rustfmt::skip]
14016        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14017                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14018        let r = _mm512_maskz_min_epi16(0, a, b);
14019        assert_eq_m512i(r, _mm512_setzero_si512());
14020        let r = _mm512_maskz_min_epi16(0b00000000_11111111_00000000_11111111, a, b);
14021        #[rustfmt::skip]
14022        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14023                                 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14024        assert_eq_m512i(r, e);
14025    }
14026
14027    #[simd_test(enable = "avx512f,avx512vl")]
14028    unsafe fn test_mm256_mask_min_epi16() {
14029        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14030        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14031        let r = _mm256_mask_min_epi16(a, 0, a, b);
14032        assert_eq_m256i(r, a);
14033        let r = _mm256_mask_min_epi16(a, 0b00000000_11111111, a, b);
14034        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14035        assert_eq_m256i(r, e);
14036    }
14037
14038    #[simd_test(enable = "avx512f,avx512vl")]
14039    unsafe fn test_mm256_maskz_min_epi16() {
14040        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14041        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14042        let r = _mm256_maskz_min_epi16(0, a, b);
14043        assert_eq_m256i(r, _mm256_setzero_si256());
14044        let r = _mm256_maskz_min_epi16(0b00000000_11111111, a, b);
14045        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14046        assert_eq_m256i(r, e);
14047    }
14048
14049    #[simd_test(enable = "avx512f,avx512vl")]
14050    unsafe fn test_mm_mask_min_epi16() {
14051        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14052        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14053        let r = _mm_mask_min_epi16(a, 0, a, b);
14054        assert_eq_m128i(r, a);
14055        let r = _mm_mask_min_epi16(a, 0b00001111, a, b);
14056        let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
14057        assert_eq_m128i(r, e);
14058    }
14059
14060    #[simd_test(enable = "avx512f,avx512vl")]
14061    unsafe fn test_mm_maskz_min_epi16() {
14062        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14063        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14064        let r = _mm_maskz_min_epi16(0, a, b);
14065        assert_eq_m128i(r, _mm_setzero_si128());
14066        let r = _mm_maskz_min_epi16(0b00001111, a, b);
14067        let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
14068        assert_eq_m128i(r, e);
14069    }
14070
14071    #[simd_test(enable = "avx512bw")]
14072    unsafe fn test_mm512_min_epi8() {
14073        #[rustfmt::skip]
14074        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14075                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14076                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14077                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14078        #[rustfmt::skip]
14079        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14080                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14081                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14082                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14083        let r = _mm512_min_epi8(a, b);
14084        #[rustfmt::skip]
14085        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14086                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14087                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14088                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14089        assert_eq_m512i(r, e);
14090    }
14091
14092    #[simd_test(enable = "avx512f")]
14093    unsafe fn test_mm512_mask_min_epi8() {
14094        #[rustfmt::skip]
14095        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14096                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14097                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14098                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14099        #[rustfmt::skip]
14100        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14101                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14102                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14103                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14104        let r = _mm512_mask_min_epi8(a, 0, a, b);
14105        assert_eq_m512i(r, a);
14106        let r = _mm512_mask_min_epi8(
14107            a,
14108            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
14109            a,
14110            b,
14111        );
14112        #[rustfmt::skip]
14113        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14114                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14115                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14116                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14117        assert_eq_m512i(r, e);
14118    }
14119
14120    #[simd_test(enable = "avx512f")]
14121    unsafe fn test_mm512_maskz_min_epi8() {
14122        #[rustfmt::skip]
14123        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14124                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14125                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14126                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14127        #[rustfmt::skip]
14128        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14129                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14130                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14131                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14132        let r = _mm512_maskz_min_epi8(0, a, b);
14133        assert_eq_m512i(r, _mm512_setzero_si512());
14134        let r = _mm512_maskz_min_epi8(
14135            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
14136            a,
14137            b,
14138        );
14139        #[rustfmt::skip]
14140        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14141                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14142                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14143                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14144        assert_eq_m512i(r, e);
14145    }
14146
14147    #[simd_test(enable = "avx512f,avx512vl")]
14148    unsafe fn test_mm256_mask_min_epi8() {
14149        #[rustfmt::skip]
14150        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14151                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14152        #[rustfmt::skip]
14153        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14154                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14155        let r = _mm256_mask_min_epi8(a, 0, a, b);
14156        assert_eq_m256i(r, a);
14157        let r = _mm256_mask_min_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
14158        #[rustfmt::skip]
14159        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14160                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14161        assert_eq_m256i(r, e);
14162    }
14163
14164    #[simd_test(enable = "avx512f,avx512vl")]
14165    unsafe fn test_mm256_maskz_min_epi8() {
14166        #[rustfmt::skip]
14167        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14168                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14169        #[rustfmt::skip]
14170        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14171                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14172        let r = _mm256_maskz_min_epi8(0, a, b);
14173        assert_eq_m256i(r, _mm256_setzero_si256());
14174        let r = _mm256_maskz_min_epi8(0b00000000_11111111_00000000_11111111, a, b);
14175        #[rustfmt::skip]
14176        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14177                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14178        assert_eq_m256i(r, e);
14179    }
14180
14181    #[simd_test(enable = "avx512f,avx512vl")]
14182    unsafe fn test_mm_mask_min_epi8() {
14183        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14184        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14185        let r = _mm_mask_min_epi8(a, 0, a, b);
14186        assert_eq_m128i(r, a);
14187        let r = _mm_mask_min_epi8(a, 0b00000000_11111111, a, b);
14188        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14189        assert_eq_m128i(r, e);
14190    }
14191
14192    #[simd_test(enable = "avx512f,avx512vl")]
14193    unsafe fn test_mm_maskz_min_epi8() {
14194        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14195        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14196        let r = _mm_maskz_min_epi8(0, a, b);
14197        assert_eq_m128i(r, _mm_setzero_si128());
14198        let r = _mm_maskz_min_epi8(0b00000000_11111111, a, b);
14199        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14200        assert_eq_m128i(r, e);
14201    }
14202
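    // The unsigned cmplt tests below feed -2 and -1, which wrap to the two
    // largest unsigned values (e.g. 0xFFFE and 0xFFFF for 16-bit lanes) and
    // therefore still compare as less-than in every lane; the signed cmplt
    // tests reuse the same inputs, where -2 < -1 holds directly.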
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmplt_epu16_mask() {
        let a = _mm512_set1_epi16(-2);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmplt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmplt_epu16_mask() {
        let a = _mm512_set1_epi16(-2);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmplt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmplt_epu16_mask() {
        let a = _mm256_set1_epi16(-2);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmplt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmplt_epu16_mask() {
        let a = _mm256_set1_epi16(-2);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmplt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmplt_epu16_mask() {
        let a = _mm_set1_epi16(-2);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmplt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmplt_epu16_mask() {
        let a = _mm_set1_epi16(-2);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmplt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmplt_epu8_mask() {
        let a = _mm512_set1_epi8(-2);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmplt_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmplt_epu8_mask() {
        let a = _mm512_set1_epi8(-2);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmplt_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmplt_epu8_mask() {
        let a = _mm256_set1_epi8(-2);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmplt_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmplt_epu8_mask() {
        let a = _mm256_set1_epi8(-2);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmplt_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmplt_epu8_mask() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmplt_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmplt_epu8_mask() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmplt_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmplt_epi16_mask() {
        let a = _mm512_set1_epi16(-2);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmplt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmplt_epi16_mask() {
        let a = _mm512_set1_epi16(-2);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmplt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmplt_epi16_mask() {
        let a = _mm256_set1_epi16(-2);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmplt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmplt_epi16_mask() {
        let a = _mm256_set1_epi16(-2);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmplt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmplt_epi16_mask() {
        let a = _mm_set1_epi16(-2);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmplt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmplt_epi16_mask() {
        let a = _mm_set1_epi16(-2);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmplt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmplt_epi8_mask() {
        let a = _mm512_set1_epi8(-2);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmplt_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmplt_epi8_mask() {
        let a = _mm512_set1_epi8(-2);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmplt_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmplt_epi8_mask() {
        let a = _mm256_set1_epi8(-2);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmplt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmplt_epi8_mask() {
        let a = _mm256_set1_epi8(-2);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmplt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmplt_epi8_mask() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmplt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmplt_epi8_mask() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmplt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

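    // The signed cmpgt tests compare 2 against -1; under an unsigned
    // interpretation -1 would wrap to the lane maximum and the compare would
    // fail, so these inputs pin down the signedness of the instruction.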
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpgt_epu16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmpgt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpgt_epu16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpgt_epu16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmpgt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpgt_epu16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpgt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpgt_epu16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmpgt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpgt_epu16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpgt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpgt_epu8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmpgt_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpgt_epu8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpgt_epu8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmpgt_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpgt_epu8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpgt_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpgt_epu8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmpgt_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpgt_epu8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpgt_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpgt_epi16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmpgt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpgt_epi16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpgt_epi16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmpgt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpgt_epi16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpgt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpgt_epi16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmpgt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpgt_epi16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpgt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpgt_epi8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpgt_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpgt_epi8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpgt_epi8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpgt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpgt_epi8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpgt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpgt_epi8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpgt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpgt_epi8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpgt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

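    // The cmple/cmpge tests below compare equal inputs, so they exercise
    // exactly the "or equal" edge of the predicate that distinguishes them
    // from cmplt/cmpgt.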
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmple_epu16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmple_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmple_epu16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmple_epu16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmple_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmple_epu16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmple_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmple_epu16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmple_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmple_epu16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmple_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmple_epu8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmple_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmple_epu8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmple_epu8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmple_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmple_epu8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmple_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmple_epu8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmple_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmple_epu8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmple_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmple_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmple_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmple_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmple_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmple_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmple_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmple_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmple_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmple_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmple_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmple_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmple_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmple_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmple_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmple_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmple_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmple_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmple_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmple_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmple_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmple_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmple_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpge_epu16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmpge_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpge_epu16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpge_epu16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmpge_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpge_epu16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpge_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpge_epu16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmpge_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpge_epu16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpge_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpge_epu8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmpge_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpge_epu8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpge_epu8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmpge_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpge_epu8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpge_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpge_epu8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmpge_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpge_epu8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpge_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpge_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmpge_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpge_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpge_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmpge_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpge_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpge_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpge_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmpge_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpge_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpge_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpge_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpge_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpge_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpge_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpge_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpge_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpge_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpge_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpge_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpge_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpge_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

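    // The cmpeq/cmpneq tests splat identical (respectively distinct) values,
    // so the raw comparison result is all ones and only the write-mask shapes
    // the final mask.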
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpeq_epu16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmpeq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpeq_epu16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpeq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpeq_epu16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmpeq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpeq_epu16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpeq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpeq_epu16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmpeq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpeq_epu16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpeq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpeq_epu8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmpeq_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpeq_epu8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpeq_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpeq_epu8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmpeq_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpeq_epu8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpeq_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpeq_epu8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmpeq_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpeq_epu8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpeq_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpeq_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmpeq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpeq_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpeq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpeq_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmpeq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpeq_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpeq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpeq_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmpeq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpeq_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpeq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpeq_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpeq_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpeq_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpeq_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpeq_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpeq_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpeq_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpeq_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpeq_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpeq_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpeq_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpeq_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpneq_epu16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmpneq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpneq_epu16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpneq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpneq_epu16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmpneq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpneq_epu16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpneq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpneq_epu16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmpneq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpneq_epu16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpneq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpneq_epu8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmpneq_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpneq_epu8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpneq_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15354        );
15355    }
15356
15357    #[simd_test(enable = "avx512bw,avx512vl")]
15358    unsafe fn test_mm256_cmpneq_epu8_mask() {
15359        let a = _mm256_set1_epi8(2);
15360        let b = _mm256_set1_epi8(1);
15361        let m = _mm256_cmpneq_epu8_mask(a, b);
15362        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15363    }
15364
15365    #[simd_test(enable = "avx512bw,avx512vl")]
15366    unsafe fn test_mm256_mask_cmpneq_epu8_mask() {
15367        let a = _mm256_set1_epi8(2);
15368        let b = _mm256_set1_epi8(1);
15369        let mask = 0b01010101_01010101_01010101_01010101;
15370        let r = _mm256_mask_cmpneq_epu8_mask(mask, a, b);
15371        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15372    }
15373
15374    #[simd_test(enable = "avx512bw,avx512vl")]
15375    unsafe fn test_mm_cmpneq_epu8_mask() {
15376        let a = _mm_set1_epi8(2);
15377        let b = _mm_set1_epi8(1);
15378        let m = _mm_cmpneq_epu8_mask(a, b);
15379        assert_eq!(m, 0b11111111_11111111);
15380    }
15381
15382    #[simd_test(enable = "avx512bw,avx512vl")]
15383    unsafe fn test_mm_mask_cmpneq_epu8_mask() {
15384        let a = _mm_set1_epi8(2);
15385        let b = _mm_set1_epi8(1);
15386        let mask = 0b01010101_01010101;
15387        let r = _mm_mask_cmpneq_epu8_mask(mask, a, b);
15388        assert_eq!(r, 0b01010101_01010101);
15389    }
15390
15391    #[simd_test(enable = "avx512bw")]
15392    unsafe fn test_mm512_cmpneq_epi16_mask() {
15393        let a = _mm512_set1_epi16(1);
15394        let b = _mm512_set1_epi16(-1);
15395        let m = _mm512_cmpneq_epi16_mask(a, b);
15396        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15397    }
15398
15399    #[simd_test(enable = "avx512bw")]
15400    unsafe fn test_mm512_mask_cmpneq_epi16_mask() {
15401        let a = _mm512_set1_epi16(1);
15402        let b = _mm512_set1_epi16(-1);
15403        let mask = 0b01010101_01010101_01010101_01010101;
15404        let r = _mm512_mask_cmpneq_epi16_mask(mask, a, b);
15405        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15406    }
15407
15408    #[simd_test(enable = "avx512bw,avx512vl")]
15409    unsafe fn test_mm256_cmpneq_epi16_mask() {
15410        let a = _mm256_set1_epi16(1);
15411        let b = _mm256_set1_epi16(-1);
15412        let m = _mm256_cmpneq_epi16_mask(a, b);
15413        assert_eq!(m, 0b11111111_11111111);
15414    }
15415
15416    #[simd_test(enable = "avx512bw,avx512vl")]
15417    unsafe fn test_mm256_mask_cmpneq_epi16_mask() {
15418        let a = _mm256_set1_epi16(1);
15419        let b = _mm256_set1_epi16(-1);
15420        let mask = 0b01010101_01010101;
15421        let r = _mm256_mask_cmpneq_epi16_mask(mask, a, b);
15422        assert_eq!(r, 0b01010101_01010101);
15423    }
15424
15425    #[simd_test(enable = "avx512bw,avx512vl")]
15426    unsafe fn test_mm_cmpneq_epi16_mask() {
15427        let a = _mm_set1_epi16(1);
15428        let b = _mm_set1_epi16(-1);
15429        let m = _mm_cmpneq_epi16_mask(a, b);
15430        assert_eq!(m, 0b11111111);
15431    }
15432
15433    #[simd_test(enable = "avx512bw,avx512vl")]
15434    unsafe fn test_mm_mask_cmpneq_epi16_mask() {
15435        let a = _mm_set1_epi16(1);
15436        let b = _mm_set1_epi16(-1);
15437        let mask = 0b01010101;
15438        let r = _mm_mask_cmpneq_epi16_mask(mask, a, b);
15439        assert_eq!(r, 0b01010101);
15440    }
15441
15442    #[simd_test(enable = "avx512bw")]
15443    unsafe fn test_mm512_cmpneq_epi8_mask() {
15444        let a = _mm512_set1_epi8(1);
15445        let b = _mm512_set1_epi8(-1);
15446        let m = _mm512_cmpneq_epi8_mask(a, b);
15447        assert_eq!(
15448            m,
15449            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15450        );
15451    }
15452
15453    #[simd_test(enable = "avx512bw")]
15454    unsafe fn test_mm512_mask_cmpneq_epi8_mask() {
15455        let a = _mm512_set1_epi8(1);
15456        let b = _mm512_set1_epi8(-1);
15457        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15458        let r = _mm512_mask_cmpneq_epi8_mask(mask, a, b);
15459        assert_eq!(
15460            r,
15461            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15462        );
15463    }
15464
15465    #[simd_test(enable = "avx512bw,avx512vl")]
15466    unsafe fn test_mm256_cmpneq_epi8_mask() {
15467        let a = _mm256_set1_epi8(1);
15468        let b = _mm256_set1_epi8(-1);
15469        let m = _mm256_cmpneq_epi8_mask(a, b);
15470        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15471    }
15472
15473    #[simd_test(enable = "avx512bw,avx512vl")]
15474    unsafe fn test_mm256_mask_cmpneq_epi8_mask() {
15475        let a = _mm256_set1_epi8(1);
15476        let b = _mm256_set1_epi8(-1);
15477        let mask = 0b01010101_01010101_01010101_01010101;
15478        let r = _mm256_mask_cmpneq_epi8_mask(mask, a, b);
15479        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15480    }
15481
15482    #[simd_test(enable = "avx512bw,avx512vl")]
15483    unsafe fn test_mm_cmpneq_epi8_mask() {
15484        let a = _mm_set1_epi8(1);
15485        let b = _mm_set1_epi8(-1);
15486        let m = _mm_cmpneq_epi8_mask(a, b);
15487        assert_eq!(m, 0b11111111_11111111);
15488    }
15489
15490    #[simd_test(enable = "avx512bw,avx512vl")]
15491    unsafe fn test_mm_mask_cmpneq_epi8_mask() {
15492        let a = _mm_set1_epi8(1);
15493        let b = _mm_set1_epi8(-1);
15494        let mask = 0b01010101_01010101;
15495        let r = _mm_mask_cmpneq_epi8_mask(mask, a, b);
15496        assert_eq!(r, 0b01010101_01010101);
15497    }
15498
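    // Unlike the fixed cmpeq/cmpneq variants above, the `cmp` family takes the
    // comparison predicate as a const generic parameter; `_MM_CMPINT_LT` selects
    // the less-than predicate, so with a == 0 and b == 1 every lane compares true.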
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmp_epu16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmp_epu16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmp_epu16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmp_epu16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmp_epu16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmp_epu16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmp_epu8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmp_epu8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmp_epu8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmp_epu8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmp_epu8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmp_epu8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmp_epi16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmp_epi16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmp_epi16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmp_epi16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmp_epi16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmp_epi16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmp_epi8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmp_epi8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmp_epi8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmp_epi8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmp_epi8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmp_epi8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

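    // The reduce helpers fold every lane of a vector into a single scalar. In the
    // masked variants, lanes whose mask bit is clear are replaced by the identity
    // of the operation (0 for add, 1 for mul, all-ones for and, the type's minimum
    // for signed max, and so on) before the fold, so they cannot affect the result.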
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let e = _mm256_reduce_add_epi16(a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let e = _mm256_mask_reduce_add_epi16(0b11111111_00000000, a);
        assert_eq!(8, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_add_epi16() {
        let a = _mm_set1_epi16(1);
        let e = _mm_reduce_add_epi16(a);
        assert_eq!(8, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_add_epi16() {
        let a = _mm_set1_epi16(1);
        let e = _mm_mask_reduce_add_epi16(0b11110000, a);
        assert_eq!(4, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let e = _mm256_reduce_add_epi8(a);
        assert_eq!(32, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let e = _mm256_mask_reduce_add_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_add_epi8() {
        let a = _mm_set1_epi8(1);
        let e = _mm_reduce_add_epi8(a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_add_epi8() {
        let a = _mm_set1_epi8(1);
        let e = _mm_mask_reduce_add_epi8(0b11111111_00000000, a);
        assert_eq!(8, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_and_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_reduce_and_epi16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_and_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_mask_reduce_and_epi16(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_and_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_reduce_and_epi16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_and_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_mask_reduce_and_epi16(0b11110000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_and_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_reduce_and_epi8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_and_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_mask_reduce_and_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_and_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_reduce_and_epi8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_and_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_mask_reduce_and_epi8(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_mul_epi16() {
        let a = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        let e = _mm256_reduce_mul_epi16(a);
        assert_eq!(256, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_mul_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_mask_reduce_mul_epi16(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_mul_epi16() {
        let a = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
        let e = _mm_reduce_mul_epi16(a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_mul_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_mask_reduce_mul_epi16(0b11110000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_mul_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            2, 2, 2,
        );
        let e = _mm256_reduce_mul_epi8(a);
        assert_eq!(64, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_mul_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            2, 2, 2,
        );
        let e = _mm256_mask_reduce_mul_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_mul_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
        let e = _mm_reduce_mul_epi8(a);
        assert_eq!(8, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_mul_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
        let e = _mm_mask_reduce_mul_epi8(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_max_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_reduce_max_epi16(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_max_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_mask_reduce_max_epi16(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_max_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_reduce_max_epi16(a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_max_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_mask_reduce_max_epi16(0b11110000, a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_max_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_reduce_max_epi8(a);
        assert_eq!(31, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_max_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_mask_reduce_max_epi8(0b1111111111111111_0000000000000000, a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_max_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_reduce_max_epi8(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_max_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_mask_reduce_max_epi8(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_max_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_reduce_max_epu16(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_max_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_mask_reduce_max_epu16(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_max_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_reduce_max_epu16(a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_max_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_mask_reduce_max_epu16(0b11110000, a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_max_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_reduce_max_epu8(a);
        assert_eq!(31, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_max_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_mask_reduce_max_epu8(0b1111111111111111_0000000000000000, a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_max_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_reduce_max_epu8(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_max_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_mask_reduce_max_epu8(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_min_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_reduce_min_epi16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_min_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_mask_reduce_min_epi16(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_min_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_reduce_min_epi16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_min_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_mask_reduce_min_epi16(0b11110000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_min_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_reduce_min_epi8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_min_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_mask_reduce_min_epi8(0b1111111111111111_0000000000000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_min_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_reduce_min_epi8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_min_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_mask_reduce_min_epi8(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_min_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_reduce_min_epu16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_min_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_mask_reduce_min_epu16(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_min_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_reduce_min_epu16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_min_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_mask_reduce_min_epu16(0b11110000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_min_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_reduce_min_epu8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_min_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_mask_reduce_min_epu8(0b1111111111111111_0000000000000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_min_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_reduce_min_epu8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_min_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_mask_reduce_min_epu8(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_or_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_reduce_or_epi16(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_or_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_mask_reduce_or_epi16(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_or_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_reduce_or_epi16(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_or_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_mask_reduce_or_epi16(0b11110000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_or_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_reduce_or_epi8(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_or_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_mask_reduce_or_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_or_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_reduce_or_epi8(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_or_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_mask_reduce_or_epi8(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

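    // loadu/storeu are the unaligned load/store intrinsics. Note that
    // _mm512_set_epi16 and friends take their arguments from the highest lane down
    // to the lowest, which is why the expected vectors list the array in reverse.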
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_loadu_epi16() {
        #[rustfmt::skip]
        let a: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm512_loadu_epi16(&a[0]);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_loadu_epi16() {
        let a: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let r = _mm256_loadu_epi16(&a[0]);
        let e = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_loadu_epi16() {
        let a: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
        let r = _mm_loadu_epi16(&a[0]);
        let e = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_loadu_epi8() {
        #[rustfmt::skip]
        let a: [i8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                           1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm512_loadu_epi8(&a[0]);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
                                32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_loadu_epi8() {
        #[rustfmt::skip]
        let a: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm256_loadu_epi8(&a[0]);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_loadu_epi8() {
        let a: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let r = _mm_loadu_epi8(&a[0]);
        let e = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m128i(r, e);
    }

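    // The storeu tests write every lane of a register unconditionally: the store
    // reuses another vector as the destination buffer (via a pointer cast) and
    // then compares that buffer against the source register.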
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_storeu_epi16() {
        let a = _mm512_set1_epi16(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_storeu_epi16() {
        let a = _mm256_set1_epi16(9);
        let mut r = _mm256_set1_epi32(0);
        _mm256_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_storeu_epi16() {
        let a = _mm_set1_epi16(9);
        let mut r = _mm_set1_epi32(0);
        _mm_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_storeu_epi8() {
        let a = _mm512_set1_epi8(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_storeu_epi8() {
        let a = _mm256_set1_epi8(9);
        let mut r = _mm256_set1_epi32(0);
        _mm256_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_storeu_epi8() {
        let a = _mm_set1_epi8(9);
        let mut r = _mm_set1_epi32(0);
        _mm_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        assert_eq_m128i(r, a);
    }

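    // The masked load/store tests pass the source pointer through black_box so the
    // compiler cannot see through it and constant-fold the memory access. Per mask
    // bit: clear bits keep src (mask_loadu), produce zero (maskz_loadu), or leave
    // the destination memory untouched (mask_storeu); set bits transfer the data.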
    #[simd_test(enable = "avx512f,avx512bw")]
    unsafe fn test_mm512_mask_loadu_epi16() {
        let src = _mm512_set1_epi16(42);
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm512_mask_loadu_epi16(src, m, black_box(p));
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm512_loadu_epi16(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw")]
    unsafe fn test_mm512_maskz_loadu_epi16() {
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm512_maskz_loadu_epi16(m, black_box(p));
        let e = &[
            0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32,
        ];
        let e = _mm512_loadu_epi16(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw")]
    unsafe fn test_mm512_mask_storeu_epi16() {
        let mut r = [42_i16; 32];
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let a = _mm512_loadu_epi16(a.as_ptr());
        let m = 0b10101010_11001100_11101000_11001010;
        _mm512_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm512_loadu_epi16(e.as_ptr());
        assert_eq_m512i(_mm512_loadu_epi16(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512bw")]
    unsafe fn test_mm512_mask_loadu_epi8() {
        let src = _mm512_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        let r = _mm512_mask_loadu_epi8(src, m, black_box(p));
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
            45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
        ];
        let e = _mm512_loadu_epi8(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw")]
    unsafe fn test_mm512_maskz_loadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        let r = _mm512_maskz_loadu_epi8(m, black_box(p));
        let e = &[
            0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 44, 45, 46, 47, 48, 49,
            50, 51, 52, 53, 54, 55, 56, 0, 0, 0, 0, 0, 0, 0, 0,
        ];
        let e = _mm512_loadu_epi8(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw")]
    unsafe fn test_mm512_mask_storeu_epi8() {
        let mut r = [42_i8; 64];
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let a = _mm512_loadu_epi8(a.as_ptr());
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        _mm512_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
            45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
        ];
        let e = _mm512_loadu_epi8(e.as_ptr());
        assert_eq_m512i(_mm512_loadu_epi8(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_loadu_epi16() {
        let src = _mm256_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_mask_loadu_epi16(src, m, black_box(p));
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm256_loadu_epi16(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_loadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_maskz_loadu_epi16(m, black_box(p));
        let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
        let e = _mm256_loadu_epi16(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_storeu_epi16() {
        let mut r = [42_i16; 16];
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let a = _mm256_loadu_epi16(a.as_ptr());
        let m = 0b11101000_11001010;
        _mm256_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm256_loadu_epi16(e.as_ptr());
        assert_eq_m256i(_mm256_loadu_epi16(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_loadu_epi8() {
        let src = _mm256_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm256_mask_loadu_epi8(src, m, black_box(p));
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm256_loadu_epi8(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_loadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm256_maskz_loadu_epi8(m, black_box(p));
        let e = &[
            0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32,
        ];
        let e = _mm256_loadu_epi8(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_storeu_epi8() {
        let mut r = [42_i8; 32];
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let a = _mm256_loadu_epi8(a.as_ptr());
        let m = 0b10101010_11001100_11101000_11001010;
        _mm256_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm256_loadu_epi8(e.as_ptr());
        assert_eq_m256i(_mm256_loadu_epi8(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm_mask_loadu_epi16() {
        let src = _mm_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = _mm_mask_loadu_epi16(src, m, black_box(p));
        let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
        let e = _mm_loadu_epi16(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_loadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = _mm_maskz_loadu_epi16(m, black_box(p));
        let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8];
        let e = _mm_loadu_epi16(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm_mask_storeu_epi16() {
        let mut r = [42_i16; 8];
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let a = _mm_loadu_epi16(a.as_ptr());
        let m = 0b11001010;
        _mm_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
        let e = _mm_loadu_epi16(e.as_ptr());
        assert_eq_m128i(_mm_loadu_epi16(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm_mask_loadu_epi8() {
        let src = _mm_set1_epi8(42);
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_mask_loadu_epi8(src, m, black_box(p));
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm_loadu_epi8(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_loadu_epi8() {
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_maskz_loadu_epi8(m, black_box(p));
        let e = &[0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
        let e = _mm_loadu_epi8(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm_mask_storeu_epi8() {
        let mut r = [42_i8; 16];
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let a = _mm_loadu_epi8(a.as_ptr());
        let m = 0b11101000_11001010;
        _mm_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm_loadu_epi8(e.as_ptr());
        assert_eq_m128i(_mm_loadu_epi8(r.as_ptr()), e);
    }

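    // madd_epi16 multiplies adjacent pairs of signed 16-bit lanes and adds each
    // pair of products into one 32-bit lane, so all-ones inputs yield 2 per lane.
    // In the masked variants, untouched 32-bit lanes keep src's 16-bit bit pattern,
    // which reads back as `1 << 16 | 1` when src is a broadcast of epi16(1).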
16587    #[simd_test(enable = "avx512bw")]
16588    unsafe fn test_mm512_madd_epi16() {
16589        let a = _mm512_set1_epi16(1);
16590        let b = _mm512_set1_epi16(1);
16591        let r = _mm512_madd_epi16(a, b);
16592        let e = _mm512_set1_epi32(2);
16593        assert_eq_m512i(r, e);
16594    }
16595
16596    #[simd_test(enable = "avx512bw")]
16597    unsafe fn test_mm512_mask_madd_epi16() {
16598        let a = _mm512_set1_epi16(1);
16599        let b = _mm512_set1_epi16(1);
16600        let r = _mm512_mask_madd_epi16(a, 0, a, b);
16601        assert_eq_m512i(r, a);
16602        let r = _mm512_mask_madd_epi16(a, 0b00000000_00001111, a, b);
16603        let e = _mm512_set_epi32(
16604            1 << 16 | 1,
16605            1 << 16 | 1,
16606            1 << 16 | 1,
16607            1 << 16 | 1,
16608            1 << 16 | 1,
16609            1 << 16 | 1,
16610            1 << 16 | 1,
16611            1 << 16 | 1,
16612            1 << 16 | 1,
16613            1 << 16 | 1,
16614            1 << 16 | 1,
16615            1 << 16 | 1,
16616            2,
16617            2,
16618            2,
16619            2,
16620        );
16621        assert_eq_m512i(r, e);
16622    }
16623
16624    #[simd_test(enable = "avx512bw")]
16625    unsafe fn test_mm512_maskz_madd_epi16() {
16626        let a = _mm512_set1_epi16(1);
16627        let b = _mm512_set1_epi16(1);
16628        let r = _mm512_maskz_madd_epi16(0, a, b);
16629        assert_eq_m512i(r, _mm512_setzero_si512());
16630        let r = _mm512_maskz_madd_epi16(0b00000000_00001111, a, b);
16631        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2);
16632        assert_eq_m512i(r, e);
16633    }
16634
16635    #[simd_test(enable = "avx512bw,avx512vl")]
16636    unsafe fn test_mm256_mask_madd_epi16() {
16637        let a = _mm256_set1_epi16(1);
16638        let b = _mm256_set1_epi16(1);
16639        let r = _mm256_mask_madd_epi16(a, 0, a, b);
16640        assert_eq_m256i(r, a);
16641        let r = _mm256_mask_madd_epi16(a, 0b00001111, a, b);
16642        let e = _mm256_set_epi32(
16643            1 << 16 | 1,
16644            1 << 16 | 1,
16645            1 << 16 | 1,
16646            1 << 16 | 1,
16647            2,
16648            2,
16649            2,
16650            2,
16651        );
16652        assert_eq_m256i(r, e);
16653    }
16654
16655    #[simd_test(enable = "avx512bw,avx512vl")]
16656    unsafe fn test_mm256_maskz_madd_epi16() {
16657        let a = _mm256_set1_epi16(1);
16658        let b = _mm256_set1_epi16(1);
16659        let r = _mm256_maskz_madd_epi16(0, a, b);
16660        assert_eq_m256i(r, _mm256_setzero_si256());
16661        let r = _mm256_maskz_madd_epi16(0b00001111, a, b);
16662        let e = _mm256_set_epi32(0, 0, 0, 0, 2, 2, 2, 2);
16663        assert_eq_m256i(r, e);
16664    }
16665
16666    #[simd_test(enable = "avx512bw,avx512vl")]
16667    unsafe fn test_mm_mask_madd_epi16() {
16668        let a = _mm_set1_epi16(1);
16669        let b = _mm_set1_epi16(1);
16670        let r = _mm_mask_madd_epi16(a, 0, a, b);
16671        assert_eq_m128i(r, a);
16672        let r = _mm_mask_madd_epi16(a, 0b00001111, a, b);
16673        let e = _mm_set_epi32(2, 2, 2, 2);
16674        assert_eq_m128i(r, e);
16675    }
16676
16677    #[simd_test(enable = "avx512bw,avx512vl")]
16678    unsafe fn test_mm_maskz_madd_epi16() {
16679        let a = _mm_set1_epi16(1);
16680        let b = _mm_set1_epi16(1);
16681        let r = _mm_maskz_madd_epi16(0, a, b);
16682        assert_eq_m128i(r, _mm_setzero_si128());
16683        let r = _mm_maskz_madd_epi16(0b00001111, a, b);
16684        let e = _mm_set_epi32(2, 2, 2, 2);
16685        assert_eq_m128i(r, e);
16686    }
16687
16688    #[simd_test(enable = "avx512bw")]
16689    unsafe fn test_mm512_maddubs_epi16() {
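        // vpmaddubsw multiplies unsigned bytes of a by signed bytes of b and adds adjacent pairs with signed saturation: 1 * 1 + 1 * 1 == 2 per 16-bit lane.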
16690        let a = _mm512_set1_epi8(1);
16691        let b = _mm512_set1_epi8(1);
16692        let r = _mm512_maddubs_epi16(a, b);
16693        let e = _mm512_set1_epi16(2);
16694        assert_eq_m512i(r, e);
16695    }
16696
16697    #[simd_test(enable = "avx512bw")]
16698    unsafe fn test_mm512_mask_maddubs_epi16() {
16699        let a = _mm512_set1_epi8(1);
16700        let b = _mm512_set1_epi8(1);
16701        let src = _mm512_set1_epi16(1);
16702        let r = _mm512_mask_maddubs_epi16(src, 0, a, b);
16703        assert_eq_m512i(r, src);
16704        let r = _mm512_mask_maddubs_epi16(src, 0b00000000_00000000_00000000_00000001, a, b);
16705        #[rustfmt::skip]
16706        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16707                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2);
16708        assert_eq_m512i(r, e);
16709    }
16710
16711    #[simd_test(enable = "avx512bw")]
16712    unsafe fn test_mm512_maskz_maddubs_epi16() {
16713        let a = _mm512_set1_epi8(1);
16714        let b = _mm512_set1_epi8(1);
16715        let r = _mm512_maskz_maddubs_epi16(0, a, b);
16716        assert_eq_m512i(r, _mm512_setzero_si512());
16717        let r = _mm512_maskz_maddubs_epi16(0b00000000_11111111_00000000_11111111, a, b);
16718        #[rustfmt::skip]
16719        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2,
16720                                 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
16721        assert_eq_m512i(r, e);
16722    }
16723
16724    #[simd_test(enable = "avx512bw,avx512vl")]
16725    unsafe fn test_mm256_mask_maddubs_epi16() {
16726        let a = _mm256_set1_epi8(1);
16727        let b = _mm256_set1_epi8(1);
16728        let src = _mm256_set1_epi16(1);
16729        let r = _mm256_mask_maddubs_epi16(src, 0, a, b);
16730        assert_eq_m256i(r, src);
16731        let r = _mm256_mask_maddubs_epi16(src, 0b00000000_00000001, a, b);
16732        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2);
16733        assert_eq_m256i(r, e);
16734    }
16735
16736    #[simd_test(enable = "avx512bw,avx512vl")]
16737    unsafe fn test_mm256_maskz_maddubs_epi16() {
16738        let a = _mm256_set1_epi8(1);
16739        let b = _mm256_set1_epi8(1);
16740        let r = _mm256_maskz_maddubs_epi16(0, a, b);
16741        assert_eq_m256i(r, _mm256_setzero_si256());
16742        let r = _mm256_maskz_maddubs_epi16(0b00000000_11111111, a, b);
16743        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
16744        assert_eq_m256i(r, e);
16745    }
16746
16747    #[simd_test(enable = "avx512bw,avx512vl")]
16748    unsafe fn test_mm_mask_maddubs_epi16() {
16749        let a = _mm_set1_epi8(1);
16750        let b = _mm_set1_epi8(1);
16751        let src = _mm_set1_epi16(1);
16752        let r = _mm_mask_maddubs_epi16(src, 0, a, b);
16753        assert_eq_m128i(r, src);
16754        let r = _mm_mask_maddubs_epi16(src, 0b00000001, a, b);
16755        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 2);
16756        assert_eq_m128i(r, e);
16757    }
16758
16759    #[simd_test(enable = "avx512bw,avx512vl")]
16760    unsafe fn test_mm_maskz_maddubs_epi16() {
16761        let a = _mm_set1_epi8(1);
16762        let b = _mm_set1_epi8(1);
16763        let r = _mm_maskz_maddubs_epi16(0, a, b);
16764        assert_eq_m128i(r, _mm_setzero_si128());
16765        let r = _mm_maskz_maddubs_epi16(0b00001111, a, b);
16766        let e = _mm_set_epi16(0, 0, 0, 0, 2, 2, 2, 2);
16767        assert_eq_m128i(r, e);
16768    }
16769
16770    #[simd_test(enable = "avx512bw")]
16771    unsafe fn test_mm512_packs_epi32() {
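        // vpackssdw narrows i32 to i16 with signed saturation (i32::MAX -> i16::MAX); within each 128-bit lane the four words from a sit below the four from b.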
16772        let a = _mm512_set1_epi32(i32::MAX);
16773        let b = _mm512_set1_epi32(1);
16774        let r = _mm512_packs_epi32(a, b);
16775        #[rustfmt::skip]
16776        let e = _mm512_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX,
16777                                 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16778        assert_eq_m512i(r, e);
16779    }
16780
16781    #[simd_test(enable = "avx512bw")]
16782    unsafe fn test_mm512_mask_packs_epi32() {
16783        let a = _mm512_set1_epi32(i32::MAX);
16784        let b = _mm512_set1_epi32(1 << 16 | 1);
16785        let r = _mm512_mask_packs_epi32(a, 0, a, b);
16786        assert_eq_m512i(r, a);
16787        let r = _mm512_mask_packs_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
16788        #[rustfmt::skip]
16789        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16790                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16791        assert_eq_m512i(r, e);
16792    }
16793
16794    #[simd_test(enable = "avx512bw")]
16795    unsafe fn test_mm512_maskz_packs_epi32() {
16796        let a = _mm512_set1_epi32(i32::MAX);
16797        let b = _mm512_set1_epi32(1);
16798        let r = _mm512_maskz_packs_epi32(0, a, b);
16799        assert_eq_m512i(r, _mm512_setzero_si512());
16800        let r = _mm512_maskz_packs_epi32(0b00000000_00000000_00000000_00001111, a, b);
16801        #[rustfmt::skip]
16802        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16803                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16804        assert_eq_m512i(r, e);
16805    }
16806
16807    #[simd_test(enable = "avx512bw,avx512vl")]
16808    unsafe fn test_mm256_mask_packs_epi32() {
16809        let a = _mm256_set1_epi32(i32::MAX);
16810        let b = _mm256_set1_epi32(1 << 16 | 1);
16811        let r = _mm256_mask_packs_epi32(a, 0, a, b);
16812        assert_eq_m256i(r, a);
16813        let r = _mm256_mask_packs_epi32(b, 0b00000000_00001111, a, b);
16814        #[rustfmt::skip]
16815        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16816        assert_eq_m256i(r, e);
16817    }
16818
16819    #[simd_test(enable = "avx512bw,avx512vl")]
16820    unsafe fn test_mm256_maskz_packs_epi32() {
16821        let a = _mm256_set1_epi32(i32::MAX);
16822        let b = _mm256_set1_epi32(1);
16823        let r = _mm256_maskz_packs_epi32(0, a, b);
16824        assert_eq_m256i(r, _mm256_setzero_si256());
16825        let r = _mm256_maskz_packs_epi32(0b00000000_00001111, a, b);
16826        #[rustfmt::skip]
16827        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16828        assert_eq_m256i(r, e);
16829    }
16830
16831    #[simd_test(enable = "avx512bw,avx512vl")]
16832    unsafe fn test_mm_mask_packs_epi32() {
16833        let a = _mm_set1_epi32(i32::MAX);
16834        let b = _mm_set1_epi32(1 << 16 | 1);
16835        let r = _mm_mask_packs_epi32(a, 0, a, b);
16836        assert_eq_m128i(r, a);
16837        let r = _mm_mask_packs_epi32(b, 0b00001111, a, b);
16838        let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16839        assert_eq_m128i(r, e);
16840    }
16841
16842    #[simd_test(enable = "avx512bw,avx512vl")]
16843    unsafe fn test_mm_maskz_packs_epi32() {
16844        let a = _mm_set1_epi32(i32::MAX);
16845        let b = _mm_set1_epi32(1);
16846        let r = _mm_maskz_packs_epi32(0, a, b);
16847        assert_eq_m128i(r, _mm_setzero_si128());
16848        let r = _mm_maskz_packs_epi32(0b00001111, a, b);
16849        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16850        assert_eq_m128i(r, e);
16851    }
16852
16853    #[simd_test(enable = "avx512bw")]
16854    unsafe fn test_mm512_packs_epi16() {
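        // vpacksswb narrows i16 to i8 with signed saturation, clamping i16::MAX to i8::MAX.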
16855        let a = _mm512_set1_epi16(i16::MAX);
16856        let b = _mm512_set1_epi16(1);
16857        let r = _mm512_packs_epi16(a, b);
16858        #[rustfmt::skip]
16859        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
16860                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
16861                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
16862                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
16863        assert_eq_m512i(r, e);
16864    }
16865
16866    #[simd_test(enable = "avx512bw")]
16867    unsafe fn test_mm512_mask_packs_epi16() {
16868        let a = _mm512_set1_epi16(i16::MAX);
16869        let b = _mm512_set1_epi16(1 << 8 | 1);
16870        let r = _mm512_mask_packs_epi16(a, 0, a, b);
16871        assert_eq_m512i(r, a);
16872        let r = _mm512_mask_packs_epi16(
16873            b,
16874            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
16875            a,
16876            b,
16877        );
16878        #[rustfmt::skip]
16879        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16880                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16881                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16882                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
16883        assert_eq_m512i(r, e);
16884    }
16885
16886    #[simd_test(enable = "avx512bw")]
16887    unsafe fn test_mm512_maskz_packs_epi16() {
16888        let a = _mm512_set1_epi16(i16::MAX);
16889        let b = _mm512_set1_epi16(1);
16890        let r = _mm512_maskz_packs_epi16(0, a, b);
16891        assert_eq_m512i(r, _mm512_setzero_si512());
16892        let r = _mm512_maskz_packs_epi16(
16893            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
16894            a,
16895            b,
16896        );
16897        #[rustfmt::skip]
16898        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16899                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16900                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16901                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
16902        assert_eq_m512i(r, e);
16903    }
16904
16905    #[simd_test(enable = "avx512bw,avx512vl")]
16906    unsafe fn test_mm256_mask_packs_epi16() {
16907        let a = _mm256_set1_epi16(i16::MAX);
16908        let b = _mm256_set1_epi16(1 << 8 | 1);
16909        let r = _mm256_mask_packs_epi16(a, 0, a, b);
16910        assert_eq_m256i(r, a);
16911        let r = _mm256_mask_packs_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
16912        #[rustfmt::skip]
16913        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16914                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
16915        assert_eq_m256i(r, e);
16916    }
16917
16918    #[simd_test(enable = "avx512bw,avx512vl")]
16919    unsafe fn test_mm256_maskz_packs_epi16() {
16920        let a = _mm256_set1_epi16(i16::MAX);
16921        let b = _mm256_set1_epi16(1);
16922        let r = _mm256_maskz_packs_epi16(0, a, b);
16923        assert_eq_m256i(r, _mm256_setzero_si256());
16924        let r = _mm256_maskz_packs_epi16(0b00000000_00000000_00000000_00001111, a, b);
16925        #[rustfmt::skip]
16926        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16927                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
16928        assert_eq_m256i(r, e);
16929    }
16930
16931    #[simd_test(enable = "avx512bw,avx512vl")]
16932    unsafe fn test_mm_mask_packs_epi16() {
16933        let a = _mm_set1_epi16(i16::MAX);
16934        let b = _mm_set1_epi16(1 << 8 | 1);
16935        let r = _mm_mask_packs_epi16(a, 0, a, b);
16936        assert_eq_m128i(r, a);
16937        let r = _mm_mask_packs_epi16(b, 0b00000000_00001111, a, b);
16938        #[rustfmt::skip]
16939        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
16940        assert_eq_m128i(r, e);
16941    }
16942
16943    #[simd_test(enable = "avx512bw,avx512vl")]
16944    unsafe fn test_mm_maskz_packs_epi16() {
16945        let a = _mm_set1_epi16(i16::MAX);
16946        let b = _mm_set1_epi16(1);
16947        let r = _mm_maskz_packs_epi16(0, a, b);
16948        assert_eq_m128i(r, _mm_setzero_si128());
16949        let r = _mm_maskz_packs_epi16(0b00000000_00001111, a, b);
16950        #[rustfmt::skip]
16951        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
16952        assert_eq_m128i(r, e);
16953    }
16954
16955    #[simd_test(enable = "avx512bw")]
16956    unsafe fn test_mm512_packus_epi32() {
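        // vpackusdw narrows i32 to u16 with unsigned saturation, so -1 clamps to 0.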
16957        let a = _mm512_set1_epi32(-1);
16958        let b = _mm512_set1_epi32(1);
16959        let r = _mm512_packus_epi32(a, b);
16960        #[rustfmt::skip]
16961        let e = _mm512_set_epi16(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
16962                                 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0);
16963        assert_eq_m512i(r, e);
16964    }
16965
16966    #[simd_test(enable = "avx512bw")]
16967    unsafe fn test_mm512_mask_packus_epi32() {
16968        let a = _mm512_set1_epi32(-1);
16969        let b = _mm512_set1_epi32(1 << 16 | 1);
16970        let r = _mm512_mask_packus_epi32(a, 0, a, b);
16971        assert_eq_m512i(r, a);
16972        let r = _mm512_mask_packus_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
16973        #[rustfmt::skip]
16974        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16975                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
16976        assert_eq_m512i(r, e);
16977    }
16978
16979    #[simd_test(enable = "avx512bw")]
16980    unsafe fn test_mm512_maskz_packus_epi32() {
16981        let a = _mm512_set1_epi32(-1);
16982        let b = _mm512_set1_epi32(1);
16983        let r = _mm512_maskz_packus_epi32(0, a, b);
16984        assert_eq_m512i(r, _mm512_setzero_si512());
16985        let r = _mm512_maskz_packus_epi32(0b00000000_00000000_00000000_00001111, a, b);
16986        #[rustfmt::skip]
16987        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16988                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
16989        assert_eq_m512i(r, e);
16990    }
16991
16992    #[simd_test(enable = "avx512bw,avx512vl")]
16993    unsafe fn test_mm256_mask_packus_epi32() {
16994        let a = _mm256_set1_epi32(-1);
16995        let b = _mm256_set1_epi32(1 << 16 | 1);
16996        let r = _mm256_mask_packus_epi32(a, 0, a, b);
16997        assert_eq_m256i(r, a);
16998        let r = _mm256_mask_packus_epi32(b, 0b00000000_00001111, a, b);
16999        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
17000        assert_eq_m256i(r, e);
17001    }
17002
17003    #[simd_test(enable = "avx512bw,avx512vl")]
17004    unsafe fn test_mm256_maskz_packus_epi32() {
17005        let a = _mm256_set1_epi32(-1);
17006        let b = _mm256_set1_epi32(1);
17007        let r = _mm256_maskz_packus_epi32(0, a, b);
17008        assert_eq_m256i(r, _mm256_setzero_si256());
17009        let r = _mm256_maskz_packus_epi32(0b00000000_00001111, a, b);
17010        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
17011        assert_eq_m256i(r, e);
17012    }
17013
17014    #[simd_test(enable = "avx512bw,avx512vl")]
17015    unsafe fn test_mm_mask_packus_epi32() {
17016        let a = _mm_set1_epi32(-1);
17017        let b = _mm_set1_epi32(1 << 16 | 1);
17018        let r = _mm_mask_packus_epi32(a, 0, a, b);
17019        assert_eq_m128i(r, a);
17020        let r = _mm_mask_packus_epi32(b, 0b00001111, a, b);
17021        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
17022        assert_eq_m128i(r, e);
17023    }
17024
17025    #[simd_test(enable = "avx512bw,avx512vl")]
17026    unsafe fn test_mm_maskz_packus_epi32() {
17027        let a = _mm_set1_epi32(-1);
17028        let b = _mm_set1_epi32(1);
17029        let r = _mm_maskz_packus_epi32(0, a, b);
17030        assert_eq_m128i(r, _mm_setzero_si128());
17031        let r = _mm_maskz_packus_epi32(0b00001111, a, b);
17032        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
17033        assert_eq_m128i(r, e);
17034    }
17035
17036    #[simd_test(enable = "avx512bw")]
17037    unsafe fn test_mm512_packus_epi16() {
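        // vpackuswb narrows i16 to u8 with unsigned saturation, so -1 clamps to 0.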
17038        let a = _mm512_set1_epi16(-1);
17039        let b = _mm512_set1_epi16(1);
17040        let r = _mm512_packus_epi16(a, b);
17041        #[rustfmt::skip]
17042        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
17043                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
17044                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
17045                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);
17046        assert_eq_m512i(r, e);
17047    }
17048
17049    #[simd_test(enable = "avx512bw")]
17050    unsafe fn test_mm512_mask_packus_epi16() {
17051        let a = _mm512_set1_epi16(-1);
17052        let b = _mm512_set1_epi16(1 << 8 | 1);
17053        let r = _mm512_mask_packus_epi16(a, 0, a, b);
17054        assert_eq_m512i(r, a);
17055        let r = _mm512_mask_packus_epi16(
17056            b,
17057            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
17058            a,
17059            b,
17060        );
17061        #[rustfmt::skip]
17062        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17063                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17064                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17065                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
17066        assert_eq_m512i(r, e);
17067    }
17068
17069    #[simd_test(enable = "avx512bw")]
17070    unsafe fn test_mm512_maskz_packus_epi16() {
17071        let a = _mm512_set1_epi16(-1);
17072        let b = _mm512_set1_epi16(1);
17073        let r = _mm512_maskz_packus_epi16(0, a, b);
17074        assert_eq_m512i(r, _mm512_setzero_si512());
17075        let r = _mm512_maskz_packus_epi16(
17076            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
17077            a,
17078            b,
17079        );
17080        #[rustfmt::skip]
17081        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17082                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17083                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17084                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
17085        assert_eq_m512i(r, e);
17086    }
17087
17088    #[simd_test(enable = "avx512bw,avx512vl")]
17089    unsafe fn test_mm256_mask_packus_epi16() {
17090        let a = _mm256_set1_epi16(-1);
17091        let b = _mm256_set1_epi16(1 << 8 | 1);
17092        let r = _mm256_mask_packus_epi16(a, 0, a, b);
17093        assert_eq_m256i(r, a);
17094        let r = _mm256_mask_packus_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
17095        #[rustfmt::skip]
17096        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17097                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
17098        assert_eq_m256i(r, e);
17099    }
17100
17101    #[simd_test(enable = "avx512bw,avx512vl")]
17102    unsafe fn test_mm256_maskz_packus_epi16() {
17103        let a = _mm256_set1_epi16(-1);
17104        let b = _mm256_set1_epi16(1);
17105        let r = _mm256_maskz_packus_epi16(0, a, b);
17106        assert_eq_m256i(r, _mm256_setzero_si256());
17107        let r = _mm256_maskz_packus_epi16(0b00000000_00000000_00000000_00001111, a, b);
17108        #[rustfmt::skip]
17109        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17110                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
17111        assert_eq_m256i(r, e);
17112    }
17113
17114    #[simd_test(enable = "avx512bw,avx512vl")]
17115    unsafe fn test_mm_mask_packus_epi16() {
17116        let a = _mm_set1_epi16(-1);
17117        let b = _mm_set1_epi16(1 << 8 | 1);
17118        let r = _mm_mask_packus_epi16(a, 0, a, b);
17119        assert_eq_m128i(r, a);
17120        let r = _mm_mask_packus_epi16(b, 0b00000000_00001111, a, b);
17121        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
17122        assert_eq_m128i(r, e);
17123    }
17124
17125    #[simd_test(enable = "avx512bw,avx512vl")]
17126    unsafe fn test_mm_maskz_packus_epi16() {
17127        let a = _mm_set1_epi16(-1);
17128        let b = _mm_set1_epi16(1);
17129        let r = _mm_maskz_packus_epi16(0, a, b);
17130        assert_eq_m128i(r, _mm_setzero_si128());
17131        let r = _mm_maskz_packus_epi16(0b00000000_00001111, a, b);
17132        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
17133        assert_eq_m128i(r, e);
17134    }
17135
17136    #[simd_test(enable = "avx512bw")]
17137    unsafe fn test_mm512_avg_epu16() {
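        // vpavgw averages with rounding up: (1 + 1 + 1) >> 1 == 1.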
17138        let a = _mm512_set1_epi16(1);
17139        let b = _mm512_set1_epi16(1);
17140        let r = _mm512_avg_epu16(a, b);
17141        let e = _mm512_set1_epi16(1);
17142        assert_eq_m512i(r, e);
17143    }
17144
17145    #[simd_test(enable = "avx512bw")]
17146    unsafe fn test_mm512_mask_avg_epu16() {
17147        let a = _mm512_set1_epi16(1);
17148        let b = _mm512_set1_epi16(1);
17149        let r = _mm512_mask_avg_epu16(a, 0, a, b);
17150        assert_eq_m512i(r, a);
17151        let r = _mm512_mask_avg_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
17152        #[rustfmt::skip]
17153        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17154                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17155        assert_eq_m512i(r, e);
17156    }
17157
17158    #[simd_test(enable = "avx512bw")]
17159    unsafe fn test_mm512_maskz_avg_epu16() {
17160        let a = _mm512_set1_epi16(1);
17161        let b = _mm512_set1_epi16(1);
17162        let r = _mm512_maskz_avg_epu16(0, a, b);
17163        assert_eq_m512i(r, _mm512_setzero_si512());
17164        let r = _mm512_maskz_avg_epu16(0b00000000_00000000_00000000_00001111, a, b);
17165        #[rustfmt::skip]
17166        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17167                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
17168        assert_eq_m512i(r, e);
17169    }
17170
17171    #[simd_test(enable = "avx512bw,avx512vl")]
17172    unsafe fn test_mm256_mask_avg_epu16() {
17173        let a = _mm256_set1_epi16(1);
17174        let b = _mm256_set1_epi16(1);
17175        let r = _mm256_mask_avg_epu16(a, 0, a, b);
17176        assert_eq_m256i(r, a);
17177        let r = _mm256_mask_avg_epu16(a, 0b00000000_00001111, a, b);
17178        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17179        assert_eq_m256i(r, e);
17180    }
17181
17182    #[simd_test(enable = "avx512bw,avx512vl")]
17183    unsafe fn test_mm256_maskz_avg_epu16() {
17184        let a = _mm256_set1_epi16(1);
17185        let b = _mm256_set1_epi16(1);
17186        let r = _mm256_maskz_avg_epu16(0, a, b);
17187        assert_eq_m256i(r, _mm256_setzero_si256());
17188        let r = _mm256_maskz_avg_epu16(0b00000000_00001111, a, b);
17189        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
17190        assert_eq_m256i(r, e);
17191    }
17192
17193    #[simd_test(enable = "avx512bw,avx512vl")]
17194    unsafe fn test_mm_mask_avg_epu16() {
17195        let a = _mm_set1_epi16(1);
17196        let b = _mm_set1_epi16(1);
17197        let r = _mm_mask_avg_epu16(a, 0, a, b);
17198        assert_eq_m128i(r, a);
17199        let r = _mm_mask_avg_epu16(a, 0b00001111, a, b);
17200        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
17201        assert_eq_m128i(r, e);
17202    }
17203
17204    #[simd_test(enable = "avx512bw,avx512vl")]
17205    unsafe fn test_mm_maskz_avg_epu16() {
17206        let a = _mm_set1_epi16(1);
17207        let b = _mm_set1_epi16(1);
17208        let r = _mm_maskz_avg_epu16(0, a, b);
17209        assert_eq_m128i(r, _mm_setzero_si128());
17210        let r = _mm_maskz_avg_epu16(0b00001111, a, b);
17211        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
17212        assert_eq_m128i(r, e);
17213    }
17214
17215    #[simd_test(enable = "avx512bw")]
17216    unsafe fn test_mm512_avg_epu8() {
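        // vpavgb also rounds up: (1 + 1 + 1) >> 1 == 1.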
17217        let a = _mm512_set1_epi8(1);
17218        let b = _mm512_set1_epi8(1);
17219        let r = _mm512_avg_epu8(a, b);
17220        let e = _mm512_set1_epi8(1);
17221        assert_eq_m512i(r, e);
17222    }
17223
17224    #[simd_test(enable = "avx512bw")]
17225    unsafe fn test_mm512_mask_avg_epu8() {
17226        let a = _mm512_set1_epi8(1);
17227        let b = _mm512_set1_epi8(1);
17228        let r = _mm512_mask_avg_epu8(a, 0, a, b);
17229        assert_eq_m512i(r, a);
17230        let r = _mm512_mask_avg_epu8(
17231            a,
17232            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
17233            a,
17234            b,
17235        );
17236        #[rustfmt::skip]
17237        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17238                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17239                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17240                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17241        assert_eq_m512i(r, e);
17242    }
17243
17244    #[simd_test(enable = "avx512bw")]
17245    unsafe fn test_mm512_maskz_avg_epu8() {
17246        let a = _mm512_set1_epi8(1);
17247        let b = _mm512_set1_epi8(1);
17248        let r = _mm512_maskz_avg_epu8(0, a, b);
17249        assert_eq_m512i(r, _mm512_setzero_si512());
17250        let r = _mm512_maskz_avg_epu8(
17251            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
17252            a,
17253            b,
17254        );
17255        #[rustfmt::skip]
17256        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17257                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17258                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17259                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
17260        assert_eq_m512i(r, e);
17261    }
17262
17263    #[simd_test(enable = "avx512bw,avx512vl")]
17264    unsafe fn test_mm256_mask_avg_epu8() {
17265        let a = _mm256_set1_epi8(1);
17266        let b = _mm256_set1_epi8(1);
17267        let r = _mm256_mask_avg_epu8(a, 0, a, b);
17268        assert_eq_m256i(r, a);
17269        let r = _mm256_mask_avg_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
17270        #[rustfmt::skip]
17271        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17272                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17273        assert_eq_m256i(r, e);
17274    }
17275
17276    #[simd_test(enable = "avx512bw,avx512vl")]
17277    unsafe fn test_mm256_maskz_avg_epu8() {
17278        let a = _mm256_set1_epi8(1);
17279        let b = _mm256_set1_epi8(1);
17280        let r = _mm256_maskz_avg_epu8(0, a, b);
17281        assert_eq_m256i(r, _mm256_setzero_si256());
17282        let r = _mm256_maskz_avg_epu8(0b00000000_00000000_00000000_00001111, a, b);
17283        #[rustfmt::skip]
17284        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17285                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
17286        assert_eq_m256i(r, e);
17287    }
17288
17289    #[simd_test(enable = "avx512bw,avx512vl")]
17290    unsafe fn test_mm_mask_avg_epu8() {
17291        let a = _mm_set1_epi8(1);
17292        let b = _mm_set1_epi8(1);
17293        let r = _mm_mask_avg_epu8(a, 0, a, b);
17294        assert_eq_m128i(r, a);
17295        let r = _mm_mask_avg_epu8(a, 0b00000000_00001111, a, b);
17296        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17297        assert_eq_m128i(r, e);
17298    }
17299
17300    #[simd_test(enable = "avx512bw,avx512vl")]
17301    unsafe fn test_mm_maskz_avg_epu8() {
17302        let a = _mm_set1_epi8(1);
17303        let b = _mm_set1_epi8(1);
17304        let r = _mm_maskz_avg_epu8(0, a, b);
17305        assert_eq_m128i(r, _mm_setzero_si128());
17306        let r = _mm_maskz_avg_epu8(0b00000000_00001111, a, b);
17307        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
17308        assert_eq_m128i(r, e);
17309    }
17310
17311    #[simd_test(enable = "avx512bw")]
17312    unsafe fn test_mm512_sll_epi16() {
17313        let a = _mm512_set1_epi16(1 << 15);
17314        let count = _mm_set1_epi16(2);
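        // vpsllw takes its shift count from the low 64 bits of the count vector; 0x0002_0002_0002_0002 > 15, so every lane is zeroed (1 << 15 shifted left by even 2 would overflow to 0 anyway).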
17315        let r = _mm512_sll_epi16(a, count);
17316        let e = _mm512_set1_epi16(0);
17317        assert_eq_m512i(r, e);
17318    }
17319
17320    #[simd_test(enable = "avx512bw")]
17321    unsafe fn test_mm512_mask_sll_epi16() {
17322        let a = _mm512_set1_epi16(1 << 15);
17323        let count = _mm_set1_epi16(2);
17324        let r = _mm512_mask_sll_epi16(a, 0, a, count);
17325        assert_eq_m512i(r, a);
17326        let r = _mm512_mask_sll_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
17327        let e = _mm512_set1_epi16(0);
17328        assert_eq_m512i(r, e);
17329    }
17330
17331    #[simd_test(enable = "avx512bw")]
17332    unsafe fn test_mm512_maskz_sll_epi16() {
17333        let a = _mm512_set1_epi16(1 << 15);
17334        let count = _mm_set1_epi16(2);
17335        let r = _mm512_maskz_sll_epi16(0, a, count);
17336        assert_eq_m512i(r, _mm512_setzero_si512());
17337        let r = _mm512_maskz_sll_epi16(0b11111111_11111111_11111111_11111111, a, count);
17338        let e = _mm512_set1_epi16(0);
17339        assert_eq_m512i(r, e);
17340    }
17341
17342    #[simd_test(enable = "avx512bw,avx512vl")]
17343    unsafe fn test_mm256_mask_sll_epi16() {
17344        let a = _mm256_set1_epi16(1 << 15);
17345        let count = _mm_set1_epi16(2);
17346        let r = _mm256_mask_sll_epi16(a, 0, a, count);
17347        assert_eq_m256i(r, a);
17348        let r = _mm256_mask_sll_epi16(a, 0b11111111_11111111, a, count);
17349        let e = _mm256_set1_epi16(0);
17350        assert_eq_m256i(r, e);
17351    }
17352
17353    #[simd_test(enable = "avx512bw,avx512vl")]
17354    unsafe fn test_mm256_maskz_sll_epi16() {
17355        let a = _mm256_set1_epi16(1 << 15);
17356        let count = _mm_set1_epi16(2);
17357        let r = _mm256_maskz_sll_epi16(0, a, count);
17358        assert_eq_m256i(r, _mm256_setzero_si256());
17359        let r = _mm256_maskz_sll_epi16(0b11111111_11111111, a, count);
17360        let e = _mm256_set1_epi16(0);
17361        assert_eq_m256i(r, e);
17362    }
17363
17364    #[simd_test(enable = "avx512bw,avx512vl")]
17365    unsafe fn test_mm_mask_sll_epi16() {
17366        let a = _mm_set1_epi16(1 << 15);
17367        let count = _mm_set1_epi16(2);
17368        let r = _mm_mask_sll_epi16(a, 0, a, count);
17369        assert_eq_m128i(r, a);
17370        let r = _mm_mask_sll_epi16(a, 0b11111111, a, count);
17371        let e = _mm_set1_epi16(0);
17372        assert_eq_m128i(r, e);
17373    }
17374
17375    #[simd_test(enable = "avx512bw,avx512vl")]
17376    unsafe fn test_mm_maskz_sll_epi16() {
17377        let a = _mm_set1_epi16(1 << 15);
17378        let count = _mm_set1_epi16(2);
17379        let r = _mm_maskz_sll_epi16(0, a, count);
17380        assert_eq_m128i(r, _mm_setzero_si128());
17381        let r = _mm_maskz_sll_epi16(0b11111111, a, count);
17382        let e = _mm_set1_epi16(0);
17383        assert_eq_m128i(r, e);
17384    }
17385
17386    #[simd_test(enable = "avx512bw")]
17387    unsafe fn test_mm512_slli_epi16() {
17388        let a = _mm512_set1_epi16(1 << 15);
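        // The immediate form shifts each lane by 1; the top bit of 1 << 15 falls off the lane, giving 0.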
17389        let r = _mm512_slli_epi16::<1>(a);
17390        let e = _mm512_set1_epi16(0);
17391        assert_eq_m512i(r, e);
17392    }
17393
17394    #[simd_test(enable = "avx512bw")]
17395    unsafe fn test_mm512_mask_slli_epi16() {
17396        let a = _mm512_set1_epi16(1 << 15);
17397        let r = _mm512_mask_slli_epi16::<1>(a, 0, a);
17398        assert_eq_m512i(r, a);
17399        let r = _mm512_mask_slli_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a);
17400        let e = _mm512_set1_epi16(0);
17401        assert_eq_m512i(r, e);
17402    }
17403
17404    #[simd_test(enable = "avx512bw")]
17405    unsafe fn test_mm512_maskz_slli_epi16() {
17406        let a = _mm512_set1_epi16(1 << 15);
17407        let r = _mm512_maskz_slli_epi16::<1>(0, a);
17408        assert_eq_m512i(r, _mm512_setzero_si512());
17409        let r = _mm512_maskz_slli_epi16::<1>(0b11111111_11111111_11111111_11111111, a);
17410        let e = _mm512_set1_epi16(0);
17411        assert_eq_m512i(r, e);
17412    }
17413
17414    #[simd_test(enable = "avx512bw,avx512vl")]
17415    unsafe fn test_mm256_mask_slli_epi16() {
17416        let a = _mm256_set1_epi16(1 << 15);
17417        let r = _mm256_mask_slli_epi16::<1>(a, 0, a);
17418        assert_eq_m256i(r, a);
17419        let r = _mm256_mask_slli_epi16::<1>(a, 0b11111111_11111111, a);
17420        let e = _mm256_set1_epi16(0);
17421        assert_eq_m256i(r, e);
17422    }
17423
17424    #[simd_test(enable = "avx512bw,avx512vl")]
17425    unsafe fn test_mm256_maskz_slli_epi16() {
17426        let a = _mm256_set1_epi16(1 << 15);
17427        let r = _mm256_maskz_slli_epi16::<1>(0, a);
17428        assert_eq_m256i(r, _mm256_setzero_si256());
17429        let r = _mm256_maskz_slli_epi16::<1>(0b11111111_11111111, a);
17430        let e = _mm256_set1_epi16(0);
17431        assert_eq_m256i(r, e);
17432    }
17433
17434    #[simd_test(enable = "avx512bw,avx512vl")]
17435    unsafe fn test_mm_mask_slli_epi16() {
17436        let a = _mm_set1_epi16(1 << 15);
17437        let r = _mm_mask_slli_epi16::<1>(a, 0, a);
17438        assert_eq_m128i(r, a);
17439        let r = _mm_mask_slli_epi16::<1>(a, 0b11111111, a);
17440        let e = _mm_set1_epi16(0);
17441        assert_eq_m128i(r, e);
17442    }
17443
17444    #[simd_test(enable = "avx512bw,avx512vl")]
17445    unsafe fn test_mm_maskz_slli_epi16() {
17446        let a = _mm_set1_epi16(1 << 15);
17447        let r = _mm_maskz_slli_epi16::<1>(0, a);
17448        assert_eq_m128i(r, _mm_setzero_si128());
17449        let r = _mm_maskz_slli_epi16::<1>(0b11111111, a);
17450        let e = _mm_set1_epi16(0);
17451        assert_eq_m128i(r, e);
17452    }
17453
17454    #[simd_test(enable = "avx512bw")]
17455    unsafe fn test_mm512_sllv_epi16() {
17456        let a = _mm512_set1_epi16(1 << 15);
17457        let count = _mm512_set1_epi16(2);
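        // The variable form shifts each lane by its own 16-bit count; 1 << 15 shifted left by 2 overflows the lane to 0.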
17458        let r = _mm512_sllv_epi16(a, count);
17459        let e = _mm512_set1_epi16(0);
17460        assert_eq_m512i(r, e);
17461    }
17462
17463    #[simd_test(enable = "avx512bw")]
17464    unsafe fn test_mm512_mask_sllv_epi16() {
17465        let a = _mm512_set1_epi16(1 << 15);
17466        let count = _mm512_set1_epi16(2);
17467        let r = _mm512_mask_sllv_epi16(a, 0, a, count);
17468        assert_eq_m512i(r, a);
17469        let r = _mm512_mask_sllv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
17470        let e = _mm512_set1_epi16(0);
17471        assert_eq_m512i(r, e);
17472    }
17473
17474    #[simd_test(enable = "avx512bw")]
17475    unsafe fn test_mm512_maskz_sllv_epi16() {
17476        let a = _mm512_set1_epi16(1 << 15);
17477        let count = _mm512_set1_epi16(2);
17478        let r = _mm512_maskz_sllv_epi16(0, a, count);
17479        assert_eq_m512i(r, _mm512_setzero_si512());
17480        let r = _mm512_maskz_sllv_epi16(0b11111111_11111111_11111111_11111111, a, count);
17481        let e = _mm512_set1_epi16(0);
17482        assert_eq_m512i(r, e);
17483    }
17484
17485    #[simd_test(enable = "avx512bw,avx512vl")]
17486    unsafe fn test_mm256_sllv_epi16() {
17487        let a = _mm256_set1_epi16(1 << 15);
17488        let count = _mm256_set1_epi16(2);
17489        let r = _mm256_sllv_epi16(a, count);
17490        let e = _mm256_set1_epi16(0);
17491        assert_eq_m256i(r, e);
17492    }
17493
17494    #[simd_test(enable = "avx512bw,avx512vl")]
17495    unsafe fn test_mm256_mask_sllv_epi16() {
17496        let a = _mm256_set1_epi16(1 << 15);
17497        let count = _mm256_set1_epi16(2);
17498        let r = _mm256_mask_sllv_epi16(a, 0, a, count);
17499        assert_eq_m256i(r, a);
17500        let r = _mm256_mask_sllv_epi16(a, 0b11111111_11111111, a, count);
17501        let e = _mm256_set1_epi16(0);
17502        assert_eq_m256i(r, e);
17503    }
17504
17505    #[simd_test(enable = "avx512bw,avx512vl")]
17506    unsafe fn test_mm256_maskz_sllv_epi16() {
17507        let a = _mm256_set1_epi16(1 << 15);
17508        let count = _mm256_set1_epi16(2);
17509        let r = _mm256_maskz_sllv_epi16(0, a, count);
17510        assert_eq_m256i(r, _mm256_setzero_si256());
17511        let r = _mm256_maskz_sllv_epi16(0b11111111_11111111, a, count);
17512        let e = _mm256_set1_epi16(0);
17513        assert_eq_m256i(r, e);
17514    }
17515
17516    #[simd_test(enable = "avx512bw,avx512vl")]
17517    unsafe fn test_mm_sllv_epi16() {
17518        let a = _mm_set1_epi16(1 << 15);
17519        let count = _mm_set1_epi16(2);
17520        let r = _mm_sllv_epi16(a, count);
17521        let e = _mm_set1_epi16(0);
17522        assert_eq_m128i(r, e);
17523    }
17524
17525    #[simd_test(enable = "avx512bw,avx512vl")]
17526    unsafe fn test_mm_mask_sllv_epi16() {
17527        let a = _mm_set1_epi16(1 << 15);
17528        let count = _mm_set1_epi16(2);
17529        let r = _mm_mask_sllv_epi16(a, 0, a, count);
17530        assert_eq_m128i(r, a);
17531        let r = _mm_mask_sllv_epi16(a, 0b11111111, a, count);
17532        let e = _mm_set1_epi16(0);
17533        assert_eq_m128i(r, e);
17534    }
17535
17536    #[simd_test(enable = "avx512bw,avx512vl")]
17537    unsafe fn test_mm_maskz_sllv_epi16() {
17538        let a = _mm_set1_epi16(1 << 15);
17539        let count = _mm_set1_epi16(2);
17540        let r = _mm_maskz_sllv_epi16(0, a, count);
17541        assert_eq_m128i(r, _mm_setzero_si128());
17542        let r = _mm_maskz_sllv_epi16(0b11111111, a, count);
17543        let e = _mm_set1_epi16(0);
17544        assert_eq_m128i(r, e);
17545    }
17546
17547    #[simd_test(enable = "avx512bw")]
17548    unsafe fn test_mm512_srl_epi16() {
17549        let a = _mm512_set1_epi16(1 << 1);
17550        let count = _mm_set1_epi16(2);
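        // As with sll, the low 64 bits of count exceed 15, so vpsrlw shifts everything out and the result is 0.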
17551        let r = _mm512_srl_epi16(a, count);
17552        let e = _mm512_set1_epi16(0);
17553        assert_eq_m512i(r, e);
17554    }
17555
17556    #[simd_test(enable = "avx512bw")]
17557    unsafe fn test_mm512_mask_srl_epi16() {
17558        let a = _mm512_set1_epi16(1 << 1);
17559        let count = _mm_set1_epi16(2);
17560        let r = _mm512_mask_srl_epi16(a, 0, a, count);
17561        assert_eq_m512i(r, a);
17562        let r = _mm512_mask_srl_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
17563        let e = _mm512_set1_epi16(0);
17564        assert_eq_m512i(r, e);
17565    }
17566
17567    #[simd_test(enable = "avx512bw")]
17568    unsafe fn test_mm512_maskz_srl_epi16() {
17569        let a = _mm512_set1_epi16(1 << 1);
17570        let count = _mm_set1_epi16(2);
17571        let r = _mm512_maskz_srl_epi16(0, a, count);
17572        assert_eq_m512i(r, _mm512_setzero_si512());
17573        let r = _mm512_maskz_srl_epi16(0b11111111_11111111_11111111_11111111, a, count);
17574        let e = _mm512_set1_epi16(0);
17575        assert_eq_m512i(r, e);
17576    }
17577
17578    #[simd_test(enable = "avx512bw,avx512vl")]
17579    unsafe fn test_mm256_mask_srl_epi16() {
17580        let a = _mm256_set1_epi16(1 << 1);
17581        let count = _mm_set1_epi16(2);
17582        let r = _mm256_mask_srl_epi16(a, 0, a, count);
17583        assert_eq_m256i(r, a);
17584        let r = _mm256_mask_srl_epi16(a, 0b11111111_11111111, a, count);
17585        let e = _mm256_set1_epi16(0);
17586        assert_eq_m256i(r, e);
17587    }
17588
17589    #[simd_test(enable = "avx512bw,avx512vl")]
17590    unsafe fn test_mm256_maskz_srl_epi16() {
17591        let a = _mm256_set1_epi16(1 << 1);
17592        let count = _mm_set1_epi16(2);
17593        let r = _mm256_maskz_srl_epi16(0, a, count);
17594        assert_eq_m256i(r, _mm256_setzero_si256());
17595        let r = _mm256_maskz_srl_epi16(0b11111111_11111111, a, count);
17596        let e = _mm256_set1_epi16(0);
17597        assert_eq_m256i(r, e);
17598    }
17599
17600    #[simd_test(enable = "avx512bw,avx512vl")]
17601    unsafe fn test_mm_mask_srl_epi16() {
17602        let a = _mm_set1_epi16(1 << 1);
17603        let count = _mm_set1_epi16(2);
17604        let r = _mm_mask_srl_epi16(a, 0, a, count);
17605        assert_eq_m128i(r, a);
17606        let r = _mm_mask_srl_epi16(a, 0b11111111, a, count);
17607        let e = _mm_set1_epi16(0);
17608        assert_eq_m128i(r, e);
17609    }
17610
17611    #[simd_test(enable = "avx512bw,avx512vl")]
17612    unsafe fn test_mm_maskz_srl_epi16() {
17613        let a = _mm_set1_epi16(1 << 1);
17614        let count = _mm_set1_epi16(2);
17615        let r = _mm_maskz_srl_epi16(0, a, count);
17616        assert_eq_m128i(r, _mm_setzero_si128());
17617        let r = _mm_maskz_srl_epi16(0b11111111, a, count);
17618        let e = _mm_set1_epi16(0);
17619        assert_eq_m128i(r, e);
17620    }
17621
17622    #[simd_test(enable = "avx512bw")]
17623    unsafe fn test_mm512_srli_epi16() {
17624        let a = _mm512_set1_epi16(1 << 1);
17625        let r = _mm512_srli_epi16::<2>(a);
17626        let e = _mm512_set1_epi16(0);
17627        assert_eq_m512i(r, e);
17628    }
17629
17630    #[simd_test(enable = "avx512bw")]
17631    unsafe fn test_mm512_mask_srli_epi16() {
17632        let a = _mm512_set1_epi16(1 << 1);
17633        let r = _mm512_mask_srli_epi16::<2>(a, 0, a);
17634        assert_eq_m512i(r, a);
17635        let r = _mm512_mask_srli_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
17636        let e = _mm512_set1_epi16(0);
17637        assert_eq_m512i(r, e);
17638    }
17639
17640    #[simd_test(enable = "avx512bw")]
17641    unsafe fn test_mm512_maskz_srli_epi16() {
17642        let a = _mm512_set1_epi16(1 << 1);
17643        let r = _mm512_maskz_srli_epi16::<2>(0, a);
17644        assert_eq_m512i(r, _mm512_setzero_si512());
17645        let r = _mm512_maskz_srli_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
17646        let e = _mm512_set1_epi16(0);
17647        assert_eq_m512i(r, e);
17648    }
17649
17650    #[simd_test(enable = "avx512bw,avx512vl")]
17651    unsafe fn test_mm256_mask_srli_epi16() {
17652        let a = _mm256_set1_epi16(1 << 1);
17653        let r = _mm256_mask_srli_epi16::<2>(a, 0, a);
17654        assert_eq_m256i(r, a);
17655        let r = _mm256_mask_srli_epi16::<2>(a, 0b11111111_11111111, a);
17656        let e = _mm256_set1_epi16(0);
17657        assert_eq_m256i(r, e);
17658    }
17659
17660    #[simd_test(enable = "avx512bw,avx512vl")]
17661    unsafe fn test_mm256_maskz_srli_epi16() {
17662        let a = _mm256_set1_epi16(1 << 1);
17663        let r = _mm256_maskz_srli_epi16::<2>(0, a);
17664        assert_eq_m256i(r, _mm256_setzero_si256());
17665        let r = _mm256_maskz_srli_epi16::<2>(0b11111111_11111111, a);
17666        let e = _mm256_set1_epi16(0);
17667        assert_eq_m256i(r, e);
17668    }
17669
17670    #[simd_test(enable = "avx512bw,avx512vl")]
17671    unsafe fn test_mm_mask_srli_epi16() {
17672        let a = _mm_set1_epi16(1 << 1);
17673        let r = _mm_mask_srli_epi16::<2>(a, 0, a);
17674        assert_eq_m128i(r, a);
17675        let r = _mm_mask_srli_epi16::<2>(a, 0b11111111, a);
17676        let e = _mm_set1_epi16(0);
17677        assert_eq_m128i(r, e);
17678    }
17679
17680    #[simd_test(enable = "avx512bw,avx512vl")]
17681    unsafe fn test_mm_maskz_srli_epi16() {
17682        let a = _mm_set1_epi16(1 << 1);
17683        let r = _mm_maskz_srli_epi16::<2>(0, a);
17684        assert_eq_m128i(r, _mm_setzero_si128());
17685        let r = _mm_maskz_srli_epi16::<2>(0b11111111, a);
17686        let e = _mm_set1_epi16(0);
17687        assert_eq_m128i(r, e);
17688    }
17689
17690    #[simd_test(enable = "avx512bw")]
17691    unsafe fn test_mm512_srlv_epi16() {
17692        let a = _mm512_set1_epi16(1 << 1);
17693        let count = _mm512_set1_epi16(2);
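        // Per-lane logical right shift: 2 >> 2 == 0.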
17694        let r = _mm512_srlv_epi16(a, count);
17695        let e = _mm512_set1_epi16(0);
17696        assert_eq_m512i(r, e);
17697    }
17698
17699    #[simd_test(enable = "avx512bw")]
17700    unsafe fn test_mm512_mask_srlv_epi16() {
17701        let a = _mm512_set1_epi16(1 << 1);
17702        let count = _mm512_set1_epi16(2);
17703        let r = _mm512_mask_srlv_epi16(a, 0, a, count);
17704        assert_eq_m512i(r, a);
17705        let r = _mm512_mask_srlv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
17706        let e = _mm512_set1_epi16(0);
17707        assert_eq_m512i(r, e);
17708    }
17709
17710    #[simd_test(enable = "avx512bw")]
17711    unsafe fn test_mm512_maskz_srlv_epi16() {
17712        let a = _mm512_set1_epi16(1 << 1);
17713        let count = _mm512_set1_epi16(2);
17714        let r = _mm512_maskz_srlv_epi16(0, a, count);
17715        assert_eq_m512i(r, _mm512_setzero_si512());
17716        let r = _mm512_maskz_srlv_epi16(0b11111111_11111111_11111111_11111111, a, count);
17717        let e = _mm512_set1_epi16(0);
17718        assert_eq_m512i(r, e);
17719    }
17720
17721    #[simd_test(enable = "avx512bw,avx512vl")]
17722    unsafe fn test_mm256_srlv_epi16() {
17723        let a = _mm256_set1_epi16(1 << 1);
17724        let count = _mm256_set1_epi16(2);
17725        let r = _mm256_srlv_epi16(a, count);
17726        let e = _mm256_set1_epi16(0);
17727        assert_eq_m256i(r, e);
17728    }
17729
17730    #[simd_test(enable = "avx512bw,avx512vl")]
17731    unsafe fn test_mm256_mask_srlv_epi16() {
17732        let a = _mm256_set1_epi16(1 << 1);
17733        let count = _mm256_set1_epi16(2);
17734        let r = _mm256_mask_srlv_epi16(a, 0, a, count);
17735        assert_eq_m256i(r, a);
17736        let r = _mm256_mask_srlv_epi16(a, 0b11111111_11111111, a, count);
17737        let e = _mm256_set1_epi16(0);
17738        assert_eq_m256i(r, e);
17739    }
17740
17741    #[simd_test(enable = "avx512bw,avx512vl")]
17742    unsafe fn test_mm256_maskz_srlv_epi16() {
17743        let a = _mm256_set1_epi16(1 << 1);
17744        let count = _mm256_set1_epi16(2);
17745        let r = _mm256_maskz_srlv_epi16(0, a, count);
17746        assert_eq_m256i(r, _mm256_setzero_si256());
17747        let r = _mm256_maskz_srlv_epi16(0b11111111_11111111, a, count);
17748        let e = _mm256_set1_epi16(0);
17749        assert_eq_m256i(r, e);
17750    }
17751
17752    #[simd_test(enable = "avx512bw,avx512vl")]
17753    unsafe fn test_mm_srlv_epi16() {
17754        let a = _mm_set1_epi16(1 << 1);
17755        let count = _mm_set1_epi16(2);
17756        let r = _mm_srlv_epi16(a, count);
17757        let e = _mm_set1_epi16(0);
17758        assert_eq_m128i(r, e);
17759    }
17760
17761    #[simd_test(enable = "avx512bw,avx512vl")]
17762    unsafe fn test_mm_mask_srlv_epi16() {
17763        let a = _mm_set1_epi16(1 << 1);
17764        let count = _mm_set1_epi16(2);
17765        let r = _mm_mask_srlv_epi16(a, 0, a, count);
17766        assert_eq_m128i(r, a);
17767        let r = _mm_mask_srlv_epi16(a, 0b11111111, a, count);
17768        let e = _mm_set1_epi16(0);
17769        assert_eq_m128i(r, e);
17770    }
17771
17772    #[simd_test(enable = "avx512bw,avx512vl")]
17773    unsafe fn test_mm_maskz_srlv_epi16() {
17774        let a = _mm_set1_epi16(1 << 1);
17775        let count = _mm_set1_epi16(2);
17776        let r = _mm_maskz_srlv_epi16(0, a, count);
17777        assert_eq_m128i(r, _mm_setzero_si128());
17778        let r = _mm_maskz_srlv_epi16(0b11111111, a, count);
17779        let e = _mm_set1_epi16(0);
17780        assert_eq_m128i(r, e);
17781    }
17782
17783    #[simd_test(enable = "avx512bw")]
17784    unsafe fn test_mm512_sra_epi16() {
17785        let a = _mm512_set1_epi16(8);
17786        let count = _mm_set1_epi16(1);
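        // The low 64 bits of count are 0x0001_0001_0001_0001 (> 15), so vpsraw fills every lane with its sign bit; 8 is positive, hence 0.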
17787        let r = _mm512_sra_epi16(a, count);
17788        let e = _mm512_set1_epi16(0);
17789        assert_eq_m512i(r, e);
17790    }
17791
17792    #[simd_test(enable = "avx512bw")]
17793    unsafe fn test_mm512_mask_sra_epi16() {
17794        let a = _mm512_set1_epi16(8);
17795        let count = _mm_set1_epi16(1);
17796        let r = _mm512_mask_sra_epi16(a, 0, a, count);
17797        assert_eq_m512i(r, a);
17798        let r = _mm512_mask_sra_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
17799        let e = _mm512_set1_epi16(0);
17800        assert_eq_m512i(r, e);
17801    }
17802
17803    #[simd_test(enable = "avx512bw")]
17804    unsafe fn test_mm512_maskz_sra_epi16() {
17805        let a = _mm512_set1_epi16(8);
17806        let count = _mm_set1_epi16(1);
17807        let r = _mm512_maskz_sra_epi16(0, a, count);
17808        assert_eq_m512i(r, _mm512_setzero_si512());
17809        let r = _mm512_maskz_sra_epi16(0b11111111_11111111_11111111_11111111, a, count);
17810        let e = _mm512_set1_epi16(0);
17811        assert_eq_m512i(r, e);
17812    }
17813
17814    #[simd_test(enable = "avx512bw,avx512vl")]
17815    unsafe fn test_mm256_mask_sra_epi16() {
17816        let a = _mm256_set1_epi16(8);
17817        let count = _mm_set1_epi16(1);
17818        let r = _mm256_mask_sra_epi16(a, 0, a, count);
17819        assert_eq_m256i(r, a);
17820        let r = _mm256_mask_sra_epi16(a, 0b11111111_11111111, a, count);
17821        let e = _mm256_set1_epi16(0);
17822        assert_eq_m256i(r, e);
17823    }
17824
17825    #[simd_test(enable = "avx512bw,avx512vl")]
17826    unsafe fn test_mm256_maskz_sra_epi16() {
17827        let a = _mm256_set1_epi16(8);
17828        let count = _mm_set1_epi16(1);
17829        let r = _mm256_maskz_sra_epi16(0, a, count);
17830        assert_eq_m256i(r, _mm256_setzero_si256());
17831        let r = _mm256_maskz_sra_epi16(0b11111111_11111111, a, count);
17832        let e = _mm256_set1_epi16(0);
17833        assert_eq_m256i(r, e);
17834    }
17835
17836    #[simd_test(enable = "avx512bw,avx512vl")]
17837    unsafe fn test_mm_mask_sra_epi16() {
17838        let a = _mm_set1_epi16(8);
17839        let count = _mm_set1_epi16(1);
17840        let r = _mm_mask_sra_epi16(a, 0, a, count);
17841        assert_eq_m128i(r, a);
17842        let r = _mm_mask_sra_epi16(a, 0b11111111, a, count);
17843        let e = _mm_set1_epi16(0);
17844        assert_eq_m128i(r, e);
17845    }
17846
17847    #[simd_test(enable = "avx512bw,avx512vl")]
17848    unsafe fn test_mm_maskz_sra_epi16() {
17849        let a = _mm_set1_epi16(8);
17850        let count = _mm_set1_epi16(1);
17851        let r = _mm_maskz_sra_epi16(0, a, count);
17852        assert_eq_m128i(r, _mm_setzero_si128());
17853        let r = _mm_maskz_sra_epi16(0b11111111, a, count);
17854        let e = _mm_set1_epi16(0);
17855        assert_eq_m128i(r, e);
17856    }
17857
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_srai_epi16() {
        let a = _mm512_set1_epi16(8);
        let r = _mm512_srai_epi16::<2>(a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_srai_epi16() {
        let a = _mm512_set1_epi16(8);
        let r = _mm512_mask_srai_epi16::<2>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srai_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_srai_epi16() {
        let a = _mm512_set1_epi16(8);
        let r = _mm512_maskz_srai_epi16::<2>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srai_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_srai_epi16() {
        let a = _mm256_set1_epi16(8);
        let r = _mm256_mask_srai_epi16::<2>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srai_epi16::<2>(a, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_srai_epi16() {
        let a = _mm256_set1_epi16(8);
        let r = _mm256_maskz_srai_epi16::<2>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srai_epi16::<2>(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_srai_epi16() {
        let a = _mm_set1_epi16(8);
        let r = _mm_mask_srai_epi16::<2>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srai_epi16::<2>(a, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_srai_epi16() {
        let a = _mm_set1_epi16(8);
        let r = _mm_maskz_srai_epi16::<2>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srai_epi16::<2>(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

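    // Unlike `sra`/`srai`, the `srav` variants shift each element of `a` by the
    // count in the corresponding element of `count`.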
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_srav_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_srav_epi16(a, count);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_srav_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_mask_srav_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srav_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_srav_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_maskz_srav_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srav_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_srav_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_srav_epi16(a, count);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_srav_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_mask_srav_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srav_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_srav_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_maskz_srav_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srav_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_srav_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(2);
        let r = _mm_srav_epi16(a, count);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_srav_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_srav_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srav_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_srav_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_srav_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srav_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

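    // In the 512-bit `permutex2var` tests, bits [4:0] of each index pick one of
    // the 32 elements across `a` and `b`, and bit 5 selects the source, so every
    // `1<<5` entry reads from `b` (all 100s). The 256-bit and 128-bit tests below
    // use `1<<4` and `1<<3` as the source-select bit for the same reason.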
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_permutex2var_epi16(a, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutex2var_epi16(a, 0b11111111_11111111_11111111_11111111, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutex2var_epi16(0b11111111_11111111_11111111_11111111, a, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

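    // The `mask2` variants copy elements from `idx` (not `a`) where the mask bit
    // is clear, which is why the zero-mask case compares equal to `idx`.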
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask2_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m512i(r, idx);
        let r = _mm512_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111_11111111_11111111, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_permutex2var_epi16(a, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutex2var_epi16(a, 0b11111111_11111111, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutex2var_epi16(0b11111111_11111111, a, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask2_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m256i(r, idx);
        let r = _mm256_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_permutex2var_epi16(a, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutex2var_epi16(a, 0b11111111, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutex2var_epi16(0b11111111, a, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask2_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m128i(r, idx);
        let r = _mm_mask2_permutex2var_epi16(a, idx, 0b11111111, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

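    // `permutexvar` indexes into a single source. Since `_mm512_set_epi16` lists
    // elements from highest to lowest, index 1 selects the second-lowest element:
    // 30 here, and 14 and 6 in the narrower tests below.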
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_permutexvar_epi16() {
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_permutexvar_epi16(idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_permutexvar_epi16() {
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutexvar_epi16(a, 0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_permutexvar_epi16() {
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutexvar_epi16(0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_permutexvar_epi16() {
        let idx = _mm256_set1_epi16(1);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_permutexvar_epi16(idx, a);
        let e = _mm256_set1_epi16(14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_permutexvar_epi16() {
        let idx = _mm256_set1_epi16(1);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutexvar_epi16(a, 0b11111111_11111111, idx, a);
        let e = _mm256_set1_epi16(14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_permutexvar_epi16() {
        let idx = _mm256_set1_epi16(1);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutexvar_epi16(0b11111111_11111111, idx, a);
        let e = _mm256_set1_epi16(14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_permutexvar_epi16() {
        let idx = _mm_set1_epi16(1);
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_permutexvar_epi16(idx, a);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_permutexvar_epi16() {
        let idx = _mm_set1_epi16(1);
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutexvar_epi16(a, 0b11111111, idx, a);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_permutexvar_epi16() {
        let idx = _mm_set1_epi16(1);
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutexvar_epi16(0b11111111, idx, a);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

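    // `mask_blend` picks elements of `b` where the mask bit is set and of `a`
    // where it is clear; mask bit 0 controls the lowest element.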
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_blend_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_blend_epi16(0b11111111_00000000_11111111_00000000, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_blend_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_blend_epi16(0b11111111_00000000, a, b);
        let e = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_blend_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_blend_epi16(0b11110000, a, b);
        let e = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_blend_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_blend_epi8(
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_blend_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_blend_epi8(0b11111111_00000000_11111111_00000000, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_blend_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_mask_blend_epi8(0b11111111_00000000, a, b);
        let e = _mm_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

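    // `broadcastw` replicates the lowest 16-bit element of `a`, i.e. the last
    // argument of `_mm_set_epi16` (24 here), into every lane.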
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_broadcastw_epi16(a);
        let e = _mm512_set1_epi16(24);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_broadcastw_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_mask_broadcastw_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_broadcastw_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(24);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_maskz_broadcastw_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_broadcastw_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(24);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_broadcastw_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_mask_broadcastw_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_broadcastw_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(24);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_maskz_broadcastw_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_broadcastw_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(24);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_broadcastw_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm_mask_broadcastw_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_broadcastw_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(24);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm_maskz_broadcastw_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_broadcastw_epi16(0b11111111, a);
        let e = _mm_set1_epi16(24);
        assert_eq_m128i(r, e);
    }

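    // `broadcastb` likewise replicates the lowest byte of `a` (32 here).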
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_broadcastb_epi8(a);
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_broadcastb_epi8() {
        let src = _mm512_set1_epi8(1);
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_mask_broadcastb_epi8(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_broadcastb_epi8(
            src,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_maskz_broadcastb_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_broadcastb_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_broadcastb_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_mask_broadcastb_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_broadcastb_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_maskz_broadcastb_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_broadcastb_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_broadcastb_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm_mask_broadcastb_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_broadcastb_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(32);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm_maskz_broadcastb_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_broadcastb_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(32);
        assert_eq_m128i(r, e);
    }

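    // The unpack tests interleave within each 128-bit lane independently rather
    // than across the whole vector: `unpackhi` pairs the upper half of every lane
    // of `a` with the matching lane of `b`, producing the grouped pattern in `e`.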
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_unpackhi_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpackhi_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpackhi_epi16(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_unpackhi_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        let r = _mm256_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpackhi_epi16(a, 0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_unpackhi_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        let r = _mm256_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpackhi_epi16(0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_unpackhi_epi16() {
        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
        let r = _mm_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpackhi_epi16(a, 0b11111111, a, b);
        let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_unpackhi_epi16() {
        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
        let r = _mm_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpackhi_epi16(0b11111111, a, b);
        let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_unpackhi_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpackhi_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpackhi_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpackhi_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpackhi_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_unpackhi_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpackhi_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_unpackhi_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpackhi_epi8(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
        assert_eq_m128i(r, e);
    }

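    // `unpacklo` mirrors `unpackhi`, interleaving the lower half of each 128-bit
    // lane.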
18763    #[simd_test(enable = "avx512bw")]
18764    unsafe fn test_mm512_unpacklo_epi16() {
18765        #[rustfmt::skip]
18766        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
18767                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
18768        #[rustfmt::skip]
18769        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18770                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
18771        let r = _mm512_unpacklo_epi16(a, b);
18772        #[rustfmt::skip]
18773        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
18774                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
18775        assert_eq_m512i(r, e);
18776    }
18777
18778    #[simd_test(enable = "avx512bw")]
18779    unsafe fn test_mm512_mask_unpacklo_epi16() {
18780        #[rustfmt::skip]
18781        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
18782                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
18783        #[rustfmt::skip]
18784        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18785                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
18786        let r = _mm512_mask_unpacklo_epi16(a, 0, a, b);
18787        assert_eq_m512i(r, a);
18788        let r = _mm512_mask_unpacklo_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
18789        #[rustfmt::skip]
18790        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
18791                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
18792        assert_eq_m512i(r, e);
18793    }
18794
18795    #[simd_test(enable = "avx512bw")]
18796    unsafe fn test_mm512_maskz_unpacklo_epi16() {
18797        #[rustfmt::skip]
18798        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
18799                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
18800        #[rustfmt::skip]
18801        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18802                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
18803        let r = _mm512_maskz_unpacklo_epi16(0, a, b);
18804        assert_eq_m512i(r, _mm512_setzero_si512());
18805        let r = _mm512_maskz_unpacklo_epi16(0b11111111_11111111_11111111_11111111, a, b);
18806        #[rustfmt::skip]
18807        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
18808                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
18809        assert_eq_m512i(r, e);
18810    }
18811
18812    #[simd_test(enable = "avx512bw,avx512vl")]
18813    unsafe fn test_mm256_mask_unpacklo_epi16() {
18814        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
18815        let b = _mm256_set_epi16(
18816            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18817        );
18818        let r = _mm256_mask_unpacklo_epi16(a, 0, a, b);
18819        assert_eq_m256i(r, a);
18820        let r = _mm256_mask_unpacklo_epi16(a, 0b11111111_11111111, a, b);
18821        let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
18822        assert_eq_m256i(r, e);
18823    }
18824
18825    #[simd_test(enable = "avx512bw,avx512vl")]
18826    unsafe fn test_mm256_maskz_unpacklo_epi16() {
18827        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
18828        let b = _mm256_set_epi16(
18829            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18830        );
18831        let r = _mm256_maskz_unpacklo_epi16(0, a, b);
18832        assert_eq_m256i(r, _mm256_setzero_si256());
18833        let r = _mm256_maskz_unpacklo_epi16(0b11111111_11111111, a, b);
18834        let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
18835        assert_eq_m256i(r, e);
18836    }
18837
18838    #[simd_test(enable = "avx512bw,avx512vl")]
18839    unsafe fn test_mm_mask_unpacklo_epi16() {
18840        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
18841        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
18842        let r = _mm_mask_unpacklo_epi16(a, 0, a, b);
18843        assert_eq_m128i(r, a);
18844        let r = _mm_mask_unpacklo_epi16(a, 0b11111111, a, b);
18845        let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
18846        assert_eq_m128i(r, e);
18847    }
18848
18849    #[simd_test(enable = "avx512bw,avx512vl")]
18850    unsafe fn test_mm_maskz_unpacklo_epi16() {
18851        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
18852        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
18853        let r = _mm_maskz_unpacklo_epi16(0, a, b);
18854        assert_eq_m128i(r, _mm_setzero_si128());
18855        let r = _mm_maskz_unpacklo_epi16(0b11111111, a, b);
18856        let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
18857        assert_eq_m128i(r, e);
18858    }
18859
18860    #[simd_test(enable = "avx512bw")]
18861    unsafe fn test_mm512_unpacklo_epi8() {
18862        #[rustfmt::skip]
18863        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
18864                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
18865                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18866                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
18867        #[rustfmt::skip]
18868        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
18869                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
18870                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
18871                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
18872        let r = _mm512_unpacklo_epi8(a, b);
18873        #[rustfmt::skip]
18874        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
18875                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
18876                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
18877                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
18878        assert_eq_m512i(r, e);
18879    }
18880
18881    #[simd_test(enable = "avx512bw")]
18882    unsafe fn test_mm512_mask_unpacklo_epi8() {
18883        #[rustfmt::skip]
18884        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
18885                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
18886                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18887                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
18888        #[rustfmt::skip]
18889        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
18890                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
18891                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
18892                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
18893        let r = _mm512_mask_unpacklo_epi8(a, 0, a, b);
18894        assert_eq_m512i(r, a);
18895        let r = _mm512_mask_unpacklo_epi8(
18896            a,
18897            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
18898            a,
18899            b,
18900        );
18901        #[rustfmt::skip]
18902        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
18903                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
18904                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
18905                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
18906        assert_eq_m512i(r, e);
18907    }
18908
18909    #[simd_test(enable = "avx512bw")]
18910    unsafe fn test_mm512_maskz_unpacklo_epi8() {
18911        #[rustfmt::skip]
18912        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
18913                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
18914                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18915                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
18916        #[rustfmt::skip]
18917        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
18918                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
18919                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
18920                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
18921        let r = _mm512_maskz_unpacklo_epi8(0, a, b);
18922        assert_eq_m512i(r, _mm512_setzero_si512());
18923        let r = _mm512_maskz_unpacklo_epi8(
18924            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
18925            a,
18926            b,
18927        );
18928        #[rustfmt::skip]
18929        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
18930                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
18931                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
18932                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
18933        assert_eq_m512i(r, e);
18934    }
18935
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpacklo_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpacklo_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_unpacklo_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpacklo_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_unpacklo_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpacklo_epi8(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
        );
        assert_eq_m128i(r, e);
    }

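    // The mov tests below exercise the plain masked blend: each destination
    // element is taken from a where its mask bit is set, and from src (or zero,
    // for the maskz variants) where it is clear.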
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mov_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm512_set1_epi16(2);
        let r = _mm512_mask_mov_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_mov_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mov_epi16() {
        let a = _mm512_set1_epi16(2);
        let r = _mm512_maskz_mov_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mov_epi16(0b11111111_11111111_11111111_11111111, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_mov_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm256_set1_epi16(2);
        let r = _mm256_mask_mov_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_mov_epi16(src, 0b11111111_11111111, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_mov_epi16() {
        let a = _mm256_set1_epi16(2);
        let r = _mm256_maskz_mov_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mov_epi16(0b11111111_11111111, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_mov_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi16(2);
        let r = _mm_mask_mov_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_mov_epi16(src, 0b11111111, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_mov_epi16() {
        let a = _mm_set1_epi16(2);
        let r = _mm_maskz_mov_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mov_epi16(0b11111111, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mov_epi8() {
        let src = _mm512_set1_epi8(1);
        let a = _mm512_set1_epi8(2);
        let r = _mm512_mask_mov_epi8(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_mov_epi8(
            src,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mov_epi8() {
        let a = _mm512_set1_epi8(2);
        let r = _mm512_maskz_mov_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mov_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_mov_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm256_mask_mov_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_mov_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_mov_epi8() {
        let a = _mm256_set1_epi8(2);
        let r = _mm256_maskz_mov_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mov_epi8(0b11111111_11111111_11111111_11111111, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_mov_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_mov_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_mov_epi8(src, 0b11111111_11111111, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_mov_epi8() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_mov_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mov_epi8(0b11111111_11111111, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_set1_epi16() {
        let src = _mm512_set1_epi16(2);
        let a: i16 = 11;
        let r = _mm512_mask_set1_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_set1_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_set1_epi16() {
        let a: i16 = 11;
        let r = _mm512_maskz_set1_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_set1_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_set1_epi16() {
        let src = _mm256_set1_epi16(2);
        let a: i16 = 11;
        let r = _mm256_mask_set1_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_set1_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_set1_epi16() {
        let a: i16 = 11;
        let r = _mm256_maskz_set1_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_set1_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_set1_epi16() {
        let src = _mm_set1_epi16(2);
        let a: i16 = 11;
        let r = _mm_mask_set1_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_set1_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(11);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_set1_epi16() {
        let a: i16 = 11;
        let r = _mm_maskz_set1_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_set1_epi16(0b11111111, a);
        let e = _mm_set1_epi16(11);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_set1_epi8() {
        let src = _mm512_set1_epi8(2);
        let a: i8 = 11;
        let r = _mm512_mask_set1_epi8(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_set1_epi8(
            src,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_set1_epi8() {
        let a: i8 = 11;
        let r = _mm512_maskz_set1_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_set1_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_set1_epi8() {
        let src = _mm256_set1_epi8(2);
        let a: i8 = 11;
        let r = _mm256_mask_set1_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_set1_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_set1_epi8() {
        let a: i8 = 11;
        let r = _mm256_maskz_set1_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_set1_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_set1_epi8() {
        let src = _mm_set1_epi8(2);
        let a: i8 = 11;
        let r = _mm_mask_set1_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_set1_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(11);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_set1_epi8() {
        let a: i8 = 11;
        let r = _mm_maskz_set1_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_set1_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(11);
        assert_eq_m128i(r, e);
    }

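    // shufflelo_epi16 rearranges the four low words of every 128-bit lane using
    // the two-bit selectors in IMM8; the four high words pass through unchanged.
    // IMM8 = 0b00_01_01_11 gives dst[0..4] = src[3], src[1], src[1], src[0],
    // which (in _mm*_set_epi16's high-to-low argument order) shows up as the
    // `7, 6, 6, 4` run at the bottom of each lane in the expected vectors.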
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        let r = _mm512_shufflelo_epi16::<0b00_01_01_11>(a);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(
            a,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r =
            _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_shufflelo_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_shufflelo_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_shufflelo_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_shufflelo_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
        assert_eq_m128i(r, e);
    }

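    // shufflehi_epi16 is the mirror image: the same two-bit selectors pick among
    // the four high words of each 128-bit lane while the low words pass through,
    // producing the `3, 2, 2, 0` run at the top of each lane here.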
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        let r = _mm512_shufflehi_epi16::<0b00_01_01_11>(a);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(
            a,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r =
            _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_shufflehi_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
        let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_shufflehi_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
        let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_shufflehi_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111, a);
        let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_shufflehi_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111, a);
        let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

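    // shuffle_epi8 uses the low four bits of each byte of b as a byte index into
    // the same 128-bit lane of a (a set high bit would zero the output byte).
    // With b = all 1s every output byte is byte 1 of its lane, which for the
    // descending values in a is 62, 46, 30 and 14 from the lowest lane up.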
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_shuffle_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shuffle_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_shuffle_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shuffle_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shuffle_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_shuffle_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shuffle_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_shuffle_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shuffle_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_shuffle_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shuffle_epi8(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        );
        assert_eq_m128i(r, e);
    }

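    // test_epi*_mask ANDs corresponding elements and sets the mask bit where the
    // result is non-zero. Every element of a AND b here is 1 << 0, so the full
    // mask comes back.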
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_test_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_test_epi16_mask(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_test_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_mask_test_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_test_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_test_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_test_epi16_mask(a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_test_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_mask_test_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_test_epi16_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_test_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_test_epi16_mask(a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_test_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_mask_test_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_test_epi16_mask(0b11111111, a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_test_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_test_epi8_mask(a, b);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_test_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_mask_test_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_test_epi8_mask(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_test_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_test_epi8_mask(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_test_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_mask_test_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_test_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_test_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_test_epi8_mask(a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_test_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_mask_test_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_test_epi8_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

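    // testn_epi*_mask is the negation: a mask bit is set only where the AND is
    // zero, so the same inputs produce an all-zero mask.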
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_testn_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_testn_epi16_mask(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_testn_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_mask_testn_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_testn_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_testn_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_testn_epi16_mask(a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_testn_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_mask_testn_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_testn_epi16_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_testn_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_testn_epi16_mask(a, b);
        let e: __mmask8 = 0b00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_testn_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_mask_testn_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_testn_epi16_mask(0b11111111, a, b);
        let e: __mmask8 = 0b00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_testn_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_testn_epi8_mask(a, b);
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_testn_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_mask_testn_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_testn_epi8_mask(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_testn_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_testn_epi8_mask(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_testn_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_mask_testn_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_testn_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_testn_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_testn_epi8_mask(a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_testn_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_mask_testn_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_testn_epi8_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_store_mask64() {
        let a: __mmask64 =
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
        let mut r = 0;
        _store_mask64(&mut r, a);
        assert_eq!(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_store_mask32() {
        let a: __mmask32 = 0b11111111_00000000_11111111_00000000;
        let mut r = 0;
        _store_mask32(&mut r, a);
        assert_eq!(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_load_mask64() {
        let p: __mmask64 =
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
        let r = _load_mask64(&p);
        let e: __mmask64 =
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_load_mask32() {
        let p: __mmask32 = 0b11111111_00000000_11111111_00000000;
        let r = _load_mask32(&p);
        let e: __mmask32 = 0b11111111_00000000_11111111_00000000;
        assert_eq!(r, e);
    }

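    // sad_epu8 sums absolute differences of the eight unsigned bytes in each
    // 64-bit chunk: 8 * |2 - 4| = 16.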
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sad_epu8() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_sad_epu8(a, b);
        let e = _mm512_set1_epi64(16);
        assert_eq_m512i(r, e);
    }

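    // dbsad_epu8 builds each 16-bit result from four byte-wise absolute
    // differences, so with constant inputs every word is 4 * |2 - 4| = 8, no
    // matter which 32-bit blocks IMM8 selects.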
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_dbsad_epu8() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_dbsad_epu8::<0>(a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_dbsad_epu8() {
        let src = _mm512_set1_epi16(1);
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dbsad_epu8::<0>(src, 0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_dbsad_epu8() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dbsad_epu8::<0>(0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_dbsad_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_dbsad_epu8::<0>(a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_dbsad_epu8() {
        let src = _mm256_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dbsad_epu8::<0>(src, 0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_dbsad_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dbsad_epu8::<0>(0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_dbsad_epu8() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        let r = _mm_dbsad_epu8::<0>(a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_dbsad_epu8() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        let r = _mm_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dbsad_epu8::<0>(src, 0b11111111, a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_dbsad_epu8() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        let r = _mm_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dbsad_epu8::<0>(0b11111111, a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }

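    // movepi*_mask gathers the most significant bit of every element into a
    // mask; broadcasting a value with the sign bit set therefore yields all ones.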
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_movepi16_mask() {
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_movepi16_mask(a);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_movepi16_mask() {
        let a = _mm256_set1_epi16(1 << 15);
        let r = _mm256_movepi16_mask(a);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_movepi16_mask() {
        let a = _mm_set1_epi16(1 << 15);
        let r = _mm_movepi16_mask(a);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_movepi8_mask() {
        let a = _mm512_set1_epi8(1 << 7);
        let r = _mm512_movepi8_mask(a);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_movepi8_mask() {
        let a = _mm256_set1_epi8(1 << 7);
        let r = _mm256_movepi8_mask(a);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_movepi8_mask() {
        let a = _mm_set1_epi8(1 << 7);
        let r = _mm_movepi8_mask(a);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

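    // movm_epi* is the inverse expansion: every set mask bit becomes an all-ones
    // element. The ORed shift chains below simply spell out that all-ones
    // pattern (-1) bit by bit.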
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_movm_epi16() {
        let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
        let r = _mm512_movm_epi16(a);
        let e = _mm512_set1_epi16(
            1 << 15
                | 1 << 14
                | 1 << 13
                | 1 << 12
                | 1 << 11
                | 1 << 10
                | 1 << 9
                | 1 << 8
                | 1 << 7
                | 1 << 6
                | 1 << 5
                | 1 << 4
                | 1 << 3
                | 1 << 2
                | 1 << 1
                | 1 << 0,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_movm_epi16() {
        let a: __mmask16 = 0b11111111_11111111;
        let r = _mm256_movm_epi16(a);
        let e = _mm256_set1_epi16(
            1 << 15
                | 1 << 14
                | 1 << 13
                | 1 << 12
                | 1 << 11
                | 1 << 10
                | 1 << 9
                | 1 << 8
                | 1 << 7
                | 1 << 6
                | 1 << 5
                | 1 << 4
                | 1 << 3
                | 1 << 2
                | 1 << 1
                | 1 << 0,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_movm_epi16() {
        let a: __mmask8 = 0b11111111;
        let r = _mm_movm_epi16(a);
        let e = _mm_set1_epi16(
            1 << 15
                | 1 << 14
                | 1 << 13
                | 1 << 12
                | 1 << 11
                | 1 << 10
                | 1 << 9
                | 1 << 8
                | 1 << 7
                | 1 << 6
                | 1 << 5
                | 1 << 4
                | 1 << 3
                | 1 << 2
                | 1 << 1
                | 1 << 0,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_movm_epi8() {
        let a: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        let r = _mm512_movm_epi8(a);
        let e =
            _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_movm_epi8() {
        let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
        let r = _mm256_movm_epi8(a);
        let e =
            _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_movm_epi8() {
        let a: __mmask16 = 0b11111111_11111111;
        let r = _mm_movm_epi8(a);
        let e =
            _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_cvtmask32_u32() {
        let a: __mmask32 = 0b11001100_00110011_01100110_10011001;
        let r = _cvtmask32_u32(a);
        let e: u32 = 0b11001100_00110011_01100110_10011001;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_cvtu32_mask32() {
        let a: u32 = 0b11001100_00110011_01100110_10011001;
        let r = _cvtu32_mask32(a);
        let e: __mmask32 = 0b11001100_00110011_01100110_10011001;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kadd_mask32() {
        let a: __mmask32 = 11;
        let b: __mmask32 = 22;
        let r = _kadd_mask32(a, b);
        let e: __mmask32 = 33;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kadd_mask64() {
        let a: __mmask64 = 11;
        let b: __mmask64 = 22;
        let r = _kadd_mask64(a, b);
        let e: __mmask64 = 33;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kand_mask32() {
        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kand_mask32(a, b);
        let e: __mmask32 = 0b11001100_00110011_11001100_00110011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kand_mask64() {
        let a: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kand_mask64(a, b);
        let e: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_knot_mask32() {
        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _knot_mask32(a);
        let e: __mmask32 = 0b00110011_11001100_00110011_11001100;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_knot_mask64() {
        let a: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _knot_mask64(a);
        let e: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kandn_mask32() {
        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kandn_mask32(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kandn_mask64() {
        let a: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kandn_mask64(a, b);
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kor_mask32() {
        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kor_mask32(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kor_mask64() {
        let a: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kor_mask64(a, b);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kxor_mask32() {
        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kxor_mask32(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kxor_mask64() {
        let a: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kxor_mask64(a, b);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kxnor_mask32() {
        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kxnor_mask32(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kxnor_mask64() {
        let a: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kxnor_mask64(a, b);
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

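    // _kortest*_u8 ORs the two masks, returns 1 when the result is all zeros,
    // and stores 1 through the out-pointer when it is all ones. The 32-bit pair
    // below ORs to all ones; widened to 64 bits it no longer does.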
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortest_mask32_u8() {
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let b: __mmask32 = 0b1011011010110110_1011011010110110;
        let mut all_ones: u8 = 0;
        let r = _kortest_mask32_u8(a, b, &mut all_ones);
        assert_eq!(r, 0);
        assert_eq!(all_ones, 1);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortest_mask64_u8() {
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let b: __mmask64 = 0b1011011010110110_1011011010110110;
        let mut all_ones: u8 = 0;
        let r = _kortest_mask64_u8(a, b, &mut all_ones);
        assert_eq!(r, 0);
        assert_eq!(all_ones, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortestc_mask32_u8() {
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let b: __mmask32 = 0b1011011010110110_1011011010110110;
        let r = _kortestc_mask32_u8(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortestc_mask64_u8() {
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let b: __mmask64 = 0b1011011010110110_1011011010110110;
        let r = _kortestc_mask64_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortestz_mask32_u8() {
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let b: __mmask32 = 0b1011011010110110_1011011010110110;
        let r = _kortestz_mask32_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortestz_mask64_u8() {
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let b: __mmask64 = 0b1011011010110110_1011011010110110;
        let r = _kortestz_mask64_u8(a, b);
        assert_eq!(r, 0);
    }

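    // kshiftli/kshiftri shift the whole mask register by the const COUNT,
    // filling the vacated bit positions with zeros.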
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kshiftli_mask32() {
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let r = _kshiftli_mask32::<3>(a);
        let e: __mmask32 = 0b0100101101001011_0100101101001000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kshiftli_mask64() {
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let r = _kshiftli_mask64::<3>(a);
        let e: __mmask64 = 0b0110100101101001011_0100101101001000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kshiftri_mask32() {
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let r = _kshiftri_mask32::<3>(a);
        let e: __mmask32 = 0b0000110100101101_0010110100101101;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kshiftri_mask64() {
        let a: __mmask64 = 0b0110100101101001011_0100101101001000;
        let r = _kshiftri_mask64::<3>(a);
        let e: __mmask64 = 0b0110100101101001_0110100101101001;
        assert_eq!(r, e);
    }

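    // KTEST reports flags on mask ANDs: _ktest (and _ktestz) returns 1 when
    // a AND b is all zeros, and the and_not out-parameter (and _ktestc) is 1
    // when (NOT a) AND b is all zeros. The operands below are complements,
    // so a AND b == 0 while (NOT a) AND b == b != 0.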
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktest_mask32_u8() {
        let a: __mmask32 = 0b0110100100111100_0110100100111100;
        let b: __mmask32 = 0b1001011011000011_1001011011000011;
        let mut and_not: u8 = 0;
        let r = _ktest_mask32_u8(a, b, &mut and_not);
        assert_eq!(r, 1);
        assert_eq!(and_not, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktestc_mask32_u8() {
        let a: __mmask32 = 0b0110100100111100_0110100100111100;
        let b: __mmask32 = 0b1001011011000011_1001011011000011;
        let r = _ktestc_mask32_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktestz_mask32_u8() {
        let a: __mmask32 = 0b0110100100111100_0110100100111100;
        let b: __mmask32 = 0b1001011011000011_1001011011000011;
        let r = _ktestz_mask32_u8(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktest_mask64_u8() {
        let a: __mmask64 = 0b0110100100111100_0110100100111100;
        let b: __mmask64 = 0b1001011011000011_1001011011000011;
        let mut and_not: u8 = 0;
        let r = _ktest_mask64_u8(a, b, &mut and_not);
        assert_eq!(r, 1);
        assert_eq!(and_not, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktestc_mask64_u8() {
        let a: __mmask64 = 0b0110100100111100_0110100100111100;
        let b: __mmask64 = 0b1001011011000011_1001011011000011;
        let r = _ktestc_mask64_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktestz_mask64_u8() {
        let a: __mmask64 = 0b0110100100111100_0110100100111100;
        let b: __mmask64 = 0b1001011011000011_1001011011000011;
        let r = _ktestz_mask64_u8(a, b);
        assert_eq!(r, 1);
    }

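    // _mm512_kunpackw/_mm512_kunpackd concatenate the low halves of two mask
    // registers: the low half of b forms the low half of the result and the
    // low half of a forms the high half.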
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_kunpackw() {
        let a: u32 = 0x00110011;
        let b: u32 = 0x00001011;
        let r = _mm512_kunpackw(a, b);
        let e: u32 = 0x00111011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_kunpackd() {
        let a: u64 = 0x11001100_00110011;
        let b: u64 = 0x00101110_00001011;
        let r = _mm512_kunpackd(a, b);
        let e: u64 = 0x00110011_00001011;
        assert_eq!(r, e);
    }

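    // _mm512_cvtepi16_epi8 (vpmovwb) truncates each 16-bit element to its low
    // byte, halving the vector width; the narrower variants below follow the
    // same pattern with their plain, mask, and maskz forms.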
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtepi16_epi8() {
        let a = _mm512_set1_epi16(2);
        let r = _mm512_cvtepi16_epi8(a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(2);
        let r = _mm512_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtepi16_epi8() {
        let a = _mm512_set1_epi16(2);
        let r = _mm512_maskz_cvtepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cvtepi16_epi8() {
        let a = _mm256_set1_epi16(2);
        let r = _mm256_cvtepi16_epi8(a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(2);
        let r = _mm256_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepi16_epi8() {
        let a = _mm256_set1_epi16(2);
        let r = _mm256_maskz_cvtepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cvtepi16_epi8() {
        let a = _mm_set1_epi16(2);
        let r = _mm_cvtepi16_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(2);
        let r = _mm_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi16_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtepi16_epi8() {
        let a = _mm_set1_epi16(2);
        let r = _mm_maskz_cvtepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi16_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

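    // The cvtsepi16_epi8 (vpmovswb) tests narrow with signed saturation:
    // i16::MAX exceeds the i8 range and clamps to i8::MAX.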
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtsepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_cvtsepi16_epi8(a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtsepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtsepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cvtsepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_cvtsepi16_epi8(a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtsepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtsepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtsepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtsepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cvtsepi16_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_cvtsepi16_epi8(a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtsepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtsepi16_epi8(src, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtsepi16_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtsepi16_epi8(0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtsepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtsepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

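    // The cvtusepi16_epi8 (vpmovuswb) tests narrow with unsigned saturation:
    // i16::MIN reinterpreted as the unsigned value 0x8000 exceeds 255 and
    // clamps to u8::MAX, which reads back as -1 through the signed i8 lanes.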
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtusepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_cvtusepi16_epi8(a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtusepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtusepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtusepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtusepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cvtusepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_cvtusepi16_epi8(a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtusepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtusepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtusepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtusepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cvtusepi16_epi8() {
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_cvtusepi16_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtusepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtusepi16_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtusepi16_epi8() {
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtusepi16_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

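    // _mm512_cvtepi8_epi16 (vpmovsxbw) sign-extends each byte to 16 bits,
    // doubling the vector width.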
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtepi8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_cvtepi8_epi16(a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepi8_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm512_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepi8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtepi8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_maskz_cvtepi8_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepi8_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi8_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm256_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepi8_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepi8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm256_maskz_cvtepi8_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepi8_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepi8_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi8_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtepi8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_cvtepi8_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi8_epi16(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

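    // _mm512_cvtepu8_epi16 (vpmovzxbw) zero-extends each byte to 16 bits.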
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtepu8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_cvtepu8_epi16(a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepu8_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm512_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepu8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtepu8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_maskz_cvtepu8_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepu8_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepu8_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm256_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepu8_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepu8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm256_maskz_cvtepu8_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepu8_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepu8_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepu8_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtepu8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_cvtepu8_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepu8_epi16(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

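    // _mm512_bslli_epi128/_mm512_bsrli_epi128 shift bytes within each 128-bit
    // lane independently; bytes never cross a lane boundary, so the same
    // pattern repeats in all four lanes below.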
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_bslli_epi128() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let r = _mm512_bslli_epi128::<9>(a);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_bsrli_epi128() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
            49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        );
        let r = _mm512_bsrli_epi128::<3>(a);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
            0, 0, 0, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
            0, 0, 0, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            0, 0, 0, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
        );
        assert_eq_m512i(r, e);
    }

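    // _mm512_alignr_epi8 concatenates corresponding 128-bit lanes of a and b
    // and shifts the 32-byte intermediate right by the immediate, keeping the
    // low 16 bytes of each lane; with an offset of 14, two bytes of b remain
    // at the bottom of every lane.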
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_alignr_epi8::<14>(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_alignr_epi8::<14>(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_alignr_epi8::<14>(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_alignr_epi8::<14>(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_alignr_epi8::<14>(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_alignr_epi8() {
        let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_alignr_epi8::<14>(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_alignr_epi8() {
        let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_alignr_epi8::<14>(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
        assert_eq_m128i(r, e);
    }

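    // The *_storeu_epi8 variants perform the same narrowing conversions but
    // write the selected bytes straight to (possibly unaligned) memory under
    // a writemask instead of returning a vector.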
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtsepi16_storeu_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let mut r = _mm256_undefined_si256();
        _mm512_mask_cvtsepi16_storeu_epi8(
            &mut r as *mut _ as *mut i8,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtsepi16_storeu_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let mut r = _mm_undefined_si128();
        _mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtsepi16_storeu_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0, 0, 0, 0, 0,
            i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepi16_storeu_epi8() {
        let a = _mm512_set1_epi16(8);
        let mut r = _mm256_undefined_si256();
        _mm512_mask_cvtepi16_storeu_epi8(
            &mut r as *mut _ as *mut i8,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm256_set1_epi8(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi16_storeu_epi8() {
        let a = _mm256_set1_epi16(8);
        let mut r = _mm_undefined_si128();
        _mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepi16_storeu_epi8() {
        let a = _mm_set1_epi16(8);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtusepi16_storeu_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let mut r = _mm256_undefined_si256();
        _mm512_mask_cvtusepi16_storeu_epi8(
            &mut r as *mut _ as *mut i8,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm256_set1_epi8(u8::MAX as i8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtusepi16_storeu_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let mut r = _mm_undefined_si128();
        _mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtusepi16_storeu_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
        );
        assert_eq_m128i(r, e);
    }
}