// core/stdarch/crates/core_arch/src/x86/avx512bw.rs

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    ptr,
};

use core::hint::unreachable_unchecked;

#[cfg(test)]
use stdarch_test::assert_instr;
/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi16&expand=30)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm512_abs_epi16(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let cmp: i16x32 = simd_gt(a, i16x32::ZERO);
        transmute(simd_select(cmp, a, simd_neg(a)))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi16&expand=31)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm512_mask_abs_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, abs, src.as_i16x32()))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi16&expand=32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm512_maskz_abs_epi16(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, abs, i16x32::ZERO))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi16&expand=28)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm256_mask_abs_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, abs, src.as_i16x16()))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi16&expand=29)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm256_maskz_abs_epi16(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, abs, i16x16::ZERO))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi16&expand=25)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm_mask_abs_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, abs, src.as_i16x8()))
    }
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi16&expand=26)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, abs, i16x8::ZERO))
    }
}
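// Usage sketch (illustrative only, not part of the upstream tests): the plain,
// write-masked, and zero-masked variants side by side. The helper name and mask
// constant are hypothetical; the caller must guarantee AVX-512BW support.
//
//     #[target_feature(enable = "avx512bw")]
//     fn abs_epi16_demo(src: __m512i) -> __m512i {
//         let a = _mm512_set1_epi16(-5);
//         let _all = _mm512_abs_epi16(a); // all 32 lanes become 5
//         let _merged = _mm512_mask_abs_epi16(src, 0x0000_FFFF, a); // high 16 lanes copied from src
//         _mm512_maskz_abs_epi16(0x0000_FFFF, a) // high 16 lanes zeroed
//     }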
/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi8&expand=57)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm512_abs_epi8(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let cmp: i8x64 = simd_gt(a, i8x64::ZERO);
        transmute(simd_select(cmp, a, simd_neg(a)))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi8&expand=58)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm512_mask_abs_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, abs, src.as_i8x64()))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi8&expand=59)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm512_maskz_abs_epi8(k: __mmask64, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, abs, i8x64::ZERO))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi8&expand=55)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm256_mask_abs_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, abs, src.as_i8x32()))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi8&expand=56)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm256_maskz_abs_epi8(k: __mmask32, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, abs, i8x32::ZERO))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi8&expand=52)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm_mask_abs_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, abs, src.as_i8x16()))
    }
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi8&expand=53)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm_maskz_abs_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, abs, i8x16::ZERO))
    }
}
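// Usage sketch (hypothetical helper, assuming AVX-512BW): the same masking
// pattern as the 16-bit version, but with 64 byte lanes and a 64-bit mask.
//
//     #[target_feature(enable = "avx512bw")]
//     fn abs_epi8_demo(src: __m512i) -> __m512i {
//         let a = _mm512_set1_epi8(-7);
//         let _all = _mm512_abs_epi8(a); // all 64 lanes become 7
//         _mm512_mask_abs_epi8(src, 0xFFFF_FFFF, a) // high 32 lanes copied from src
//     }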
/// Add packed 16-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi16&expand=91)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm512_add_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i16x32(), b.as_i16x32())) }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi16&expand=92)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm512_mask_add_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, add, src.as_i16x32()))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi16&expand=93)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm512_maskz_add_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, add, i16x32::ZERO))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi16&expand=89)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm256_mask_add_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, add, src.as_i16x16()))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi16&expand=90)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm256_maskz_add_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, add, i16x16::ZERO))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi16&expand=86)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm_mask_add_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, add, src.as_i16x8()))
    }
}

/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi16&expand=87)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm_maskz_add_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, add, i16x8::ZERO))
    }
}
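// The plain add wraps on overflow; the saturating counterparts
// (_mm512_adds_epi16, _mm512_adds_epu16) appear further down in this file.
// A minimal sketch with a hypothetical helper, assuming AVX-512BW:
//
//     #[target_feature(enable = "avx512bw")]
//     fn add_epi16_demo() -> __m512i {
//         let a = _mm512_set1_epi16(i16::MAX);
//         let b = _mm512_set1_epi16(1);
//         _mm512_add_epi16(a, b) // every lane wraps to i16::MIN
//     }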
/// Add packed 8-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi8&expand=118)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm512_add_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i8x64(), b.as_i8x64())) }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi8&expand=119)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm512_mask_add_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, add, src.as_i8x64()))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi8&expand=120)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm512_maskz_add_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, add, i8x64::ZERO))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi8&expand=116)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm256_mask_add_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, add, src.as_i8x32()))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi8&expand=117)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm256_maskz_add_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, add, i8x32::ZERO))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi8&expand=113)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm_mask_add_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, add, src.as_i8x16()))
    }
}

/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi8&expand=114)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm_maskz_add_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, add, i8x16::ZERO))
    }
}
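// As with the 16-bit form, this add wraps. A sketch of the zero-masked
// variant (hypothetical helper, assuming AVX-512BW):
//
//     #[target_feature(enable = "avx512bw")]
//     fn add_epi8_demo(k: __mmask64) -> __m512i {
//         let a = _mm512_set1_epi8(100);
//         let b = _mm512_set1_epi8(100);
//         _mm512_maskz_add_epi8(k, a, b) // selected lanes wrap to -56; others are zeroed
//     }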
/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu16&expand=197)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm512_adds_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_add(a.as_u16x32(), b.as_u16x32())) }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu16&expand=198)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm512_mask_adds_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, add, src.as_u16x32()))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu16&expand=199)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, add, u16x32::ZERO))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu16&expand=195)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm256_mask_adds_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, add, src.as_u16x16()))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu16&expand=196)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, add, u16x16::ZERO))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu16&expand=192)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, add, src.as_u16x8()))
    }
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu16&expand=193)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, add, u16x8::ZERO))
    }
}
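// Saturation sketch (hypothetical helper, assuming AVX-512BW): unsigned
// saturating add clamps at u16::MAX instead of wrapping to 0.
//
//     #[target_feature(enable = "avx512bw")]
//     fn adds_epu16_demo() -> __m512i {
//         let a = _mm512_set1_epi16(-1); // all-ones bit pattern, i.e. u16::MAX per lane
//         let b = _mm512_set1_epi16(1);
//         _mm512_adds_epu16(a, b) // every lane stays at u16::MAX (0xFFFF)
//     }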
/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu8&expand=206)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_add(a.as_u8x64(), b.as_u8x64())) }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu8&expand=207)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, add, src.as_u8x64()))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu8&expand=208)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, add, u8x64::ZERO))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu8&expand=204)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, add, src.as_u8x32()))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu8&expand=205)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, add, u8x32::ZERO))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu8&expand=201)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, add, src.as_u8x16()))
    }
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu8&expand=202)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, add, u8x16::ZERO))
    }
}
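// Saturation sketch (hypothetical helper, assuming AVX-512BW): byte lanes
// clamp at u8::MAX.
//
//     #[target_feature(enable = "avx512bw")]
//     fn adds_epu8_demo() -> __m512i {
//         let a = _mm512_set1_epi8(-56); // bit pattern 0xC8, i.e. 200 as u8
//         let b = _mm512_set1_epi8(100);
//         _mm512_adds_epu8(a, b) // 200 + 100 clamps to 255 in every lane
//     }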
/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi16&expand=179)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm512_adds_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_add(a.as_i16x32(), b.as_i16x32())) }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi16&expand=180)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm512_mask_adds_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, add, src.as_i16x32()))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi16&expand=181)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, add, i16x32::ZERO))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi16&expand=177)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm256_mask_adds_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, add, src.as_i16x16()))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi16&expand=178)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, add, i16x16::ZERO))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi16&expand=174)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, add, src.as_i16x8()))
    }
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi16&expand=175)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, add, i16x8::ZERO))
    }
}
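// Saturation sketch (hypothetical helper, assuming AVX-512BW): signed
// saturating add clamps at i16::MAX / i16::MIN instead of wrapping.
//
//     #[target_feature(enable = "avx512bw")]
//     fn adds_epi16_demo() -> __m512i {
//         let a = _mm512_set1_epi16(i16::MAX - 10);
//         let b = _mm512_set1_epi16(100);
//         _mm512_adds_epi16(a, b) // every lane clamps to i16::MAX (32767)
//     }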
/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi8&expand=188)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_add(a.as_i8x64(), b.as_i8x64())) }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi8&expand=189)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, add, src.as_i8x64()))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi8&expand=190)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_adds_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, add, i8x64::ZERO))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi8&expand=186)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, add, src.as_i8x32()))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi8&expand=187)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_adds_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, add, i8x32::ZERO))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi8&expand=183)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, add, src.as_i8x16()))
    }
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi8&expand=184)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub fn _mm_maskz_adds_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_adds_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, add, i8x16::ZERO))
    }
}
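// Saturation sketch (hypothetical helper, assuming AVX-512BW): the signed
// byte form clamps at the negative end as well.
//
//     #[target_feature(enable = "avx512bw")]
//     fn adds_epi8_demo() -> __m512i {
//         let a = _mm512_set1_epi8(i8::MIN);
//         let b = _mm512_set1_epi8(-1);
//         _mm512_adds_epi8(a, b) // every lane clamps to i8::MIN (-128)
//     }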
/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi16&expand=5685)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub fn _mm512_sub_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_sub(a.as_i16x32(), b.as_i16x32())) }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi16&expand=5683)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub fn _mm512_mask_sub_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi16&expand=5684)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub fn _mm512_maskz_sub_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, sub, i16x32::ZERO))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi16&expand=5680)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub fn _mm256_mask_sub_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi16&expand=5681)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub fn _mm256_maskz_sub_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, sub, i16x16::ZERO))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi16&expand=5677)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub fn _mm_mask_sub_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
    }
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi16&expand=5678)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub fn _mm_maskz_sub_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, sub, i16x8::ZERO))
    }
}
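// Like the plain adds, the plain sub wraps; the saturating forms follow
// below. Sketch (hypothetical helper, assuming AVX-512BW):
//
//     #[target_feature(enable = "avx512bw")]
//     fn sub_epi16_demo() -> __m512i {
//         let a = _mm512_set1_epi16(i16::MIN);
//         let b = _mm512_set1_epi16(1);
//         _mm512_sub_epi16(a, b) // every lane wraps to i16::MAX
//     }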
/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi8&expand=5712)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub fn _mm512_sub_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_sub(a.as_i8x64(), b.as_i8x64())) }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi8&expand=5710)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub fn _mm512_mask_sub_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi8&expand=5711)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub fn _mm512_maskz_sub_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, sub, i8x64::ZERO))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi8&expand=5707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub fn _mm256_mask_sub_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi8&expand=5708)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub fn _mm256_maskz_sub_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, sub, i8x32::ZERO))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi8&expand=5704)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub fn _mm_mask_sub_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
    }
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi8&expand=5705)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub fn _mm_maskz_sub_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, sub, i8x16::ZERO))
    }
}
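// Write-masked sketch (hypothetical helper, assuming AVX-512BW): lanes not
// selected by k keep their value from src.
//
//     #[target_feature(enable = "avx512bw")]
//     fn sub_epi8_demo(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
//         _mm512_mask_sub_epi8(src, k, a, b) // a[i] - b[i] (wrapping) where k is set
//     }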
/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu16&expand=5793)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub fn _mm512_subs_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_sub(a.as_u16x32(), b.as_u16x32())) }
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu16&expand=5791)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub fn _mm512_mask_subs_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, sub, src.as_u16x32()))
    }
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu16&expand=5792)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, sub, u16x32::ZERO))
    }
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu16&expand=5788)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub fn _mm256_mask_subs_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, sub, src.as_u16x16()))
    }
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu16&expand=5789)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, sub, u16x16::ZERO))
    }
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu16&expand=5785)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, sub, src.as_u16x8()))
    }
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu16&expand=5786)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, sub, u16x8::ZERO))
    }
}
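// Saturation sketch (hypothetical helper, assuming AVX-512BW): unsigned
// saturating subtract clamps to 0 instead of wrapping around.
//
//     #[target_feature(enable = "avx512bw")]
//     fn subs_epu16_demo() -> __m512i {
//         let a = _mm512_set1_epi16(1);
//         let b = _mm512_set1_epi16(2);
//         _mm512_subs_epu16(a, b) // 1 - 2 clamps to 0 (a plain sub would wrap to 0xFFFF)
//     }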
1065/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst.
1066///
1067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu8&expand=5802)
1068#[inline]
1069#[target_feature(enable = "avx512bw")]
1070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1071#[cfg_attr(test, assert_instr(vpsubusb))]
1072pub fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i {
1073    unsafe { transmute(simd_saturating_sub(a.as_u8x64(), b.as_u8x64())) }
1074}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu8&expand=5800)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, sub, src.as_u8x64()))
    }
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu8&expand=5801)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, sub, u8x64::ZERO))
    }
}
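
// Illustrative sketch, not part of the upstream source: contrasts the
// writemask and zeromask forms above. Lanes whose mask bit is clear come
// from `src` in the `mask_` form and are zeroed in the `maskz_` form.
#[cfg(test)]
#[target_feature(enable = "avx512bw,avx512f")]
fn _example_mask_vs_maskz_subs_epu8() {
    let src = _mm512_set1_epi8(42);
    let (a, b) = (_mm512_set1_epi8(9), _mm512_set1_epi8(3));
    let k: __mmask64 = 0; // every mask bit clear
    let kept = _mm512_mask_subs_epu8(src, k, a, b); // all lanes copied from src
    let zeroed = _mm512_maskz_subs_epu8(k, a, b); // all lanes zeroed
    assert_eq!(_mm512_cmpeq_epi8_mask(kept, src), u64::MAX);
    assert_eq!(_mm512_cmpeq_epi8_mask(zeroed, _mm512_setzero_si512()), u64::MAX);
}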

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu8&expand=5797)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, sub, src.as_u8x32()))
    }
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu8&expand=5798)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, sub, u8x32::ZERO))
    }
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu8&expand=5794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, sub, src.as_u8x16()))
    }
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu8&expand=5795)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, sub, u8x16::ZERO))
    }
}

/// Subtract packed signed 16-bit integers in b from packed signed 16-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi16&expand=5775)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub fn _mm512_subs_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_sub(a.as_i16x32(), b.as_i16x32())) }
}
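
// Illustrative sketch, not part of the upstream source: signed saturation
// clamps at the i16 bounds instead of wrapping around.
#[cfg(test)]
#[target_feature(enable = "avx512bw,avx512f")]
fn _example_subs_epi16_saturates_at_i16_min() {
    let a = _mm512_set1_epi16(i16::MIN);
    let b = _mm512_set1_epi16(1);
    // i16::MIN - 1 stays at i16::MIN; wrapping would give i16::MAX.
    let r = _mm512_subs_epi16(a, b);
    assert_eq!(_mm512_cmpeq_epi16_mask(r, a), u32::MAX);
}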

/// Subtract packed signed 16-bit integers in b from packed signed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi16&expand=5773)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub fn _mm512_mask_subs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
    }
}

/// Subtract packed signed 16-bit integers in b from packed signed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi16&expand=5774)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, sub, i16x32::ZERO))
    }
}

/// Subtract packed signed 16-bit integers in b from packed signed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi16&expand=5770)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub fn _mm256_mask_subs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
    }
}

/// Subtract packed signed 16-bit integers in b from packed signed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi16&expand=5771)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, sub, i16x16::ZERO))
    }
}

/// Subtract packed signed 16-bit integers in b from packed signed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi16&expand=5767)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
    }
}

/// Subtract packed signed 16-bit integers in b from packed signed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi16&expand=5768)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, sub, i16x8::ZERO))
    }
}

/// Subtract packed signed 8-bit integers in b from packed signed 8-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi8&expand=5784)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_saturating_sub(a.as_i8x64(), b.as_i8x64())) }
}
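
// Illustrative sketch, not part of the upstream source: the 8-bit signed
// variant clamps at the i8 bounds, mirroring the 16-bit case above.
#[cfg(test)]
#[target_feature(enable = "avx512bw,avx512f")]
fn _example_subs_epi8_saturates_at_i8_min() {
    let a = _mm512_set1_epi8(i8::MIN);
    let b = _mm512_set1_epi8(1);
    // i8::MIN - 1 stays at i8::MIN; wrapping would give i8::MAX.
    let r = _mm512_subs_epi8(a, b);
    assert_eq!(_mm512_cmpeq_epi8_mask(r, a), u64::MAX);
}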

/// Subtract packed signed 8-bit integers in b from packed signed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi8&expand=5782)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
    }
}

/// Subtract packed signed 8-bit integers in b from packed signed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi8&expand=5783)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_subs_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, sub, i8x64::ZERO))
    }
}

/// Subtract packed signed 8-bit integers in b from packed signed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi8&expand=5779)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
    }
}

/// Subtract packed signed 8-bit integers in b from packed signed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi8&expand=5780)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_subs_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, sub, i8x32::ZERO))
    }
}

/// Subtract packed signed 8-bit integers in b from packed signed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi8&expand=5776)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
    }
}

/// Subtract packed signed 8-bit integers in b from packed signed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi8&expand=5777)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_subs_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, sub, i8x16::ZERO))
    }
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epu16&expand=3973)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub fn _mm512_mulhi_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = simd_cast::<_, u32x32>(a.as_u16x32());
        let b = simd_cast::<_, u32x32>(b.as_u16x32());
        let r = simd_shr(simd_mul(a, b), u32x32::splat(16));
        transmute(simd_cast::<u32x32, u16x32>(r))
    }
}
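
// Illustrative sketch, not part of the upstream source: per lane this is
// ((a as u32 * b as u32) >> 16) as u16, i.e. the high half of the widened
// unsigned product.
#[cfg(test)]
#[target_feature(enable = "avx512bw,avx512f")]
fn _example_mulhi_epu16() {
    let a = _mm512_set1_epi16(-1); // 0xFFFF = 65535 viewed as unsigned
    let b = _mm512_set1_epi16(-1);
    // 65535 * 65535 = 0xFFFE_0001, so the high 16 bits are 0xFFFE (-2 as i16).
    let r = _mm512_mulhi_epu16(a, b);
    assert_eq!(_mm512_cmpeq_epi16_mask(r, _mm512_set1_epi16(-2)), u32::MAX);
}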

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epu16&expand=3971)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub fn _mm512_mask_mulhi_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, mul, src.as_u16x32()))
    }
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epu16&expand=3972)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub fn _mm512_maskz_mulhi_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, mul, u16x32::ZERO))
    }
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epu16&expand=3968)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub fn _mm256_mask_mulhi_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, mul, src.as_u16x16()))
    }
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epu16&expand=3969)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub fn _mm256_maskz_mulhi_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, mul, u16x16::ZERO))
    }
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epu16&expand=3965)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub fn _mm_mask_mulhi_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhi_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, mul, src.as_u16x8()))
    }
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epu16&expand=3966)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub fn _mm_maskz_mulhi_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhi_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, mul, u16x8::ZERO))
    }
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epi16&expand=3962)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub fn _mm512_mulhi_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = simd_cast::<_, i32x32>(a.as_i16x32());
        let b = simd_cast::<_, i32x32>(b.as_i16x32());
        let r = simd_shr(simd_mul(a, b), i32x32::splat(16));
        transmute(simd_cast::<i32x32, i16x32>(r))
    }
}
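
// Illustrative sketch, not part of the upstream source: unlike the epu16
// form, the product here is sign-extended, so the same bit patterns can
// give a different high half.
#[cfg(test)]
#[target_feature(enable = "avx512bw,avx512f")]
fn _example_mulhi_epi16() {
    let a = _mm512_set1_epi16(-1); // 0xFFFF
    let b = _mm512_set1_epi16(1);
    // Signed: -1 * 1 = 0xFFFF_FFFF, high half 0xFFFF; the unsigned form
    // would compute 65535 * 1 = 0x0000_FFFF, high half 0.
    let r = _mm512_mulhi_epi16(a, b);
    assert_eq!(_mm512_cmpeq_epi16_mask(r, _mm512_set1_epi16(-1)), u32::MAX);
}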

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epi16&expand=3960)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub fn _mm512_mask_mulhi_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
    }
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epi16&expand=3961)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub fn _mm512_maskz_mulhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
    }
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epi16&expand=3957)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub fn _mm256_mask_mulhi_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
    }
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epi16&expand=3958)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub fn _mm256_maskz_mulhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
    }
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epi16&expand=3954)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub fn _mm_mask_mulhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhi_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
    }
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epi16&expand=3955)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub fn _mm_maskz_mulhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhi_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhrs_epi16&expand=3986)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm512_mulhrs_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpmulhrsw(a.as_i16x32(), b.as_i16x32())) }
}
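
// Illustrative sketch, not part of the upstream source: per lane this is
// ((a * b >> 14) + 1) >> 1, i.e. a rounded Q15 fixed-point multiply.
#[cfg(test)]
#[target_feature(enable = "avx512bw,avx512f")]
fn _example_mulhrs_epi16_q15() {
    let a = _mm512_set1_epi16(0x4000); // 0.5 in Q15
    let b = _mm512_set1_epi16(0x2000); // 0.25 in Q15
    // 0x4000 * 0x2000 = 0x0800_0000; >> 14 gives 0x2000, +1 then >> 1
    // gives 0x1000, which is 0.125 in Q15.
    let r = _mm512_mulhrs_epi16(a, b);
    assert_eq!(_mm512_cmpeq_epi16_mask(r, _mm512_set1_epi16(0x1000)), u32::MAX);
}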

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhrs_epi16&expand=3984)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm512_mask_mulhrs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhrs_epi16&expand=3985)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm512_maskz_mulhrs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhrs_epi16&expand=3981)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm256_mask_mulhrs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhrs_epi16&expand=3982)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm256_maskz_mulhrs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhrs_epi16&expand=3978)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm_mask_mulhrs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhrs_epi16&expand=3979)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm_maskz_mulhrs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
    }
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi16&expand=3996)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub fn _mm512_mullo_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_mul(a.as_i16x32(), b.as_i16x32())) }
}
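
// Illustrative sketch, not part of the upstream source: keeping only the
// low half of the widened product is a plain wrapping multiply per lane.
#[cfg(test)]
#[target_feature(enable = "avx512bw,avx512f")]
fn _example_mullo_epi16_wraps() {
    let a = _mm512_set1_epi16(300);
    // 300 * 300 = 90000 = 0x1_5F90; the low 16 bits are 0x5F90 = 24464,
    // the same as 90000 mod 65536.
    let r = _mm512_mullo_epi16(a, a);
    assert_eq!(_mm512_cmpeq_epi16_mask(r, _mm512_set1_epi16(24464)), u32::MAX);
}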

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi16&expand=3994)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub fn _mm512_mask_mullo_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mullo_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
    }
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi16&expand=3995)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub fn _mm512_maskz_mullo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mullo_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
    }
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi16&expand=3991)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub fn _mm256_mask_mullo_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mullo_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
    }
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi16&expand=3992)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub fn _mm256_maskz_mullo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mullo_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
    }
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi16&expand=3988)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub fn _mm_mask_mullo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mullo_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
    }
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi16&expand=3989)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mullo_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu16&expand=3609)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_u16x32();
        let b = b.as_u16x32();
        transmute(simd_select::<i16x32, _>(simd_gt(a, b), a, b))
    }
}
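
// Illustrative sketch, not part of the upstream source: the comparison is
// unsigned, so a lane of 0xFFFF counts as 65535, not -1.
#[cfg(test)]
#[target_feature(enable = "avx512bw,avx512f")]
fn _example_max_epu16_is_unsigned() {
    let a = _mm512_set1_epi16(-1); // 0xFFFF = 65535 viewed as unsigned
    let b = _mm512_setzero_si512();
    // Unsigned max picks 0xFFFF; a signed max would pick 0 here.
    let r = _mm512_max_epu16(a, b);
    assert_eq!(_mm512_cmpeq_epi16_mask(r, a), u32::MAX);
}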

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu16&expand=3607)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub fn _mm512_mask_max_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, max, src.as_u16x32()))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu16&expand=3608)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub fn _mm512_maskz_max_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, max, u16x32::ZERO))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu16&expand=3604)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub fn _mm256_mask_max_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, max, src.as_u16x16()))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu16&expand=3605)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub fn _mm256_maskz_max_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, max, u16x16::ZERO))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu16&expand=3601)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub fn _mm_mask_max_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, max, src.as_u16x8()))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu16&expand=3602)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, max, u16x8::ZERO))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu8&expand=3636)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        transmute(simd_select::<i8x64, _>(simd_gt(a, b), a, b))
    }
}
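
// Illustrative sketch, not part of the upstream source: the byte analogue
// of the unsigned comparison above, where 0x80 counts as 128, not -128.
#[cfg(test)]
#[target_feature(enable = "avx512bw,avx512f")]
fn _example_max_epu8_is_unsigned() {
    let a = _mm512_set1_epi8(i8::MIN); // 0x80 = 128 viewed as unsigned
    let b = _mm512_set1_epi8(1);
    let r = _mm512_max_epu8(a, b); // unsigned max picks 128, not 1
    assert_eq!(_mm512_cmpeq_epi8_mask(r, a), u64::MAX);
}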

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu8&expand=3634)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub fn _mm512_mask_max_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, max, src.as_u8x64()))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu8&expand=3635)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub fn _mm512_maskz_max_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, max, u8x64::ZERO))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu8&expand=3631)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub fn _mm256_mask_max_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, max, src.as_u8x32()))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu8&expand=3632)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub fn _mm256_maskz_max_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, max, u8x32::ZERO))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu8&expand=3628)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub fn _mm_mask_max_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, max, src.as_u8x16()))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu8&expand=3629)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, max, u8x16::ZERO))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi16&expand=3573)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
pub fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        transmute(simd_select::<i16x32, _>(simd_gt(a, b), a, b))
    }
}
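
// Illustrative sketch, not part of the upstream source: here the
// comparison is signed, so negative lanes lose to positive ones.
#[cfg(test)]
#[target_feature(enable = "avx512bw,avx512f")]
fn _example_max_epi16_is_signed() {
    let a = _mm512_set1_epi16(-5);
    let b = _mm512_set1_epi16(3);
    let r = _mm512_max_epi16(a, b); // signed max picks 3
    assert_eq!(_mm512_cmpeq_epi16_mask(r, b), u32::MAX);
}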
1952
1953/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1954///
1955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi16&expand=3571)
1956#[inline]
1957#[target_feature(enable = "avx512bw")]
1958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1959#[cfg_attr(test, assert_instr(vpmaxsw))]
1960pub fn _mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1961    unsafe {
1962        let max = _mm512_max_epi16(a, b).as_i16x32();
1963        transmute(simd_select_bitmask(k, max, src.as_i16x32()))
1964    }
1965}
1966
1967/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1968///
1969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi16&expand=3572)
1970#[inline]
1971#[target_feature(enable = "avx512bw")]
1972#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1973#[cfg_attr(test, assert_instr(vpmaxsw))]
1974pub fn _mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1975    unsafe {
1976        let max = _mm512_max_epi16(a, b).as_i16x32();
1977        transmute(simd_select_bitmask(k, max, i16x32::ZERO))
1978    }
1979}
1980
1981/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1982///
1983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi16&expand=3568)
1984#[inline]
1985#[target_feature(enable = "avx512bw,avx512vl")]
1986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1987#[cfg_attr(test, assert_instr(vpmaxsw))]
1988pub fn _mm256_mask_max_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1989    unsafe {
1990        let max = _mm256_max_epi16(a, b).as_i16x16();
1991        transmute(simd_select_bitmask(k, max, src.as_i16x16()))
1992    }
1993}
1994
1995/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1996///
1997/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi16&expand=3569)
1998#[inline]
1999#[target_feature(enable = "avx512bw,avx512vl")]
2000#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2001#[cfg_attr(test, assert_instr(vpmaxsw))]
2002pub fn _mm256_maskz_max_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2003    unsafe {
2004        let max = _mm256_max_epi16(a, b).as_i16x16();
2005        transmute(simd_select_bitmask(k, max, i16x16::ZERO))
2006    }
2007}
2008
2009/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2010///
2011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi16&expand=3565)
2012#[inline]
2013#[target_feature(enable = "avx512bw,avx512vl")]
2014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2015#[cfg_attr(test, assert_instr(vpmaxsw))]
2016pub fn _mm_mask_max_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2017    unsafe {
2018        let max = _mm_max_epi16(a, b).as_i16x8();
2019        transmute(simd_select_bitmask(k, max, src.as_i16x8()))
2020    }
2021}
2022
2023/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2024///
2025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi16&expand=3566)
2026#[inline]
2027#[target_feature(enable = "avx512bw,avx512vl")]
2028#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2029#[cfg_attr(test, assert_instr(vpmaxsw))]
2030pub fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2031    unsafe {
2032        let max = _mm_max_epi16(a, b).as_i16x8();
2033        transmute(simd_select_bitmask(k, max, i16x8::ZERO))
2034    }
2035}
2036
/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi8&expand=3600)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        transmute(simd_select::<i8x64, _>(simd_gt(a, b), a, b))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi8&expand=3598)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub fn _mm512_mask_max_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, max, src.as_i8x64()))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi8&expand=3599)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub fn _mm512_maskz_max_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, max, i8x64::ZERO))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi8&expand=3595)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub fn _mm256_mask_max_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, max, src.as_i8x32()))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi8&expand=3596)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub fn _mm256_maskz_max_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, max, i8x32::ZERO))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi8&expand=3592)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub fn _mm_mask_max_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, max, src.as_i8x16()))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi8&expand=3593)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, max, i8x16::ZERO))
    }
}

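// Illustrative sketch (not upstream code): with 64 byte lanes the mask is a
// full `__mmask64`. An all-ones mask makes the masked form equal to the plain
// max, and a zero mask passes `src` through unchanged. Module and test names
// are hypothetical; `assert_eq_m512i` is assumed from this crate's test helpers.
#[cfg(test)]
mod max_epi8_mask_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn mask_extremes() {
        let a = _mm512_set1_epi8(-5);
        let b = _mm512_set1_epi8(3);
        // All-ones mask: every lane takes max(a, b).
        let r = _mm512_mask_max_epi8(a, u64::MAX, a, b);
        assert_eq_m512i(r, _mm512_max_epi8(a, b));
        // Zero mask: every lane copies `src` (here `a`).
        let r = _mm512_mask_max_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
    }
}
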
/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu16&expand=3723)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_u16x32();
        let b = b.as_u16x32();
        transmute(simd_select::<i16x32, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu16&expand=3721)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub fn _mm512_mask_min_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, min, src.as_u16x32()))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu16&expand=3722)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub fn _mm512_maskz_min_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, min, u16x32::ZERO))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu16&expand=3718)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub fn _mm256_mask_min_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, min, src.as_u16x16()))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu16&expand=3719)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub fn _mm256_maskz_min_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, min, u16x16::ZERO))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu16&expand=3715)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub fn _mm_mask_min_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, min, src.as_u16x8()))
    }
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu16&expand=3716)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, min, u16x8::ZERO))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu8&expand=3750)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminub))]
pub fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        transmute(simd_select::<i8x64, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu8&expand=3748)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminub))]
pub fn _mm512_mask_min_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, min, src.as_u8x64()))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu8&expand=3749)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminub))]
pub fn _mm512_maskz_min_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, min, u8x64::ZERO))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu8&expand=3745)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminub))]
pub fn _mm256_mask_min_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, min, src.as_u8x32()))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu8&expand=3746)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminub))]
pub fn _mm256_maskz_min_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, min, u8x32::ZERO))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu8&expand=3742)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminub))]
pub fn _mm_mask_min_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, min, src.as_u8x16()))
    }
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu8&expand=3743)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminub))]
pub fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, min, u8x16::ZERO))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi16&expand=3687)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        transmute(simd_select::<i16x32, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi16&expand=3685)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm512_mask_min_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, min, src.as_i16x32()))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi16&expand=3686)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm512_maskz_min_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, min, i16x32::ZERO))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi16&expand=3682)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm256_mask_min_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, min, src.as_i16x16()))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi16&expand=3683)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm256_maskz_min_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, min, i16x16::ZERO))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi16&expand=3679)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm_mask_min_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, min, src.as_i16x8()))
    }
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi16&expand=3680)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, min, i16x8::ZERO))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi8&expand=3714)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        transmute(simd_select::<i8x64, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi8&expand=3712)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm512_mask_min_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, min, src.as_i8x64()))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi8&expand=3713)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm512_maskz_min_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, min, i8x64::ZERO))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi8&expand=3709)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm256_mask_min_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, min, src.as_i8x32()))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi8&expand=3710)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm256_maskz_min_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, min, i8x32::ZERO))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi8&expand=3706)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm_mask_min_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, min, src.as_i8x16()))
    }
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi8&expand=3707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm_maskz_min_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, min, i8x16::ZERO))
    }
}

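// Illustrative sketch (not upstream code) contrasting the unsigned and signed
// byte minimum: the same bit pattern 0xFF reads as 255 for `_mm512_min_epu8`
// but as -1 for `_mm512_min_epi8`, so the two pick opposite winners. Names
// are hypothetical; `assert_eq_m512i` is assumed from this crate's test helpers.
#[cfg(test)]
mod min_signedness_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn signedness_changes_the_winner() {
        let a = _mm512_set1_epi8(-1); // 0xFF: 255 unsigned, -1 signed
        let b = _mm512_set1_epi8(1);
        assert_eq_m512i(_mm512_min_epu8(a, b), b); // 1 < 255
        assert_eq_m512i(_mm512_min_epi8(a, b), a); // -1 < 1
    }
}
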
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu16_mask&expand=1050)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmplt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<u16x32, _>(simd_lt(a.as_u16x32(), b.as_u16x32())) }
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu16_mask&expand=1051)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmplt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu16_mask&expand=1050)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmplt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<u16x16, _>(simd_lt(a.as_u16x16(), b.as_u16x16())) }
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu16_mask&expand=1049)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmplt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu16_mask&expand=1018)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmplt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u16x8, _>(simd_lt(a.as_u16x8(), b.as_u16x8())) }
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu16_mask&expand=1019)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmplt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

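// Illustrative sketch (not upstream code): the compare-to-mask intrinsics
// return one bit per lane, with lane 0 in bit 0. Module and test names are
// hypothetical.
#[cfg(test)]
mod cmplt_epu16_mask_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn lane_order_of_the_mask() {
        let a = _mm_setr_epi16(0, 9, 0, 9, 0, 9, 0, 9);
        let b = _mm_set1_epi16(5);
        // Lanes 0, 2, 4, 6 hold 0 < 5, so bits 0, 2, 4, 6 of the mask are set.
        let m: __mmask8 = _mm_cmplt_epu16_mask(a, b);
        assert_eq!(m, 0b0101_0101);
    }
}
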
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu8_mask&expand=1068)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmplt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<u8x64, _>(simd_lt(a.as_u8x64(), b.as_u8x64())) }
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu8_mask&expand=1069)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmplt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu8_mask&expand=1066)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmplt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<u8x32, _>(simd_lt(a.as_u8x32(), b.as_u8x32())) }
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu8_mask&expand=1067)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmplt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu8_mask&expand=1064)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmplt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<u8x16, _>(simd_lt(a.as_u8x16(), b.as_u8x16())) }
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu8_mask&expand=1065)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmplt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

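// Illustrative sketch (not upstream code): the epu8 compares are unsigned, so
// the byte 0xFF (written as -1 through the signed `set1` helper) is 255 and is
// never less than 0. Names are hypothetical.
#[cfg(test)]
mod cmplt_epu8_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn epu8_compares_are_unsigned() {
        let a = _mm_set1_epi8(-1); // 0xFF == 255 unsigned
        let b = _mm_set1_epi8(0);
        assert_eq!(_mm_cmplt_epu8_mask(a, b), 0); // 255 < 0 holds in no lane
        assert_eq!(_mm_cmplt_epu8_mask(b, a), 0xFFFF); // 0 < 255 in all 16 lanes
    }
}
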
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi16_mask&expand=1022)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmplt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<i16x32, _>(simd_lt(a.as_i16x32(), b.as_i16x32())) }
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi16_mask&expand=1023)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmplt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi16_mask&expand=1020)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmplt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<i16x16, _>(simd_lt(a.as_i16x16(), b.as_i16x16())) }
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi16_mask&expand=1021)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmplt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16_mask&expand=1018)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmplt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi16_mask&expand=1019)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmplt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi8_mask&expand=1044)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmplt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<i8x64, _>(simd_lt(a.as_i8x64(), b.as_i8x64())) }
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi8_mask&expand=1045)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmplt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi8_mask&expand=1042)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmplt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<i8x32, _>(simd_lt(a.as_i8x32(), b.as_i8x32())) }
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi8_mask&expand=1043)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmplt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8_mask&expand=1040)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmplt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi8_mask&expand=1041)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmplt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

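// Illustrative sketch (not upstream code): in the masked compare forms, `k1`
// pre-filters the result, so a lane's bit can be set only if the corresponding
// bit of `k1` is set. Names are hypothetical.
#[cfg(test)]
mod mask_cmplt_epi8_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn k1_prefilters_the_result() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(3);
        // Every lane satisfies a < b, but only the low eight bits of `k1` survive.
        let m = _mm_mask_cmplt_epi8_mask(0x00FF, a, b);
        assert_eq!(m, 0x00FF);
    }
}
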
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu16_mask&expand=927)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpgt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<u16x32, _>(simd_gt(a.as_u16x32(), b.as_u16x32())) }
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu16_mask&expand=928)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpgt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu16_mask&expand=925)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpgt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<u16x16, _>(simd_gt(a.as_u16x16(), b.as_u16x16())) }
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu16_mask&expand=926)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpgt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu16_mask&expand=923)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpgt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u16x8, _>(simd_gt(a.as_u16x8(), b.as_u16x8())) }
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu16_mask&expand=924)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpgt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu8_mask&expand=945)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpgt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<u8x64, _>(simd_gt(a.as_u8x64(), b.as_u8x64())) }
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu8_mask&expand=946)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpgt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu8_mask&expand=943)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpgt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<u8x32, _>(simd_gt(a.as_u8x32(), b.as_u8x32())) }
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu8_mask&expand=944)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpgt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu8_mask&expand=941)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpgt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<u8x16, _>(simd_gt(a.as_u8x16(), b.as_u8x16())) }
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu8_mask&expand=942)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpgt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi16_mask&expand=897)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpgt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<i16x32, _>(simd_gt(a.as_i16x32(), b.as_i16x32())) }
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi16_mask&expand=898)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpgt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi16_mask&expand=895)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpgt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi16_mask&expand=896)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpgt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16_mask&expand=893)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpgt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi16_mask&expand=894)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpgt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi8_mask&expand=921)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpgt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<i8x64, _>(simd_gt(a.as_i8x64(), b.as_i8x64())) }
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi8_mask&expand=922)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpgt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi8_mask&expand=919)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpgt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi8_mask&expand=920)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpgt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8_mask&expand=917)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpgt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi8_mask&expand=918)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpgt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

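// Illustrative sketch (not upstream code): the greater-than forms above lower
// to the generic compare with `_MM_CMPINT_NLE` ("not less-or-equal"), so with
// an all-ones `k1` the masked generic compare reproduces the direct one.
// Names are hypothetical.
#[cfg(test)]
mod cmpgt_epi8_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn gt_is_cmp_nle() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set1_epi8(7);
        let direct = _mm_cmpgt_epi8_mask(a, b);
        let generic = _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(0xFFFF, a, b);
        assert_eq!(direct, generic);
        assert_eq!(direct, 0xFF00); // lanes 8..=15 hold values greater than 7
    }
}
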
/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu16_mask&expand=989)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<u16x32, _>(simd_le(a.as_u16x32(), b.as_u16x32())) }
}

/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu16_mask&expand=990)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu16_mask&expand=987)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmple_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<u16x16, _>(simd_le(a.as_u16x16(), b.as_u16x16())) }
}

/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu16_mask&expand=988)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmple_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu16_mask&expand=985)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmple_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u16x8, _>(simd_le(a.as_u16x8(), b.as_u16x8())) }
}

/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu16_mask&expand=986)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmple_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}

3126/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.   
3127///
3128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu8_mask&expand=1007)
3129#[inline]
3130#[target_feature(enable = "avx512bw")]
3131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3132#[cfg_attr(test, assert_instr(vpcmp))]
3133pub fn _mm512_cmple_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3134    unsafe { simd_bitmask::<u8x64, _>(simd_le(a.as_u8x64(), b.as_u8x64())) }
3135}
3136
3137/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3138///
3139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu8_mask&expand=1008)
3140#[inline]
3141#[target_feature(enable = "avx512bw")]
3142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3143#[cfg_attr(test, assert_instr(vpcmp))]
3144pub fn _mm512_mask_cmple_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
3145    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
3146}
3147
3148/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.   
3149///
3150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu8_mask&expand=1005)
3151#[inline]
3152#[target_feature(enable = "avx512bw,avx512vl")]
3153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3154#[cfg_attr(test, assert_instr(vpcmp))]
3155pub fn _mm256_cmple_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3156    unsafe { simd_bitmask::<u8x32, _>(simd_le(a.as_u8x32(), b.as_u8x32())) }
3157}
3158
3159/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3160///
3161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu8_mask&expand=1006)
3162#[inline]
3163#[target_feature(enable = "avx512bw,avx512vl")]
3164#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3165#[cfg_attr(test, assert_instr(vpcmp))]
3166pub fn _mm256_mask_cmple_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
3167    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
3168}
3169
3170/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.   
3171///
3172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu8_mask&expand=1003)
3173#[inline]
3174#[target_feature(enable = "avx512bw,avx512vl")]
3175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3176#[cfg_attr(test, assert_instr(vpcmp))]
3177pub fn _mm_cmple_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3178    unsafe { simd_bitmask::<u8x16, _>(simd_le(a.as_u8x16(), b.as_u8x16())) }
3179}
3180
3181/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3182///
3183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu8_mask&expand=1004)
3184#[inline]
3185#[target_feature(enable = "avx512bw,avx512vl")]
3186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3187#[cfg_attr(test, assert_instr(vpcmp))]
3188pub fn _mm_mask_cmple_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
3189    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
3190}
3191
3192/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3193///
3194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi16_mask&expand=965)
3195#[inline]
3196#[target_feature(enable = "avx512bw")]
3197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3198#[cfg_attr(test, assert_instr(vpcmp))]
3199pub fn _mm512_cmple_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3200    unsafe { simd_bitmask::<i16x32, _>(simd_le(a.as_i16x32(), b.as_i16x32())) }
3201}
3202
3203/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3204///
3205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi16_mask&expand=966)
3206#[inline]
3207#[target_feature(enable = "avx512bw")]
3208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3209#[cfg_attr(test, assert_instr(vpcmp))]
3210pub fn _mm512_mask_cmple_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
3211    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
3212}
3213
3214/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3215///
3216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi16_mask&expand=963)
3217#[inline]
3218#[target_feature(enable = "avx512bw,avx512vl")]
3219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3220#[cfg_attr(test, assert_instr(vpcmp))]
3221pub fn _mm256_cmple_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3222    unsafe { simd_bitmask::<i16x16, _>(simd_le(a.as_i16x16(), b.as_i16x16())) }
3223}
3224
3225/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3226///
3227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi16_mask&expand=964)
3228#[inline]
3229#[target_feature(enable = "avx512bw,avx512vl")]
3230#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3231#[cfg_attr(test, assert_instr(vpcmp))]
3232pub fn _mm256_mask_cmple_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
3233    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
3234}
3235
3236/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3237///
3238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi16_mask&expand=961)
3239#[inline]
3240#[target_feature(enable = "avx512bw,avx512vl")]
3241#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3242#[cfg_attr(test, assert_instr(vpcmp))]
3243pub fn _mm_cmple_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3244    unsafe { simd_bitmask::<i16x8, _>(simd_le(a.as_i16x8(), b.as_i16x8())) }
3245}
3246
3247/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3248///
3249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi16_mask&expand=962)
3250#[inline]
3251#[target_feature(enable = "avx512bw,avx512vl")]
3252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3253#[cfg_attr(test, assert_instr(vpcmp))]
3254pub fn _mm_mask_cmple_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
3255    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
3256}
3257
3258/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3259///
3260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi8_mask&expand=983)
3261#[inline]
3262#[target_feature(enable = "avx512bw")]
3263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3264#[cfg_attr(test, assert_instr(vpcmp))]
3265pub fn _mm512_cmple_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3266    unsafe { simd_bitmask::<i8x64, _>(simd_le(a.as_i8x64(), b.as_i8x64())) }
3267}
3268
3269/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3270///
3271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi8_mask&expand=984)
3272#[inline]
3273#[target_feature(enable = "avx512bw")]
3274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3275#[cfg_attr(test, assert_instr(vpcmp))]
3276pub fn _mm512_mask_cmple_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
3277    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
3278}
3279
3280/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3281///
3282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi8_mask&expand=981)
3283#[inline]
3284#[target_feature(enable = "avx512bw,avx512vl")]
3285#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3286#[cfg_attr(test, assert_instr(vpcmp))]
3287pub fn _mm256_cmple_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3288    unsafe { simd_bitmask::<i8x32, _>(simd_le(a.as_i8x32(), b.as_i8x32())) }
3289}
3290
3291/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3292///
3293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi8_mask&expand=982)
3294#[inline]
3295#[target_feature(enable = "avx512bw,avx512vl")]
3296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3297#[cfg_attr(test, assert_instr(vpcmp))]
3298pub fn _mm256_mask_cmple_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
3299    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
3300}
3301
3302/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
3303///
3304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi8_mask&expand=979)
3305#[inline]
3306#[target_feature(enable = "avx512bw,avx512vl")]
3307#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3308#[cfg_attr(test, assert_instr(vpcmp))]
3309pub fn _mm_cmple_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3310    unsafe { simd_bitmask::<i8x16, _>(simd_le(a.as_i8x16(), b.as_i8x16())) }
3311}
3312
3313/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3314///
3315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi8_mask&expand=980)
3316#[inline]
3317#[target_feature(enable = "avx512bw,avx512vl")]
3318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3319#[cfg_attr(test, assert_instr(vpcmp))]
3320pub fn _mm_mask_cmple_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
3321    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
3322}
3323
3324/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3325///
3326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu16_mask&expand=867)
3327#[inline]
3328#[target_feature(enable = "avx512bw")]
3329#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3330#[cfg_attr(test, assert_instr(vpcmp))]
3331pub fn _mm512_cmpge_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3332    unsafe { simd_bitmask::<u16x32, _>(simd_ge(a.as_u16x32(), b.as_u16x32())) }
3333}
3334
3335/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3336///
3337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu16_mask&expand=868)
3338#[inline]
3339#[target_feature(enable = "avx512bw")]
3340#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3341#[cfg_attr(test, assert_instr(vpcmp))]
3342pub fn _mm512_mask_cmpge_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
3343    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
3344}
3345
3346/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3347///
3348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu16_mask&expand=865)
3349#[inline]
3350#[target_feature(enable = "avx512bw,avx512vl")]
3351#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3352#[cfg_attr(test, assert_instr(vpcmp))]
3353pub fn _mm256_cmpge_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3354    unsafe { simd_bitmask::<u16x16, _>(simd_ge(a.as_u16x16(), b.as_u16x16())) }
3355}
3356
3357/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3358///
3359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu16_mask&expand=866)
3360#[inline]
3361#[target_feature(enable = "avx512bw,avx512vl")]
3362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3363#[cfg_attr(test, assert_instr(vpcmp))]
3364pub fn _mm256_mask_cmpge_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
3365    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
3366}
3367
3368/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3369///
3370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu16_mask&expand=863)
3371#[inline]
3372#[target_feature(enable = "avx512bw,avx512vl")]
3373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3374#[cfg_attr(test, assert_instr(vpcmp))]
3375pub fn _mm_cmpge_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3376    unsafe { simd_bitmask::<u16x8, _>(simd_ge(a.as_u16x8(), b.as_u16x8())) }
3377}
3378
3379/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3380///
3381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu16_mask&expand=864)
3382#[inline]
3383#[target_feature(enable = "avx512bw,avx512vl")]
3384#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3385#[cfg_attr(test, assert_instr(vpcmp))]
3386pub fn _mm_mask_cmpge_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
3387    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
3388}
3389
3390/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3391///
3392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu8_mask&expand=885)
3393#[inline]
3394#[target_feature(enable = "avx512bw")]
3395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3396#[cfg_attr(test, assert_instr(vpcmp))]
3397pub fn _mm512_cmpge_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3398    unsafe { simd_bitmask::<u8x64, _>(simd_ge(a.as_u8x64(), b.as_u8x64())) }
3399}
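
// A small identity worth noting (a sketch, not asserted anywhere in this
// file): greater-than-or-equal is less-than-or-equal with the operands
// swapped, so for any `a` and `b` the two intrinsics agree:
//
//     assert_eq!(
//         _mm512_cmpge_epu8_mask(a, b),
//         _mm512_cmple_epu8_mask(b, a),
//     );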

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu8_mask&expand=886)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpge_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu8_mask&expand=883)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpge_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<u8x32, _>(simd_ge(a.as_u8x32(), b.as_u8x32())) }
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu8_mask&expand=884)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpge_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu8_mask&expand=881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpge_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<u8x16, _>(simd_ge(a.as_u8x16(), b.as_u8x16())) }
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu8_mask&expand=882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpge_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi16_mask&expand=843)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpge_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<i16x32, _>(simd_ge(a.as_i16x32(), b.as_i16x32())) }
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi16_mask&expand=844)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpge_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi16_mask&expand=841)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpge_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<i16x16, _>(simd_ge(a.as_i16x16(), b.as_i16x16())) }
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi16_mask&expand=842)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpge_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi16_mask&expand=839)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpge_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i16x8, _>(simd_ge(a.as_i16x8(), b.as_i16x8())) }
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi16_mask&expand=840)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpge_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi8_mask&expand=861)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpge_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<i8x64, _>(simd_ge(a.as_i8x64(), b.as_i8x64())) }
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi8_mask&expand=862)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpge_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi8_mask&expand=859)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpge_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<i8x32, _>(simd_ge(a.as_i8x32(), b.as_i8x32())) }
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi8_mask&expand=860)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpge_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi8_mask&expand=857)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpge_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<i8x16, _>(simd_ge(a.as_i8x16(), b.as_i8x16())) }
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi8_mask&expand=858)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpge_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu16_mask&expand=801)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpeq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<u16x32, _>(simd_eq(a.as_u16x32(), b.as_u16x32())) }
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu16_mask&expand=802)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpeq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu16_mask&expand=799)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpeq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<u16x16, _>(simd_eq(a.as_u16x16(), b.as_u16x16())) }
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu16_mask&expand=800)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpeq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu16_mask&expand=797)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpeq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u16x8, _>(simd_eq(a.as_u16x8(), b.as_u16x8())) }
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu16_mask&expand=798)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpeq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu8_mask&expand=819)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpeq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<u8x64, _>(simd_eq(a.as_u8x64(), b.as_u8x64())) }
}
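
// A minimal usage sketch (illustrative only; `first_match` is a hypothetical
// helper, and nightly `#![feature(stdarch_x86_avx512)]` is assumed): equality
// masks make a memchr-style scan of a 64-byte block a broadcast, one compare,
// and a trailing-zeros count.
//
//     #[target_feature(enable = "avx512f,avx512bw")]
//     fn first_match(block: __m512i, needle: u8) -> Option<u32> {
//         let hits = _mm512_cmpeq_epu8_mask(block, _mm512_set1_epi8(needle as i8));
//         // Bit i of `hits` corresponds to byte i of `block`.
//         if hits == 0 { None } else { Some(hits.trailing_zeros()) }
//     }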

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu8_mask&expand=820)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpeq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu8_mask&expand=817)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpeq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<u8x32, _>(simd_eq(a.as_u8x32(), b.as_u8x32())) }
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu8_mask&expand=818)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpeq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu8_mask&expand=815)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpeq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<u8x16, _>(simd_eq(a.as_u8x16(), b.as_u8x16())) }
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu8_mask&expand=816)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpeq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi16_mask&expand=771)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpeq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<i16x32, _>(simd_eq(a.as_i16x32(), b.as_i16x32())) }
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi16_mask&expand=772)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpeq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi16_mask&expand=769)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpeq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi16_mask&expand=770)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpeq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16_mask&expand=767)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpeq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi16_mask&expand=768)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpeq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi8_mask&expand=795)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpeq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<i8x64, _>(simd_eq(a.as_i8x64(), b.as_i8x64())) }
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi8_mask&expand=796)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpeq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi8_mask&expand=793)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpeq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi8_mask&expand=794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpeq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8_mask&expand=791)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpeq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi8_mask&expand=792)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpeq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu16_mask&expand=1106)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpneq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe { simd_bitmask::<u16x32, _>(simd_ne(a.as_u16x32(), b.as_u16x32())) }
}
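
// A minimal usage sketch (illustrative; `blocks_differ` is a hypothetical
// helper): a whole-vector inequality test is just "is the not-equal mask
// nonzero".
//
//     #[target_feature(enable = "avx512bw")]
//     fn blocks_differ(a: __m512i, b: __m512i) -> bool {
//         _mm512_cmpneq_epu16_mask(a, b) != 0
//     }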
3862
3863/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3864///
3865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu16_mask&expand=1107)
3866#[inline]
3867#[target_feature(enable = "avx512bw")]
3868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3869#[cfg_attr(test, assert_instr(vpcmp))]
3870pub fn _mm512_mask_cmpneq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
3871    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
3872}
3873
3874/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
3875///
3876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu16_mask&expand=1104)
3877#[inline]
3878#[target_feature(enable = "avx512bw,avx512vl")]
3879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3880#[cfg_attr(test, assert_instr(vpcmp))]
3881pub fn _mm256_cmpneq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3882    unsafe { simd_bitmask::<u16x16, _>(simd_ne(a.as_u16x16(), b.as_u16x16())) }
3883}
3884
3885/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3886///
3887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu16_mask&expand=1105)
3888#[inline]
3889#[target_feature(enable = "avx512bw,avx512vl")]
3890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3891#[cfg_attr(test, assert_instr(vpcmp))]
3892pub fn _mm256_mask_cmpneq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
3893    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
3894}
3895
3896/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
3897///
3898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu16_mask&expand=1102)
3899#[inline]
3900#[target_feature(enable = "avx512bw,avx512vl")]
3901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3902#[cfg_attr(test, assert_instr(vpcmp))]
3903pub fn _mm_cmpneq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3904    unsafe { simd_bitmask::<u16x8, _>(simd_ne(a.as_u16x8(), b.as_u16x8())) }
3905}
3906
3907/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3908///
3909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu16_mask&expand=1103)
3910#[inline]
3911#[target_feature(enable = "avx512bw,avx512vl")]
3912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3913#[cfg_attr(test, assert_instr(vpcmp))]
3914pub fn _mm_mask_cmpneq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
3915    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
3916}
3917
3918/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
3919///
3920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu8_mask&expand=1124)
3921#[inline]
3922#[target_feature(enable = "avx512bw")]
3923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3924#[cfg_attr(test, assert_instr(vpcmp))]
3925pub fn _mm512_cmpneq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3926    unsafe { simd_bitmask::<u8x64, _>(simd_ne(a.as_u8x64(), b.as_u8x64())) }
3927}
3928
3929/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3930///
3931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu8_mask&expand=1125)
3932#[inline]
3933#[target_feature(enable = "avx512bw")]
3934#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3935#[cfg_attr(test, assert_instr(vpcmp))]
3936pub fn _mm512_mask_cmpneq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
3937    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
3938}
3939
3940/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
3941///
3942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu8_mask&expand=1122)
3943#[inline]
3944#[target_feature(enable = "avx512bw,avx512vl")]
3945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3946#[cfg_attr(test, assert_instr(vpcmp))]
3947pub fn _mm256_cmpneq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3948    unsafe { simd_bitmask::<u8x32, _>(simd_ne(a.as_u8x32(), b.as_u8x32())) }
3949}
3950
3951/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3952///
3953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu8_mask&expand=1123)
3954#[inline]
3955#[target_feature(enable = "avx512bw,avx512vl")]
3956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3957#[cfg_attr(test, assert_instr(vpcmp))]
3958pub fn _mm256_mask_cmpneq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
3959    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
3960}
3961
3962/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
3963///
3964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu8_mask&expand=1120)
3965#[inline]
3966#[target_feature(enable = "avx512bw,avx512vl")]
3967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3968#[cfg_attr(test, assert_instr(vpcmp))]
3969pub fn _mm_cmpneq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3970    unsafe { simd_bitmask::<u8x16, _>(simd_ne(a.as_u8x16(), b.as_u8x16())) }
3971}
3972
3973/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3974///
3975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu8_mask&expand=1121)
3976#[inline]
3977#[target_feature(enable = "avx512bw,avx512vl")]
3978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3979#[cfg_attr(test, assert_instr(vpcmp))]
3980pub fn _mm_mask_cmpneq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
3981    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
3982}
3983
3984/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
3985///
3986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi16_mask&expand=1082)
3987#[inline]
3988#[target_feature(enable = "avx512bw")]
3989#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3990#[cfg_attr(test, assert_instr(vpcmp))]
3991pub fn _mm512_cmpneq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3992    unsafe { simd_bitmask::<i16x32, _>(simd_ne(a.as_i16x32(), b.as_i16x32())) }
3993}
3994
3995/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3996///
3997/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi16_mask&expand=1083)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpneq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi16_mask&expand=1080)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpneq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe { simd_bitmask::<i16x16, _>(simd_ne(a.as_i16x16(), b.as_i16x16())) }
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi16_mask&expand=1081)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpneq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi16_mask&expand=1078)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpneq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i16x8, _>(simd_ne(a.as_i16x8(), b.as_i16x8())) }
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi16_mask&expand=1079)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpneq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi8_mask&expand=1100)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_cmpneq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe { simd_bitmask::<i8x64, _>(simd_ne(a.as_i8x64(), b.as_i8x64())) }
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi8_mask&expand=1101)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi8_mask&expand=1098)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_cmpneq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe { simd_bitmask::<i8x32, _>(simd_ne(a.as_i8x32(), b.as_i8x32())) }
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi8_mask&expand=1099)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm256_mask_cmpneq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi8_mask&expand=1096)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_cmpneq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe { simd_bitmask::<i8x16, _>(simd_ne(a.as_i8x16(), b.as_i8x16())) }
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi8_mask&expand=1097)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub fn _mm_mask_cmpneq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu16_mask&expand=715)
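///
/// # Examples
///
/// An illustrative sketch (not from the original source) of the unsigned
/// semantics, assuming an AVX-512BW-capable CPU:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // As an unsigned 16-bit value, -1 reinterprets to 0xFFFF.
///     let a = _mm512_set1_epi16(-1);
///     let b = _mm512_set1_epi16(1);
///     // 0xFFFF is not less-or-equal to 1 in any lane, so all 32 bits are set.
///     let m = _mm512_cmp_epu16_mask::<_MM_CMPINT_NLE>(a, b);
///     assert_eq!(m, u32::MAX);
/// }
/// ```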
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_cmp_epu16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u16x32();
        let b = b.as_u16x32();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x32::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x32::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu16_mask&expand=716)
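///
/// # Examples
///
/// An illustrative sketch (not from the original source) of how `k1`
/// pre-filters the comparison, assuming an AVX-512BW-capable CPU:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(3);
///     let b = _mm512_set1_epi16(3);
///     // All 32 lanes compare equal, but k1 zeroes everything above bit 7.
///     let m = _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(0xFF, a, b);
///     assert_eq!(m, 0xFF);
/// }
/// ```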
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_mask_cmp_epu16_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u16x32();
        let b = b.as_u16x32();
        let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x32::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu16_mask&expand=713)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_cmp_epu16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u16x16();
        let b = b.as_u16x16();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x16::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x16::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu16_mask&expand=714)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_mask_cmp_epu16_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u16x16();
        let b = b.as_u16x16();
        let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x16::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu16_mask&expand=711)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_cmp_epu16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u16x8();
        let b = b.as_u16x8();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x8::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu16_mask&expand=712)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_mask_cmp_epu16_mask<const IMM8: i32>(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u16x8();
        let b = b.as_u16x8();
        let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu8_mask&expand=733)
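///
/// # Examples
///
/// An illustrative sketch (not from the original source); note the 64-bit
/// mask, one bit per byte lane. Assumes an AVX-512BW-capable CPU:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi8(0);
///     let b = _mm512_set1_epi8(1);
///     // 0 < 1 holds in all 64 unsigned byte lanes.
///     let m = _mm512_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
///     assert_eq!(m, u64::MAX);
/// }
/// ```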
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_cmp_epu8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x64::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x64::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu8_mask&expand=734)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_mask_cmp_epu8_mask<const IMM8: i32>(
    k1: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __mmask64 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x64::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu8_mask&expand=731)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_cmp_epu8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x32::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x32::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu8_mask&expand=732)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_mask_cmp_epu8_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x32::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu8_mask&expand=729)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_cmp_epu8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x16::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x16::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu8_mask&expand=730)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_mask_cmp_epu8_mask<const IMM8: i32>(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x16::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi16_mask&expand=691)
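///
/// # Examples
///
/// An illustrative sketch (not from the original source) contrasting the
/// signed semantics with the `epu16` variant, assuming an AVX-512BW CPU:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(-1);
///     let b = _mm512_set1_epi16(1);
///     // Signed: -1 < 1 in every lane. The unsigned epu16 variant would
///     // disagree, since -1 reinterprets to 0xFFFF there.
///     let m = _mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
///     assert_eq!(m, u32::MAX);
/// }
/// ```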
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_cmp_epi16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x32::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x32::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi16_mask&expand=692)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_mask_cmp_epi16_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x32::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi16_mask&expand=689)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_cmp_epi16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i16x16();
        let b = b.as_i16x16();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x16::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x16::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi16_mask&expand=690)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_mask_cmp_epi16_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i16x16();
        let b = b.as_i16x16();
        let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x16::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi16_mask&expand=687)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_cmp_epi16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i16x8();
        let b = b.as_i16x8();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x8::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi16_mask&expand=688)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_mask_cmp_epi16_mask<const IMM8: i32>(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i16x8();
        let b = b.as_i16x8();
        let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi8_mask&expand=709)
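///
/// # Examples
///
/// An illustrative sketch (not from the original source), assuming an
/// AVX-512BW-capable CPU:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi8(i8::MIN);
///     let b = _mm512_set1_epi8(i8::MAX);
///     // i8::MIN <= i8::MAX in every one of the 64 lanes.
///     let m = _mm512_cmp_epi8_mask::<_MM_CMPINT_LE>(a, b);
///     assert_eq!(m, u64::MAX);
/// }
/// ```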
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_cmp_epi8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x64::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x64::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi8_mask&expand=710)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm512_mask_cmp_epi8_mask<const IMM8: i32>(
    k1: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __mmask64 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x64::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi8_mask&expand=707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_cmp_epi8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x32::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x32::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi8_mask&expand=708)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm256_mask_cmp_epi8_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x32::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi8_mask&expand=705)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_cmp_epi8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i8x16();
        let b = b.as_i8x16();
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x16::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x16::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi8_mask&expand=706)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub fn _mm_mask_cmp_epi8_mask<const IMM8: i32>(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_i8x16();
        let b = b.as_i8x16();
        let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x16::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi16)
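///
/// # Examples
///
/// An illustrative sketch (not from the original source), assuming a CPU with
/// AVX-512BW and AVX-512VL:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm256_set1_epi16(2);
///     // Sixteen lanes of 2 sum to 32. The reduction order is unspecified
///     // and the sum wraps on overflow.
///     assert_eq!(_mm256_reduce_add_epi16(a), 32);
/// }
/// ```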
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_reduce_add_epi16(a: __m256i) -> i16 {
    unsafe { simd_reduce_add_unordered(a.as_i16x16()) }
}

/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi16)
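///
/// # Examples
///
/// An illustrative sketch (not from the original source), assuming a CPU with
/// AVX-512BW and AVX-512VL:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm256_set1_epi16(2);
///     // Only the four lanes selected by the mask participate; inactive
///     // lanes contribute the additive identity 0.
///     assert_eq!(_mm256_mask_reduce_add_epi16(0b1111, a), 8);
/// }
/// ```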
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 {
    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) }
}

/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_reduce_add_epi16(a: __m128i) -> i16 {
    unsafe { simd_reduce_add_unordered(a.as_i16x8()) }
}

/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 {
    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) }
}

/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_reduce_add_epi8(a: __m256i) -> i8 {
    unsafe { simd_reduce_add_unordered(a.as_i8x32()) }
}

/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 {
    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) }
}

/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_reduce_add_epi8(a: __m128i) -> i8 {
    unsafe { simd_reduce_add_unordered(a.as_i8x16()) }
}

/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_reduce_add_epi8(k: __mmask16, a: __m128i) -> i8 {
    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) }
}

/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_reduce_and_epi16(a: __m256i) -> i16 {
    unsafe { simd_reduce_and(a.as_i16x16()) }
}

/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi16)
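///
/// # Examples
///
/// An illustrative sketch (not from the original source), assuming a CPU with
/// AVX-512BW and AVX-512VL:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm256_set1_epi16(0b0110);
///     // Inactive lanes are replaced with all-ones (-1), the AND identity,
///     // so they cannot clear any bit of the result.
///     assert_eq!(_mm256_mask_reduce_and_epi16(0b11, a), 0b0110);
/// }
/// ```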
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_reduce_and_epi16(k: __mmask16, a: __m256i) -> i16 {
    unsafe {
        simd_reduce_and(simd_select_bitmask(
            k,
            a.as_i16x16(),
            _mm256_set1_epi64x(-1).as_i16x16(),
        ))
    }
}

/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_reduce_and_epi16(a: __m128i) -> i16 {
    unsafe { simd_reduce_and(a.as_i16x8()) }
}

/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_reduce_and_epi16(k: __mmask8, a: __m128i) -> i16 {
    unsafe {
        simd_reduce_and(simd_select_bitmask(
            k,
            a.as_i16x8(),
            _mm_set1_epi64x(-1).as_i16x8(),
        ))
    }
}

/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_reduce_and_epi8(a: __m256i) -> i8 {
    unsafe { simd_reduce_and(a.as_i8x32()) }
}

/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_reduce_and_epi8(k: __mmask32, a: __m256i) -> i8 {
    unsafe {
        simd_reduce_and(simd_select_bitmask(
            k,
            a.as_i8x32(),
            _mm256_set1_epi64x(-1).as_i8x32(),
        ))
    }
}

/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_reduce_and_epi8(a: __m128i) -> i8 {
    unsafe { simd_reduce_and(a.as_i8x16()) }
}

/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_reduce_and_epi8(k: __mmask16, a: __m128i) -> i8 {
    unsafe {
        simd_reduce_and(simd_select_bitmask(
            k,
            a.as_i8x16(),
            _mm_set1_epi64x(-1).as_i8x16(),
        ))
    }
}

/// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_reduce_max_epi16(a: __m256i) -> i16 {
    unsafe { simd_reduce_max(a.as_i16x16()) }
}

/// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi16)
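///
/// # Examples
///
/// An illustrative sketch (not from the original source), assuming a CPU with
/// AVX-512BW and AVX-512VL:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm256_set1_epi16(7);
///     // Inactive lanes are treated as i16::MIN, the identity for the signed
///     // maximum, so a single active lane decides the result.
///     assert_eq!(_mm256_mask_reduce_max_epi16(0b1, a), 7);
/// }
/// ```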
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_reduce_max_epi16(k: __mmask16, a: __m256i) -> i16 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(-32768))) }
}

/// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_reduce_max_epi16(a: __m128i) -> i16 {
    unsafe { simd_reduce_max(a.as_i16x8()) }
}

/// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_reduce_max_epi16(k: __mmask8, a: __m128i) -> i16 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(-32768))) }
}

/// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_reduce_max_epi8(a: __m256i) -> i8 {
    unsafe { simd_reduce_max(a.as_i8x32()) }
}

/// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_reduce_max_epi8(k: __mmask32, a: __m256i) -> i8 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(-128))) }
}

/// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_reduce_max_epi8(a: __m128i) -> i8 {
    unsafe { simd_reduce_max(a.as_i8x16()) }
}

/// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_reduce_max_epi8(k: __mmask16, a: __m128i) -> i8 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(-128))) }
}

/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_reduce_max_epu16(a: __m256i) -> u16 {
    unsafe { simd_reduce_max(a.as_u16x16()) }
}

/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu16)
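///
/// # Examples
///
/// An illustrative sketch (not from the original source), assuming a CPU with
/// AVX-512BW and AVX-512VL:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm256_set1_epi16(7);
///     // For the unsigned maximum, inactive lanes count as 0, the identity.
///     assert_eq!(_mm256_mask_reduce_max_epu16(0b1, a), 7);
/// }
/// ```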
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_reduce_max_epu16(k: __mmask16, a: __m256i) -> u16 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x16(), u16x16::ZERO)) }
}

/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_reduce_max_epu16(a: __m128i) -> u16 {
    unsafe { simd_reduce_max(a.as_u16x8()) }
}

/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_reduce_max_epu16(k: __mmask8, a: __m128i) -> u16 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x8(), u16x8::ZERO)) }
}

/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_reduce_max_epu8(a: __m256i) -> u8 {
    unsafe { simd_reduce_max(a.as_u8x32()) }
}

/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_reduce_max_epu8(k: __mmask32, a: __m256i) -> u8 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x32(), u8x32::ZERO)) }
}

/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_reduce_max_epu8(a: __m128i) -> u8 {
    unsafe { simd_reduce_max(a.as_u8x16()) }
}

/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_reduce_max_epu8(k: __mmask16, a: __m128i) -> u8 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x16(), u8x16::ZERO)) }
}

/// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_reduce_min_epi16(a: __m256i) -> i16 {
    unsafe { simd_reduce_min(a.as_i16x16()) }
}

/// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi16)
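///
/// # Examples
///
/// An illustrative sketch (not from the original source), assuming a CPU with
/// AVX-512BW and AVX-512VL:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm256_set1_epi16(7);
///     // Inactive lanes are treated as i16::MAX (0x7fff), the identity for
///     // the signed minimum.
///     assert_eq!(_mm256_mask_reduce_min_epi16(0b1, a), 7);
/// }
/// ```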
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_reduce_min_epi16(k: __mmask16, a: __m256i) -> i16 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(0x7fff))) }
}

/// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_reduce_min_epi16(a: __m128i) -> i16 {
    unsafe { simd_reduce_min(a.as_i16x8()) }
}

/// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_reduce_min_epi16(k: __mmask8, a: __m128i) -> i16 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(0x7fff))) }
}

/// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_reduce_min_epi8(a: __m256i) -> i8 {
    unsafe { simd_reduce_min(a.as_i8x32()) }
}

/// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_reduce_min_epi8(k: __mmask32, a: __m256i) -> i8 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(0x7f))) }
}

/// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_reduce_min_epi8(a: __m128i) -> i8 {
    unsafe { simd_reduce_min(a.as_i8x16()) }
}

/// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_reduce_min_epi8(k: __mmask16, a: __m128i) -> i8 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(0x7f))) }
}

/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_reduce_min_epu16(a: __m256i) -> u16 {
    unsafe { simd_reduce_min(a.as_u16x16()) }
}

/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu16)
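///
/// # Examples
///
/// An illustrative sketch (not from the original source), assuming a CPU with
/// AVX-512BW and AVX-512VL:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm256_set1_epi16(7);
///     // Inactive lanes are treated as 0xffff (u16::MAX), the identity for
///     // the unsigned minimum.
///     assert_eq!(_mm256_mask_reduce_min_epu16(0b1, a), 7);
/// }
/// ```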
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_reduce_min_epu16(k: __mmask16, a: __m256i) -> u16 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x16(), u16x16::splat(0xffff))) }
}

/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_reduce_min_epu16(a: __m128i) -> u16 {
    unsafe { simd_reduce_min(a.as_u16x8()) }
}

/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_reduce_min_epu16(k: __mmask8, a: __m128i) -> u16 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x8(), u16x8::splat(0xffff))) }
}

/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_reduce_min_epu8(a: __m256i) -> u8 {
    unsafe { simd_reduce_min(a.as_u8x32()) }
}

/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_reduce_min_epu8(k: __mmask32, a: __m256i) -> u8 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x32(), u8x32::splat(0xff))) }
}

/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_reduce_min_epu8(a: __m128i) -> u8 {
    unsafe { simd_reduce_min(a.as_u8x16()) }
}

/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_reduce_min_epu8(k: __mmask16, a: __m128i) -> u8 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x16(), u8x16::splat(0xff))) }
}

/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 {
    unsafe { simd_reduce_mul_unordered(a.as_i16x16()) }
}

/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 {
    unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(1))) }
}

/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_reduce_mul_epi16(a: __m128i) -> i16 {
    unsafe { simd_reduce_mul_unordered(a.as_i16x8()) }
}

/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi16)
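///
/// # Examples
///
/// A minimal sketch, not from Intel's reference (nightly + `avx512bw`/`avx512vl`
/// assumed; the product wraps on overflow):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set1_epi16(3);
///     // Four active lanes: 3 * 3 * 3 * 3 = 81; masked-off lanes contribute
///     // the multiplicative identity 1.
///     assert_eq!(_mm_mask_reduce_mul_epi16(0b0000_1111, a), 81);
/// }
/// ```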
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 {
    unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(1))) }
}

/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 {
    unsafe { simd_reduce_mul_unordered(a.as_i8x32()) }
}

/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 {
    unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(1))) }
}

/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_reduce_mul_epi8(a: __m128i) -> i8 {
    unsafe { simd_reduce_mul_unordered(a.as_i8x16()) }
}

/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_reduce_mul_epi8(k: __mmask16, a: __m128i) -> i8 {
    unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(1))) }
}

/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_reduce_or_epi16(a: __m256i) -> i16 {
    unsafe { simd_reduce_or(a.as_i16x16()) }
}

/// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_reduce_or_epi16(k: __mmask16, a: __m256i) -> i16 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) }
}

/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_reduce_or_epi16(a: __m128i) -> i16 {
    unsafe { simd_reduce_or(a.as_i16x8()) }
}

/// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi16)
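///
/// # Examples
///
/// A minimal sketch, not from Intel's reference (nightly + `avx512bw`/`avx512vl`
/// assumed):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set_epi16(0, 0, 0, 0, 0x10, 0x08, 0x02, 0x01);
///     // OR of the two lowest lanes (0x01 | 0x02); masked-off lanes
///     // contribute 0, the identity for OR.
///     assert_eq!(_mm_mask_reduce_or_epi16(0b0000_0011, a), 0x03);
/// }
/// ```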
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_reduce_or_epi16(k: __mmask8, a: __m128i) -> i16 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) }
}

/// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_reduce_or_epi8(a: __m256i) -> i8 {
    unsafe { simd_reduce_or(a.as_i8x32()) }
}

/// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_mask_reduce_or_epi8(k: __mmask32, a: __m256i) -> i8 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) }
}

/// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_reduce_or_epi8(a: __m128i) -> i8 {
    unsafe { simd_reduce_or(a.as_i8x16()) }
}

/// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_reduce_or_epi8(k: __mmask16, a: __m128i) -> i8 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) }
}

/// Load 512-bits (composed of 32 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi16&expand=3368)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
pub unsafe fn _mm512_loadu_epi16(mem_addr: *const i16) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 16 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi16&expand=3365)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
pub unsafe fn _mm256_loadu_epi16(mem_addr: *const i16) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 8 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi16&expand=3362)
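///
/// # Examples
///
/// A minimal sketch, not from Intel's reference (nightly + `avx512bw`/`avx512vl`
/// assumed; the caller must guarantee 16 readable bytes at `mem_addr`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let data: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
///     // No alignment requirement on the pointer.
///     let v = _mm_loadu_epi16(data.as_ptr());
///     assert_eq!(_mm_extract_epi16::<0>(v), 1);
/// }
/// ```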
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
pub unsafe fn _mm_loadu_epi16(mem_addr: *const i16) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
}

/// Load 512-bits (composed of 64 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi8&expand=3395)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
pub unsafe fn _mm512_loadu_epi8(mem_addr: *const i8) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 32 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi8&expand=3392)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
pub unsafe fn _mm256_loadu_epi8(mem_addr: *const i8) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 16 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi8&expand=3389)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
pub unsafe fn _mm_loadu_epi8(mem_addr: *const i8) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
}

/// Store 512-bits (composed of 32 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi16&expand=5622)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
pub unsafe fn _mm512_storeu_epi16(mem_addr: *mut i16, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 16 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi16&expand=5620)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
pub unsafe fn _mm256_storeu_epi16(mem_addr: *mut i16, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 8 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi16&expand=5618)
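///
/// # Examples
///
/// A minimal sketch, not from Intel's reference (nightly + `avx512bw`/`avx512vl`
/// assumed; the caller must guarantee 16 writable bytes at `mem_addr`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let mut out = [0i16; 8];
///     _mm_storeu_epi16(out.as_mut_ptr(), _mm_set1_epi16(7));
///     assert_eq!(out, [7i16; 8]);
/// }
/// ```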
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
pub unsafe fn _mm_storeu_epi16(mem_addr: *mut i16, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}

/// Store 512-bits (composed of 64 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi8&expand=5640)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
pub unsafe fn _mm512_storeu_epi8(mem_addr: *mut i8, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 32 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi8&expand=5638)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
pub unsafe fn _mm256_storeu_epi8(mem_addr: *mut i8, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 16 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi8&expand=5636)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
pub unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}

/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *const i16) -> __m512i {
    transmute(loaddqu16_512(mem_addr, src.as_i16x32(), k))
}

/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
    _mm512_mask_loadu_epi16(_mm512_setzero_si512(), k, mem_addr)
}

/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *const i8) -> __m512i {
    transmute(loaddqu8_512(mem_addr, src.as_i8x64(), k))
}

/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
    _mm512_mask_loadu_epi8(_mm512_setzero_si512(), k, mem_addr)
}

/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *const i16) -> __m256i {
    transmute(loaddqu16_256(mem_addr, src.as_i16x16(), k))
}

/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
    _mm256_mask_loadu_epi16(_mm256_setzero_si256(), k, mem_addr)
}

/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *const i8) -> __m256i {
    transmute(loaddqu8_256(mem_addr, src.as_i8x32(), k))
}

/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
    _mm256_mask_loadu_epi8(_mm256_setzero_si256(), k, mem_addr)
}

/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i16) -> __m128i {
    transmute(loaddqu16_128(mem_addr, src.as_i16x8(), k))
}

/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi16)
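///
/// # Examples
///
/// A minimal sketch, not from Intel's reference (nightly + `avx512bw`/`avx512vl`
/// assumed; per Intel's description of masked loads, masked-off elements are
/// not read from memory, which is what makes this usable for buffer tails):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // A three-element tail: lanes 3..8 stay zero.
///     let data: [i16; 3] = [10, 20, 30];
///     let v = _mm_maskz_loadu_epi16(0b0000_0111, data.as_ptr());
///     assert_eq!(_mm_extract_epi16::<1>(v), 20);
///     assert_eq!(_mm_extract_epi16::<3>(v), 0);
/// }
/// ```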
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
    _mm_mask_loadu_epi16(_mm_setzero_si128(), k, mem_addr)
}

/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i8) -> __m128i {
    transmute(loaddqu8_128(mem_addr, src.as_i8x16(), k))
}

/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
    _mm_mask_loadu_epi8(_mm_setzero_si128(), k, mem_addr)
}

/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) {
    storedqu16_512(mem_addr, a.as_i16x32(), mask)
}

/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) {
    storedqu8_512(mem_addr, a.as_i8x64(), mask)
}

/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) {
    storedqu16_256(mem_addr, a.as_i16x16(), mask)
}

/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) {
    storedqu8_256(mem_addr, a.as_i8x32(), mask)
}

/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) {
    storedqu16_128(mem_addr, a.as_i16x8(), mask)
}

/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi8)
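///
/// # Examples
///
/// A minimal sketch, not from Intel's reference (nightly + `avx512bw`/`avx512vl`
/// assumed; masked-off bytes are left untouched in memory):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let mut out = [0i8; 16];
///     // Write only the low four bytes of the vector.
///     _mm_mask_storeu_epi8(out.as_mut_ptr(), 0b1111, _mm_set1_epi8(9));
///     assert_eq!(&out[..5], &[9, 9, 9, 9, 0]);
/// }
/// ```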
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) {
    storedqu8_128(mem_addr, a.as_i8x16(), mask)
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_madd_epi16&expand=3511)
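///
/// # Examples
///
/// A minimal sketch, not from Intel's reference (nightly + `avx512bw` assumed):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(3);
///     let b = _mm512_set1_epi16(4);
///     // Each 32-bit lane holds 3*4 + 3*4 = 24, and there are 16 lanes.
///     let r = _mm512_madd_epi16(a, b);
///     assert_eq!(_mm512_reduce_add_epi32(r), 24 * 16);
/// }
/// ```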
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpmaddwd(a.as_i16x32(), b.as_i16x32())) }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_madd_epi16&expand=3512)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm512_mask_madd_epi16(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let madd = _mm512_madd_epi16(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, madd, src.as_i32x16()))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_madd_epi16&expand=3513)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm512_maskz_madd_epi16(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let madd = _mm512_madd_epi16(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, madd, i32x16::ZERO))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_madd_epi16&expand=3509)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm256_mask_madd_epi16(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let madd = _mm256_madd_epi16(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, madd, src.as_i32x8()))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_madd_epi16&expand=3510)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm256_maskz_madd_epi16(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let madd = _mm256_madd_epi16(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, madd, i32x8::ZERO))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_madd_epi16&expand=3506)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm_mask_madd_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let madd = _mm_madd_epi16(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, madd, src.as_i32x4()))
    }
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_madd_epi16&expand=3507)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let madd = _mm_madd_epi16(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, madd, i32x4::ZERO))
    }
}

/// Vertically multiply each unsigned 8-bit integer from a with the corresponding signed 8-bit integer from b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maddubs_epi16&expand=3539)
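///
/// # Examples
///
/// A minimal sketch, not from Intel's reference (nightly + `avx512bw` assumed):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // a is read as unsigned bytes: the bit pattern of -56i8 is 200u8.
///     let a = _mm512_set1_epi8(-56);
///     let b = _mm512_set1_epi8(2);
///     // Each 16-bit lane holds the saturating sum 200*2 + 200*2 = 800.
///     let r = _mm512_maddubs_epi16(a, b);
///     assert_eq!(_mm512_cvtsi512_si32(r) & 0xffff, 800);
/// }
/// ```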
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpmaddubsw(a.as_i8x64(), b.as_i8x64())) }
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_maddubs_epi16&expand=3540)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm512_mask_maddubs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, madd, src.as_i16x32()))
    }
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_maddubs_epi16&expand=3541)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm512_maskz_maddubs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, madd, i16x32::ZERO))
    }
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_maddubs_epi16&expand=3537)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm256_mask_maddubs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, madd, src.as_i16x16()))
    }
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_maddubs_epi16&expand=3538)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm256_maskz_maddubs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, madd, i16x16::ZERO))
    }
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_maddubs_epi16&expand=3534)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm_mask_maddubs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let madd = _mm_maddubs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, madd, src.as_i16x8()))
    }
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_maddubs_epi16&expand=3535)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm_maskz_maddubs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let madd = _mm_maddubs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, madd, i16x8::ZERO))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi32&expand=4091)
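///
/// # Examples
///
/// A minimal sketch, not from Intel's reference (nightly + `avx512bw` assumed).
/// Note that the packing interleaves a and b within each 128-bit lane, not
/// across the whole vector:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(70_000); // saturates to i16::MAX
///     let b = _mm512_set1_epi32(-70_000); // saturates to i16::MIN
///     let r = _mm512_packs_epi32(a, b);
///     assert_eq!(_mm512_cvtsi512_si32(r) & 0xffff, i16::MAX as i32);
/// }
/// ```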
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpackssdw(a.as_i32x16(), b.as_i32x16())) }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi32&expand=4089)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm512_mask_packs_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packs_epi32(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi32&expand=4090)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packs_epi32(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi32&expand=4086)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm256_mask_packs_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packs_epi32(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packs_epi32&expand=4087)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm256_maskz_packs_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packs_epi32(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, pack, i16x16::ZERO))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi32&expand=4083)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packs_epi32(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi32&expand=4084)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packs_epi32(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi16&expand=4082)
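///
/// # Examples
///
/// A minimal sketch, not from Intel's reference (nightly + `avx512bw` assumed):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi16(300); // saturates to i8::MAX
///     let b = _mm512_set1_epi16(-300); // saturates to i8::MIN
///     let r = _mm512_packs_epi16(a, b);
///     assert_eq!(_mm512_cvtsi512_si32(r) & 0xff, i8::MAX as i32);
/// }
/// ```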
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpacksswb(a.as_i16x32(), b.as_i16x32())) }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi16&expand=4080)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm512_mask_packs_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packs_epi16(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi16&expand=4081)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packs_epi16(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi16&expand=4077)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm256_mask_packs_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packs_epi16(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packs_epi16&expand=4078)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packs_epi16(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi16&expand=4074)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packs_epi16(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi16&expand=4075)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packs_epi16(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi32&expand=4130)
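///
/// # Examples
///
/// A minimal sketch, not from Intel's reference (nightly + `avx512bw` assumed):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(-5); // clamps to 0 under unsigned saturation
///     let b = _mm512_set1_epi32(70_000); // clamps to u16::MAX
///     let r = _mm512_packus_epi32(a, b);
///     assert_eq!(_mm512_cvtsi512_si32(r) & 0xffff, 0);
/// }
/// ```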
6215#[inline]
6216#[target_feature(enable = "avx512bw")]
6217#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6218#[cfg_attr(test, assert_instr(vpackusdw))]
6219pub fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i {
6220    unsafe { transmute(vpackusdw(a.as_i32x16(), b.as_i32x16())) }
6221}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi32&expand=4128)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm512_mask_packus_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packus_epi32(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi32&expand=4129)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packus_epi32(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi32&expand=4125)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm256_mask_packus_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packus_epi32(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi32&expand=4126)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packus_epi32(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, pack, i16x16::ZERO))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi32&expand=4122)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packus_epi32(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
    }
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi32&expand=4123)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packus_epi32(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi16&expand=4121)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpackuswb(a.as_i16x32(), b.as_i16x32())) }
}
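
// Editorial note (hedged): as with the other pack instructions, the 512-bit
// form does not concatenate a and b end-to-end; each 128-bit lane is packed
// independently, taking that lane's elements from `a` first, then from `b`.
// Scalar model of one 128-bit lane of vpackuswb (names hypothetical):
#[cfg(test)]
mod packus_epi16_lane_model {
    fn packus_lane(x: i16) -> u8 {
        x.clamp(0, u8::MAX as i16) as u8
    }

    // One 128-bit lane: 8 words from `a_lane`, then 8 words from `b_lane`.
    fn pack_lane(a_lane: [i16; 8], b_lane: [i16; 8]) -> [u8; 16] {
        let mut out = [0u8; 16];
        for i in 0..8 {
            out[i] = packus_lane(a_lane[i]);
            out[i + 8] = packus_lane(b_lane[i]);
        }
        out
    }

    #[test]
    fn a_then_b_within_each_lane() {
        let r = pack_lane([1; 8], [2; 8]);
        assert_eq!(&r[..8], &[1u8; 8]);
        assert_eq!(&r[8..], &[2u8; 8]);
    }
}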

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi16&expand=4119)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm512_mask_packus_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packus_epi16(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi16&expand=4120)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let pack = _mm512_packus_epi16(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi16&expand=4116)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm256_mask_packus_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packus_epi16(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi16&expand=4117)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let pack = _mm256_packus_epi16(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi16&expand=4113)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packus_epi16(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
    }
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi16&expand=4114)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let pack = _mm_packus_epi16(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu16&expand=388)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub fn _mm512_avg_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = simd_cast::<_, u32x32>(a.as_u16x32());
        let b = simd_cast::<_, u32x32>(b.as_u16x32());
        let r = simd_shr(simd_add(simd_add(a, b), u32x32::splat(1)), u32x32::splat(1));
        transmute(simd_cast::<_, u16x32>(r))
    }
}
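
// Editorial sketch: the widened arithmetic above is the classic rounding
// average `(a + b + 1) >> 1`, computed in u32 so the `+ 1` cannot overflow.
// Scalar model (names hypothetical):
#[cfg(test)]
mod avg_epu16_model {
    fn avg_lane(a: u16, b: u16) -> u16 {
        ((a as u32 + b as u32 + 1) >> 1) as u16
    }

    #[test]
    fn rounds_halves_up_without_overflow() {
        assert_eq!(avg_lane(1, 2), 2); // 1.5 rounds up to 2
        assert_eq!(avg_lane(u16::MAX, u16::MAX), u16::MAX);
    }
}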

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu16&expand=389)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub fn _mm512_mask_avg_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let avg = _mm512_avg_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, avg, src.as_u16x32()))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu16&expand=390)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub fn _mm512_maskz_avg_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let avg = _mm512_avg_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, avg, u16x32::ZERO))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu16&expand=386)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub fn _mm256_mask_avg_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let avg = _mm256_avg_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, avg, src.as_u16x16()))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu16&expand=387)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub fn _mm256_maskz_avg_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let avg = _mm256_avg_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, avg, u16x16::ZERO))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu16&expand=383)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub fn _mm_mask_avg_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let avg = _mm_avg_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, avg, src.as_u16x8()))
    }
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu16&expand=384)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub fn _mm_maskz_avg_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let avg = _mm_avg_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, avg, u16x8::ZERO))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu8&expand=397)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub fn _mm512_avg_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = simd_cast::<_, u16x64>(a.as_u8x64());
        let b = simd_cast::<_, u16x64>(b.as_u8x64());
        let r = simd_shr(simd_add(simd_add(a, b), u16x64::splat(1)), u16x64::splat(1));
        transmute(simd_cast::<_, u8x64>(r))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu8&expand=398)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub fn _mm512_mask_avg_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let avg = _mm512_avg_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, avg, src.as_u8x64()))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu8&expand=399)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub fn _mm512_maskz_avg_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let avg = _mm512_avg_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, avg, u8x64::ZERO))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu8&expand=395)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub fn _mm256_mask_avg_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let avg = _mm256_avg_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, avg, src.as_u8x32()))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu8&expand=396)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub fn _mm256_maskz_avg_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let avg = _mm256_avg_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, avg, u8x32::ZERO))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu8&expand=392)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub fn _mm_mask_avg_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let avg = _mm_avg_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, avg, src.as_u8x16()))
    }
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu8&expand=393)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let avg = _mm_avg_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, avg, u8x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi16&expand=5271)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsllw(a.as_i16x32(), count.as_i16x8())) }
}
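
// Editorial note (hedged): `vpsllw` applies a single shift amount, read from
// the low 64 bits of `count`, to every lane; amounts of 16 or more zero all
// lanes rather than wrapping. Scalar model (names hypothetical):
#[cfg(test)]
mod sll_epi16_model {
    fn sll_lane(x: u16, count: u64) -> u16 {
        if count > 15 { 0 } else { x << count }
    }

    #[test]
    fn large_counts_flush_to_zero() {
        assert_eq!(sll_lane(0x8001, 1), 0x0002);
        assert_eq!(sll_lane(0xFFFF, 16), 0);
    }
}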

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi16&expand=5269)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm512_mask_sll_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sll_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi16&expand=5270)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sll_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi16&expand=5266)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm256_mask_sll_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sll_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi16&expand=5267)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sll_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi16&expand=5263)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sll_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi16&expand=5264)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sll_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi16&expand=5301)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_slli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            transmute(simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
        }
    }
}
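
// Editorial sketch: the explicit `IMM8 >= 16` branch above exists because
// `simd_shl` is undefined for shift amounts at or beyond the lane width,
// while the hardware flushes such shifts to zero. Scalar model of the
// immediate shift (names hypothetical):
#[cfg(test)]
mod slli_epi16_model {
    fn slli_lane<const IMM8: u32>(x: u16) -> u16 {
        if IMM8 >= 16 { 0 } else { x << IMM8 }
    }

    #[test]
    fn immediate_shift_with_flush() {
        assert_eq!(slli_lane::<5>(1), 32);
        assert_eq!(slli_lane::<16>(u16::MAX), 0);
    }
}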

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi16&expand=5299)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_slli_epi16<const IMM8: u32>(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = if IMM8 >= 16 {
            u16x32::ZERO
        } else {
            simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi16&expand=5300)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_slli_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            let shf = simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16));
            transmute(simd_select_bitmask(k, shf, u16x32::ZERO))
        }
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi16&expand=5296)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_slli_epi16<const IMM8: u32>(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = if IMM8 >= 16 {
            u16x16::ZERO
        } else {
            simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u16x16()))
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi16&expand=5297)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_slli_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 16 {
            _mm256_setzero_si256()
        } else {
            let shf = simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16));
            transmute(simd_select_bitmask(k, shf, u16x16::ZERO))
        }
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi16&expand=5293)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_slli_epi16<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = if IMM8 >= 16 {
            u16x8::ZERO
        } else {
            simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u16x8()))
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi16&expand=5294)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_slli_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 16 {
            _mm_setzero_si128()
        } else {
            let shf = simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16));
            transmute(simd_select_bitmask(k, shf, u16x8::ZERO))
        }
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi16&expand=5333)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsllvw(a.as_i16x32(), count.as_i16x32())) }
}
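
// Editorial sketch: unlike `vpsllw`, the variable form shifts each lane by
// its own count, and any lane whose count exceeds 15 becomes 0. Scalar model
// (names hypothetical):
#[cfg(test)]
mod sllv_epi16_model {
    fn sllv(a: [u16; 4], count: [u16; 4]) -> [u16; 4] {
        let mut out = [0u16; 4];
        for i in 0..4 {
            out[i] = if count[i] > 15 { 0 } else { a[i] << count[i] };
        }
        out
    }

    #[test]
    fn per_lane_counts() {
        assert_eq!(sllv([1, 1, 1, 1], [0, 1, 15, 16]), [1, 2, 0x8000, 0]);
    }
}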

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi16&expand=5331)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm512_mask_sllv_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_sllv_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi16&expand=5332)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_sllv_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi16&expand=5330)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe { transmute(vpsllvw256(a.as_i16x16(), count.as_i16x16())) }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi16&expand=5328)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm256_mask_sllv_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_sllv_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi16&expand=5329)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_sllv_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi16&expand=5327)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(vpsllvw128(a.as_i16x8(), count.as_i16x8())) }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi16&expand=5325)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm_mask_sllv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sllv_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi16&expand=5326)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sllv_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi16&expand=5483)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsrlw(a.as_i16x32(), count.as_i16x8())) }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi16&expand=5481)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm512_mask_srl_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_srl_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi16&expand=5482)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_srl_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi16&expand=5478)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm256_mask_srl_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_srl_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi16&expand=5479)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_srl_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi16&expand=5475)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srl_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi16&expand=5476)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srl_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi16&expand=5513)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_srli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            transmute(simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
        }
    }
}
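
// Editorial sketch: the logical right shifts mirror the left shifts, always
// filling with zeros from the left and flushing to zero once the amount
// reaches 16. Scalar model of the immediate form (names hypothetical):
#[cfg(test)]
mod srli_epi16_model {
    fn srli_lane<const IMM8: u32>(x: u16) -> u16 {
        if IMM8 >= 16 { 0 } else { x >> IMM8 }
    }

    #[test]
    fn zero_fill_from_the_left() {
        assert_eq!(srli_lane::<1>(0x8000), 0x4000);
        assert_eq!(srli_lane::<16>(u16::MAX), 0);
    }
}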

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi16&expand=5511)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_srli_epi16<const IMM8: u32>(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = if IMM8 >= 16 {
            u16x32::ZERO
        } else {
            simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi16&expand=5512)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_srli_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // IMM8 should be u32 like the other srli intrinsics; the Intel
        // documentation this signature was checked against appears to be
        // incorrect.
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            let shf = simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16));
            transmute(simd_select_bitmask(k, shf, u16x32::ZERO))
        }
    }
}
7107
7108/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7109///
7110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi16&expand=5508)
7111#[inline]
7112#[target_feature(enable = "avx512bw,avx512vl")]
7113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7114#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7115#[rustc_legacy_const_generics(3)]
7116pub fn _mm256_mask_srli_epi16<const IMM8: i32>(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
7117    unsafe {
7118        static_assert_uimm_bits!(IMM8, 8);
7119        let shf = _mm256_srli_epi16::<IMM8>(a);
7120        transmute(simd_select_bitmask(k, shf.as_i16x16(), src.as_i16x16()))
7121    }
7122}
7123
7124/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7125///
7126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi16&expand=5509)
7127#[inline]
7128#[target_feature(enable = "avx512bw,avx512vl")]
7129#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7130#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7131#[rustc_legacy_const_generics(2)]
7132pub fn _mm256_maskz_srli_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
7133    unsafe {
7134        static_assert_uimm_bits!(IMM8, 8);
7135        let shf = _mm256_srli_epi16::<IMM8>(a);
7136        transmute(simd_select_bitmask(k, shf.as_i16x16(), i16x16::ZERO))
7137    }
7138}
7139
7140/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7141///
7142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi16&expand=5505)
7143#[inline]
7144#[target_feature(enable = "avx512bw,avx512vl")]
7145#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7146#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7147#[rustc_legacy_const_generics(3)]
7148pub fn _mm_mask_srli_epi16<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
7149    unsafe {
7150        static_assert_uimm_bits!(IMM8, 8);
7151        let shf = _mm_srli_epi16::<IMM8>(a);
7152        transmute(simd_select_bitmask(k, shf.as_i16x8(), src.as_i16x8()))
7153    }
7154}
7155
7156/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7157///
7158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi16&expand=5506)
7159#[inline]
7160#[target_feature(enable = "avx512bw,avx512vl")]
7161#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7162#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7163#[rustc_legacy_const_generics(2)]
7164pub fn _mm_maskz_srli_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
7165    unsafe {
7166        static_assert_uimm_bits!(IMM8, 8);
7167        let shf = _mm_srli_epi16::<IMM8>(a);
7168        transmute(simd_select_bitmask(k, shf.as_i16x8(), i16x8::ZERO))
7169    }
7170}
7171
7172/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
7173///
7174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi16&expand=5545)
7175#[inline]
7176#[target_feature(enable = "avx512bw")]
7177#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7178#[cfg_attr(test, assert_instr(vpsrlvw))]
7179pub fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i {
7180    unsafe { transmute(vpsrlvw(a.as_i16x32(), count.as_i16x32())) }
7181}
7182
7183/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7184///
7185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi16&expand=5543)
7186#[inline]
7187#[target_feature(enable = "avx512bw")]
7188#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7189#[cfg_attr(test, assert_instr(vpsrlvw))]
7190pub fn _mm512_mask_srlv_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
7191    unsafe {
7192        let shf = _mm512_srlv_epi16(a, count).as_i16x32();
7193        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
7194    }
7195}
7196
7197/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7198///
7199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi16&expand=5544)
7200#[inline]
7201#[target_feature(enable = "avx512bw")]
7202#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7203#[cfg_attr(test, assert_instr(vpsrlvw))]
7204pub fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
7205    unsafe {
7206        let shf = _mm512_srlv_epi16(a, count).as_i16x32();
7207        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
7208    }
7209}
7210
7211/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
7212///
7213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi16&expand=5542)
7214#[inline]
7215#[target_feature(enable = "avx512bw,avx512vl")]
7216#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7217#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe { transmute(vpsrlvw256(a.as_i16x16(), count.as_i16x16())) }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi16&expand=5540)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm256_mask_srlv_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srlv_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi16&expand=5541)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srlv_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi16&expand=5539)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(vpsrlvw128(a.as_i16x8(), count.as_i16x8())) }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi16&expand=5537)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm_mask_srlv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srlv_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi16&expand=5538)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srlv_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}

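// Illustrative usage (a minimal sketch, not part of the original source;
// `a` and `counts` are hypothetical values, and an `avx512bw,avx512vl`
// target is assumed). Each lane is shifted by its own count, and any
// count greater than 15 zeroes the lane entirely:
//
//     let a = _mm_set1_epi16(-1); // all bits set in every lane
//     let counts = _mm_set_epi16(16, 15, 8, 4, 3, 2, 1, 0);
//     let r = _mm_srlv_epi16(a, counts);
//     // lanes, from element 7 down to element 0:
//     //   0x0000, 0x0001, 0x00FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF
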
/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi16&expand=5398)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsraw(a.as_i16x32(), count.as_i16x8())) }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi16&expand=5396)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm512_mask_sra_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sra_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi16&expand=5397)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sra_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi16&expand=5393)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm256_mask_sra_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sra_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi16&expand=5394)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sra_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi16&expand=5390)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sra_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi16&expand=5391)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sra_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}

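// Illustrative usage (a minimal sketch, not part of the original source;
// `a` is a hypothetical value and an `avx512bw` target is assumed). The
// `count` operand is a single shift amount taken from the low 64 bits of
// the __m128i argument and applied to every lane:
//
//     let a = _mm512_set1_epi16(-32);
//     let count = _mm_cvtsi32_si128(3);
//     let r = _mm512_sra_epi16(a, count); // every lane == -32 >> 3 == -4
//     // Any count of 16 or more fills each lane with its sign bit,
//     // so the same call with a count of 16 yields -1 in every lane.
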
/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi16&expand=5427)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_srai_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16)))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi16&expand=5425)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_srai_epi16<const IMM8: u32>(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi16&expand=5426)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_srai_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi16&expand=5422)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_srai_epi16<const IMM8: u32>(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, r, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi16&expand=5423)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_srai_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, r, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi16&expand=5419)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_srai_epi16<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, r, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi16&expand=5420)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_srai_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
        transmute(simd_select_bitmask(k, r, i16x8::ZERO))
    }
}

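// Illustrative usage (a minimal sketch, not part of the original source;
// `a` is a hypothetical value and an `avx512bw` target is assumed). As the
// `IMM8.min(15)` in the bodies above shows, the immediate is clamped to
// 15, so an out-of-range count fills each lane with its sign bit:
//
//     let a = _mm512_set1_epi16(-8);
//     let r = _mm512_srai_epi16::<2>(a);  // every lane == -8 >> 2 == -2
//     let s = _mm512_srai_epi16::<20>(a); // clamped to 15: every lane == -1
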
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi16&expand=5456)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsravw(a.as_i16x32(), count.as_i16x32())) }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi16&expand=5454)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm512_mask_srav_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srav_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi16&expand=5455)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srav_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi16&expand=5453)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe { transmute(vpsravw256(a.as_i16x16(), count.as_i16x16())) }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi16&expand=5451)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm256_mask_srav_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srav_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi16&expand=5452)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srav_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi16&expand=5450)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(vpsravw128(a.as_i16x8(), count.as_i16x8())) }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi16&expand=5448)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm_mask_srav_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srav_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi16&expand=5449)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm_maskz_srav_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srav_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}

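// Illustrative usage (a minimal sketch, not part of the original source;
// `a` and `counts` are hypothetical values, and an `avx512bw,avx512vl`
// target is assumed). Each lane uses its own count, and counts of 16 or
// more saturate to a full sign fill:
//
//     let a = _mm_set1_epi16(-128);
//     let counts = _mm_set_epi16(0, 1, 2, 3, 4, 7, 15, 16);
//     let r = _mm_srav_epi16(a, counts);
//     // lanes, from element 7 down to element 0:
//     //   -128, -64, -32, -16, -8, -1, -1, -1
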
/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi16&expand=4226)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2w or vpermt2w
pub fn _mm512_permutex2var_epi16(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpermi2w(a.as_i16x32(), idx.as_i16x32(), b.as_i16x32())) }
}

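// Index semantics, sketched for clarity (not part of the original source):
// in this 512-bit form each 16-bit idx element selects one of the 64
// source words, with bits [4:0] picking the element and bit 5 picking the
// source (0 = a, 1 = b):
//
//     // idx element == 3           selects a's element 3
//     // idx element == 35 (32 + 3) selects b's element 3
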
/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi16&expand=4223)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermt2w))]
pub fn _mm512_mask_permutex2var_epi16(
    a: __m512i,
    k: __mmask32,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
        transmute(simd_select_bitmask(k, permute, a.as_i16x32()))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi16&expand=4225)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2w or vpermt2w
pub fn _mm512_maskz_permutex2var_epi16(
    k: __mmask32,
    a: __m512i,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
        transmute(simd_select_bitmask(k, permute, i16x32::ZERO))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi16&expand=4224)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermi2w))]
pub fn _mm512_mask2_permutex2var_epi16(
    a: __m512i,
    idx: __m512i,
    k: __mmask32,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
        transmute(simd_select_bitmask(k, permute, idx.as_i16x32()))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi16&expand=4222)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2w or vpermt2w
pub fn _mm256_permutex2var_epi16(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpermi2w256(a.as_i16x16(), idx.as_i16x16(), b.as_i16x16())) }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi16&expand=4219)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermt2w))]
pub fn _mm256_mask_permutex2var_epi16(
    a: __m256i,
    k: __mmask16,
    idx: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
        transmute(simd_select_bitmask(k, permute, a.as_i16x16()))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi16&expand=4221)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2w or vpermt2w
pub fn _mm256_maskz_permutex2var_epi16(
    k: __mmask16,
    a: __m256i,
    idx: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
        transmute(simd_select_bitmask(k, permute, i16x16::ZERO))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi16&expand=4220)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermi2w))]
pub fn _mm256_mask2_permutex2var_epi16(
    a: __m256i,
    idx: __m256i,
    k: __mmask16,
    b: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
        transmute(simd_select_bitmask(k, permute, idx.as_i16x16()))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi16&expand=4218)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2w or vpermt2w
pub fn _mm_permutex2var_epi16(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpermi2w128(a.as_i16x8(), idx.as_i16x8(), b.as_i16x8())) }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi16&expand=4215)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermt2w))]
pub fn _mm_mask_permutex2var_epi16(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
        transmute(simd_select_bitmask(k, permute, a.as_i16x8()))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi16&expand=4217)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2w or vpermt2w
pub fn _mm_maskz_permutex2var_epi16(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
        transmute(simd_select_bitmask(k, permute, i16x8::ZERO))
    }
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi16&expand=4216)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermi2w))]
pub fn _mm_mask2_permutex2var_epi16(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
        transmute(simd_select_bitmask(k, permute, idx.as_i16x8()))
    }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi16&expand=4295)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm512_permutexvar_epi16(idx: __m512i, a: __m512i) -> __m512i {
    unsafe { transmute(vpermw(a.as_i16x32(), idx.as_i16x32())) }
}

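// Illustrative usage (a minimal sketch, not part of the original source;
// `a` is a hypothetical value and an `avx512bw` target is assumed). Only
// the low 5 bits of each 16-bit index matter in this 32-element form:
//
//     let idx = _mm512_setzero_si512();         // every index selects element 0
//     let r = _mm512_permutexvar_epi16(idx, a); // broadcasts a's element 0
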
/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi16&expand=4293)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm512_mask_permutexvar_epi16(
    src: __m512i,
    k: __mmask32,
    idx: __m512i,
    a: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
        transmute(simd_select_bitmask(k, permute, src.as_i16x32()))
    }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi16&expand=4294)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm512_maskz_permutexvar_epi16(k: __mmask32, idx: __m512i, a: __m512i) -> __m512i {
    unsafe {
        let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
        transmute(simd_select_bitmask(k, permute, i16x32::ZERO))
    }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi16&expand=4292)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm256_permutexvar_epi16(idx: __m256i, a: __m256i) -> __m256i {
    unsafe { transmute(vpermw256(a.as_i16x16(), idx.as_i16x16())) }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi16&expand=4290)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm256_mask_permutexvar_epi16(
    src: __m256i,
    k: __mmask16,
    idx: __m256i,
    a: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
        transmute(simd_select_bitmask(k, permute, src.as_i16x16()))
    }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi16&expand=4291)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm256_maskz_permutexvar_epi16(k: __mmask16, idx: __m256i, a: __m256i) -> __m256i {
    unsafe {
        let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
        transmute(simd_select_bitmask(k, permute, i16x16::ZERO))
    }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutexvar_epi16&expand=4289)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm_permutexvar_epi16(idx: __m128i, a: __m128i) -> __m128i {
    unsafe { transmute(vpermw128(a.as_i16x8(), idx.as_i16x8())) }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutexvar_epi16&expand=4287)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm_mask_permutexvar_epi16(src: __m128i, k: __mmask8, idx: __m128i, a: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
        transmute(simd_select_bitmask(k, permute, src.as_i16x8()))
    }
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutexvar_epi16&expand=4288)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm_maskz_permutexvar_epi16(k: __mmask8, idx: __m128i, a: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
        transmute(simd_select_bitmask(k, permute, i16x8::ZERO))
    }
}

/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi16&expand=430)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu16))] // should be vpblendmw
pub fn _mm512_mask_blend_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i16x32(), a.as_i16x32())) }
}

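// Illustrative usage (a minimal sketch, not part of the original source;
// `a` and `b` are hypothetical values and an `avx512bw` target is
// assumed). Bit i of k selects lane i of the result — 1 takes b, 0 takes
// a — exactly as the `simd_select_bitmask` above implements:
//
//     let r = _mm512_mask_blend_epi16(0b0011, a, b);
//     // lanes 0 and 1 come from b, the remaining 30 lanes from a
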
/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi16&expand=429)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu16))] // should be vpblendmw
pub fn _mm256_mask_blend_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i16x16(), a.as_i16x16())) }
}

/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi16&expand=427)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu16))] // should be vpblendmw
pub fn _mm_mask_blend_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i16x8(), a.as_i16x8())) }
}

/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi8&expand=441)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu8))] // should be vpblendmb
pub fn _mm512_mask_blend_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i8x64(), a.as_i8x64())) }
}

/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi8&expand=440)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu8))] // should be vpblendmb
pub fn _mm256_mask_blend_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i8x32(), a.as_i8x32())) }
}

/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi8&expand=439)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu8))] // should be vpblendmb
pub fn _mm_mask_blend_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i8x16(), a.as_i8x16())) }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastw_epi16&expand=587)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i {
    unsafe {
        let a = _mm512_castsi128_si512(a).as_i16x32();
        let ret: i16x32 = simd_shuffle!(
            a,
            a,
            [
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0,
            ],
        );
        transmute(ret)
    }
}

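// Illustrative usage (a minimal sketch, not part of the original source;
// an `avx512bw` target is assumed). The all-zero shuffle above copies
// element 0 of the source into every destination lane:
//
//     let src = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 42); // element 0 == 42
//     let r = _mm512_broadcastw_epi16(src);             // all 32 lanes == 42
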
/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastw_epi16&expand=588)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm512_mask_broadcastw_epi16(src: __m512i, k: __mmask32, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, broadcast, src.as_i16x32()))
    }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastw_epi16&expand=589)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm512_maskz_broadcastw_epi16(k: __mmask32, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, broadcast, i16x32::ZERO))
    }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastw_epi16&expand=585)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm256_mask_broadcastw_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_i16x16()))
    }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastw_epi16&expand=586)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm256_maskz_broadcastw_epi16(k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, broadcast, i16x16::ZERO))
    }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastw_epi16&expand=582)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm_mask_broadcastw_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i16x8()))
    }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastw_epi16&expand=583)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, broadcast, i16x8::ZERO))
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastb_epi8&expand=536)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i {
    unsafe {
        let a = _mm512_castsi128_si512(a).as_i8x64();
        let ret: i8x64 = simd_shuffle!(
            a,
            a,
            [
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );
        transmute(ret)
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastb_epi8&expand=537)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm512_mask_broadcastb_epi8(src: __m512i, k: __mmask64, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, broadcast, src.as_i8x64()))
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastb_epi8&expand=538)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm512_maskz_broadcastb_epi8(k: __mmask64, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, broadcast, i8x64::ZERO))
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastb_epi8&expand=534)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm256_mask_broadcastb_epi8(src: __m256i, k: __mmask32, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, broadcast, src.as_i8x32()))
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastb_epi8&expand=535)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm256_maskz_broadcastb_epi8(k: __mmask32, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, broadcast, i8x32::ZERO))
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastb_epi8&expand=531)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm_mask_broadcastb_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_i8x16()))
    }
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastb_epi8&expand=532)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub fn _mm_maskz_broadcastb_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, broadcast, i8x16::ZERO))
    }
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi16&expand=6012)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        #[rustfmt::skip]
        let r: i16x32 = simd_shuffle!(
            a,
            b,
            [
                4, 32 + 4, 5, 32 + 5,
                6, 32 + 6, 7, 32 + 7,
                12, 32 + 12, 13, 32 + 13,
                14, 32 + 14, 15, 32 + 15,
                20, 32 + 20, 21, 32 + 21,
                22, 32 + 22, 23, 32 + 23,
                28, 32 + 28, 29, 32 + 29,
                30, 32 + 30, 31, 32 + 31,
            ],
        );
        transmute(r)
    }
}

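// Lane behaviour, sketched for clarity (not part of the original source):
// the interleave runs independently inside each 128-bit lane, drawing the
// upper four words of both inputs, as the shuffle indices above encode.
// For the lowest lane:
//
//     // result words 0..=7 = [a4, b4, a5, b5, a6, b6, a7, b7]
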
8237/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8238///
8239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi16&expand=6010)
8240#[inline]
8241#[target_feature(enable = "avx512bw")]
8242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8243#[cfg_attr(test, assert_instr(vpunpckhwd))]
8244pub fn _mm512_mask_unpackhi_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
8245    unsafe {
8246        let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
8247        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x32()))
8248    }
8249}
8250
8251/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8252///
8253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi16&expand=6011)
8254#[inline]
8255#[target_feature(enable = "avx512bw")]
8256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8257#[cfg_attr(test, assert_instr(vpunpckhwd))]
8258pub fn _mm512_maskz_unpackhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
8259    unsafe {
8260        let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
8261        transmute(simd_select_bitmask(k, unpackhi, i16x32::ZERO))
8262    }
8263}
8264
8265/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8266///
8267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi16&expand=6007)
8268#[inline]
8269#[target_feature(enable = "avx512bw,avx512vl")]
8270#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8271#[cfg_attr(test, assert_instr(vpunpckhwd))]
8272pub fn _mm256_mask_unpackhi_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
8273    unsafe {
8274        let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
8275        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x16()))
8276    }
8277}
8278
8279/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8280///
8281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi16&expand=6008)
8282#[inline]
8283#[target_feature(enable = "avx512bw,avx512vl")]
8284#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8285#[cfg_attr(test, assert_instr(vpunpckhwd))]
8286pub fn _mm256_maskz_unpackhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
8287    unsafe {
8288        let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
8289        transmute(simd_select_bitmask(k, unpackhi, i16x16::ZERO))
8290    }
8291}
8292
8293/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8294///
8295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi16&expand=6004)
8296#[inline]
8297#[target_feature(enable = "avx512bw,avx512vl")]
8298#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8299#[cfg_attr(test, assert_instr(vpunpckhwd))]
8300pub fn _mm_mask_unpackhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
8301    unsafe {
8302        let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
8303        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x8()))
8304    }
8305}
8306
8307/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8308///
8309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi16&expand=6005)
8310#[inline]
8311#[target_feature(enable = "avx512bw,avx512vl")]
8312#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8313#[cfg_attr(test, assert_instr(vpunpckhwd))]
8314pub fn _mm_maskz_unpackhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
8315    unsafe {
8316        let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
8317        transmute(simd_select_bitmask(k, unpackhi, i16x8::ZERO))
8318    }
8319}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi8&expand=6039)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        #[rustfmt::skip]
        let r: i8x64 = simd_shuffle!(
            a,
            b,
            [
                8, 64 + 8, 9, 64 + 9,
                10, 64 + 10, 11, 64 + 11,
                12, 64 + 12, 13, 64 + 13,
                14, 64 + 14, 15, 64 + 15,
                24, 64 + 24, 25, 64 + 25,
                26, 64 + 26, 27, 64 + 27,
                28, 64 + 28, 29, 64 + 29,
                30, 64 + 30, 31, 64 + 31,
                40, 64 + 40, 41, 64 + 41,
                42, 64 + 42, 43, 64 + 43,
                44, 64 + 44, 45, 64 + 45,
                46, 64 + 46, 47, 64 + 47,
                56, 64 + 56, 57, 64 + 57,
                58, 64 + 58, 59, 64 + 59,
                60, 64 + 60, 61, 64 + 61,
                62, 64 + 62, 63, 64 + 63,
            ],
        );
        transmute(r)
    }
}
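
// Sketch of the lane-wise behaviour (arbitrary values): bytes 8..=15 of
// each 128-bit lane of `a` and `b` are interleaved, so
//
//     let r = _mm512_unpackhi_epi8(_mm512_set1_epi8(1), _mm512_set1_epi8(2));
//
// leaves every 128-bit lane of `r` holding the byte pattern 1, 2, 1, 2, ...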

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi8&expand=6037)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub fn _mm512_mask_unpackhi_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x64()))
    }
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi8&expand=6038)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub fn _mm512_maskz_unpackhi_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, unpackhi, i8x64::ZERO))
    }
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi8&expand=6034)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub fn _mm256_mask_unpackhi_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x32()))
    }
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi8&expand=6035)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub fn _mm256_maskz_unpackhi_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, unpackhi, i8x32::ZERO))
    }
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi8&expand=6031)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub fn _mm_mask_unpackhi_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x16()))
    }
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi8&expand=6032)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub fn _mm_maskz_unpackhi_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, unpackhi, i8x16::ZERO))
    }
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi16&expand=6069)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        #[rustfmt::skip]
        let r: i16x32 = simd_shuffle!(
            a,
            b,
            [
                0, 32 + 0, 1, 32 + 1,
                2, 32 + 2, 3, 32 + 3,
                8, 32 + 8, 9, 32 + 9,
                10, 32 + 10, 11, 32 + 11,
                16, 32 + 16, 17, 32 + 17,
                18, 32 + 18, 19, 32 + 19,
                24, 32 + 24, 25, 32 + 25,
                26, 32 + 26, 27, 32 + 27,
            ],
        );
        transmute(r)
    }
}
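
// Reading the index list above: for each 128-bit lane with base word
// `base` in {0, 8, 16, 24}, the lane becomes [a[base], b[base],
// a[base + 1], b[base + 1], a[base + 2], b[base + 2], a[base + 3],
// b[base + 3]]; `b`'s words appear as 32 + i because simd_shuffle!
// indexes the concatenation of `a` and `b`.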

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi16&expand=6067)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub fn _mm512_mask_unpacklo_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x32()))
    }
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi16&expand=6068)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub fn _mm512_maskz_unpacklo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, unpacklo, i16x32::ZERO))
    }
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi16&expand=6064)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub fn _mm256_mask_unpacklo_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x16()))
    }
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi16&expand=6065)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub fn _mm256_maskz_unpacklo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, unpacklo, i16x16::ZERO))
    }
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi16&expand=6061)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub fn _mm_mask_unpacklo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x8()))
    }
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi16&expand=6062)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub fn _mm_maskz_unpacklo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, unpacklo, i16x8::ZERO))
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi8&expand=6096)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        #[rustfmt::skip]
        let r: i8x64 = simd_shuffle!(
            a,
            b,
            [
                0, 64 + 0, 1, 64 + 1,
                2, 64 + 2, 3, 64 + 3,
                4, 64 + 4, 5, 64 + 5,
                6, 64 + 6, 7, 64 + 7,
                16, 64 + 16, 17, 64 + 17,
                18, 64 + 18, 19, 64 + 19,
                20, 64 + 20, 21, 64 + 21,
                22, 64 + 22, 23, 64 + 23,
                32, 64 + 32, 33, 64 + 33,
                34, 64 + 34, 35, 64 + 35,
                36, 64 + 36, 37, 64 + 37,
                38, 64 + 38, 39, 64 + 39,
                48, 64 + 48, 49, 64 + 49,
                50, 64 + 50, 51, 64 + 51,
                52, 64 + 52, 53, 64 + 53,
                54, 64 + 54, 55, 64 + 55,
            ],
        );
        transmute(r)
    }
}
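
// Same construction at byte granularity: each 128-bit lane with base
// byte in {0, 16, 32, 48} interleaves bytes base..=base + 7 of `a` and
// `b`, with `b`'s bytes addressed as 64 + i in the concatenated input.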

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi8&expand=6094)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub fn _mm512_mask_unpacklo_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x64()))
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi8&expand=6095)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub fn _mm512_maskz_unpacklo_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, unpacklo, i8x64::ZERO))
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi8&expand=6091)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub fn _mm256_mask_unpacklo_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x32()))
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi8&expand=6092)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub fn _mm256_maskz_unpacklo_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, unpacklo, i8x32::ZERO))
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi8&expand=6088)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub fn _mm_mask_unpacklo_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x16()))
    }
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi8&expand=6089)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub fn _mm_maskz_unpacklo_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, unpacklo, i8x16::ZERO))
    }
}

/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi16&expand=3795)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub fn _mm512_mask_mov_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i16x32();
        transmute(simd_select_bitmask(k, mov, src.as_i16x32()))
    }
}

/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi16&expand=3796)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub fn _mm512_maskz_mov_epi16(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i16x32();
        transmute(simd_select_bitmask(k, mov, i16x32::ZERO))
    }
}

/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi16&expand=3793)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub fn _mm256_mask_mov_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i16x16();
        transmute(simd_select_bitmask(k, mov, src.as_i16x16()))
    }
}

/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi16&expand=3794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub fn _mm256_maskz_mov_epi16(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i16x16();
        transmute(simd_select_bitmask(k, mov, i16x16::ZERO))
    }
}

/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi16&expand=3791)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub fn _mm_mask_mov_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i16x8();
        transmute(simd_select_bitmask(k, mov, src.as_i16x8()))
    }
}

/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi16&expand=3792)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub fn _mm_maskz_mov_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i16x8();
        transmute(simd_select_bitmask(k, mov, i16x8::ZERO))
    }
}
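
// In effect the masked moves are per-element blends; a minimal sketch,
// assuming avx512bw support has been verified at runtime:
//
//     // dst[i] = if k bit i is set { a[i] } else { src[i] }
//     let blended = _mm512_mask_mov_epi16(src, k, a);
//     // dst[i] = if k bit i is set { a[i] } else { 0 }
//     let zeroed = _mm512_maskz_mov_epi16(k, a);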

/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi8&expand=3813)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub fn _mm512_mask_mov_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i8x64();
        transmute(simd_select_bitmask(k, mov, src.as_i8x64()))
    }
}

/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi8&expand=3814)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub fn _mm512_maskz_mov_epi8(k: __mmask64, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i8x64();
        transmute(simd_select_bitmask(k, mov, i8x64::ZERO))
    }
}

/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi8&expand=3811)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub fn _mm256_mask_mov_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i8x32();
        transmute(simd_select_bitmask(k, mov, src.as_i8x32()))
    }
}

/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi8&expand=3812)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub fn _mm256_maskz_mov_epi8(k: __mmask32, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i8x32();
        transmute(simd_select_bitmask(k, mov, i8x32::ZERO))
    }
}

/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi8&expand=3809)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub fn _mm_mask_mov_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i8x16();
        transmute(simd_select_bitmask(k, mov, src.as_i8x16()))
    }
}

/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi8&expand=3810)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub fn _mm_maskz_mov_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i8x16();
        transmute(simd_select_bitmask(k, mov, i8x16::ZERO))
    }
}
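
// A common byte-wide pattern is to derive the mask from a comparison and
// then drop the non-matching bytes (illustrative sketch; `haystack` and
// `needle` are hypothetical __m512i values):
//
//     let k: __mmask64 = _mm512_cmpeq_epi8_mask(haystack, needle);
//     let only_matches = _mm512_maskz_mov_epi8(k, haystack);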

/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi16&expand=4942)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm512_mask_set1_epi16(src: __m512i, k: __mmask32, a: i16) -> __m512i {
    unsafe {
        let r = _mm512_set1_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, r, src.as_i16x32()))
    }
}

/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi16&expand=4943)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm512_maskz_set1_epi16(k: __mmask32, a: i16) -> __m512i {
    unsafe {
        let r = _mm512_set1_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, r, i16x32::ZERO))
    }
}

/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi16&expand=4939)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm256_mask_set1_epi16(src: __m256i, k: __mmask16, a: i16) -> __m256i {
    unsafe {
        let r = _mm256_set1_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, r, src.as_i16x16()))
    }
}

/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi16&expand=4940)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm256_maskz_set1_epi16(k: __mmask16, a: i16) -> __m256i {
    unsafe {
        let r = _mm256_set1_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, r, i16x16::ZERO))
    }
}

/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi16&expand=4936)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm_mask_set1_epi16(src: __m128i, k: __mmask8, a: i16) -> __m128i {
    unsafe {
        let r = _mm_set1_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, r, src.as_i16x8()))
    }
}

/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi16&expand=4937)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub fn _mm_maskz_set1_epi16(k: __mmask8, a: i16) -> __m128i {
    unsafe {
        let r = _mm_set1_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, r, i16x8::ZERO))
    }
}
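
// Worked example (arbitrary values): with src = [0, 1, 2, 3, 4, 5, 6, 7]
// and k = 0b1010_1010, _mm_mask_set1_epi16(src, k, 9) yields
// [0, 9, 2, 9, 4, 9, 6, 9]: the broadcast value lands only where the
// corresponding mask bit is set.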

/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi8&expand=4970)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub fn _mm512_mask_set1_epi8(src: __m512i, k: __mmask64, a: i8) -> __m512i {
    unsafe {
        let r = _mm512_set1_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, r, src.as_i8x64()))
    }
}

/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi8&expand=4971)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub fn _mm512_maskz_set1_epi8(k: __mmask64, a: i8) -> __m512i {
    unsafe {
        let r = _mm512_set1_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, r, i8x64::ZERO))
    }
}

/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi8&expand=4967)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub fn _mm256_mask_set1_epi8(src: __m256i, k: __mmask32, a: i8) -> __m256i {
    unsafe {
        let r = _mm256_set1_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, r, src.as_i8x32()))
    }
}

/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi8&expand=4968)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub fn _mm256_maskz_set1_epi8(k: __mmask32, a: i8) -> __m256i {
    unsafe {
        let r = _mm256_set1_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, r, i8x32::ZERO))
    }
}

/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi8&expand=4964)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub fn _mm_mask_set1_epi8(src: __m128i, k: __mmask16, a: i8) -> __m128i {
    unsafe {
        let r = _mm_set1_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, r, src.as_i8x16()))
    }
}

/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi8&expand=4965)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i {
    unsafe {
        let r = _mm_set1_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, r, i8x16::ZERO))
    }
}
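
// The epi8 variants follow the same pattern at byte granularity: for
// example, _mm_maskz_set1_epi8(k, b'x' as i8) holds b'x' in the bytes
// selected by k and zero everywhere else (illustrative).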

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflelo_epi16&expand=5221)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_shufflelo_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i16x32();
        let r: i16x32 = simd_shuffle!(
            a,
            a,
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
                4,
                5,
                6,
                7,
                (IMM8 as u32 & 0b11) + 8,
                ((IMM8 as u32 >> 2) & 0b11) + 8,
                ((IMM8 as u32 >> 4) & 0b11) + 8,
                ((IMM8 as u32 >> 6) & 0b11) + 8,
                12,
                13,
                14,
                15,
                (IMM8 as u32 & 0b11) + 16,
                ((IMM8 as u32 >> 2) & 0b11) + 16,
                ((IMM8 as u32 >> 4) & 0b11) + 16,
                ((IMM8 as u32 >> 6) & 0b11) + 16,
                20,
                21,
                22,
                23,
                (IMM8 as u32 & 0b11) + 24,
                ((IMM8 as u32 >> 2) & 0b11) + 24,
                ((IMM8 as u32 >> 4) & 0b11) + 24,
                ((IMM8 as u32 >> 6) & 0b11) + 24,
                28,
                29,
                30,
                31,
            ],
        );
        transmute(r)
    }
}
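
// IMM8 packs four 2-bit source indices, one per word of the low half of
// each 128-bit lane. For example IMM8 = 0b00_01_10_11 reverses the low
// four words of every lane: word 0 takes index 0b11 = 3, word 1 takes
// 0b10 = 2, word 2 takes 0b01 = 1, and word 3 takes 0b00 = 0.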

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflelo_epi16&expand=5219)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_shufflelo_epi16<const IMM8: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_shufflelo_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
    }
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflelo_epi16&expand=5220)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_shufflelo_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO))
    }
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflelo_epi16&expand=5216)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_shufflelo_epi16<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
    }
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflelo_epi16&expand=5217)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO))
    }
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflelo_epi16&expand=5213)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_shufflelo_epi16<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
    }
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflelo_epi16&expand=5214)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO))
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflehi_epi16&expand=5212)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_shufflehi_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i16x32();
        let r: i16x32 = simd_shuffle!(
            a,
            a,
            [
                0,
                1,
                2,
                3,
                (IMM8 as u32 & 0b11) + 4,
                ((IMM8 as u32 >> 2) & 0b11) + 4,
                ((IMM8 as u32 >> 4) & 0b11) + 4,
                ((IMM8 as u32 >> 6) & 0b11) + 4,
                8,
                9,
                10,
                11,
                (IMM8 as u32 & 0b11) + 12,
                ((IMM8 as u32 >> 2) & 0b11) + 12,
                ((IMM8 as u32 >> 4) & 0b11) + 12,
                ((IMM8 as u32 >> 6) & 0b11) + 12,
                16,
                17,
                18,
                19,
                (IMM8 as u32 & 0b11) + 20,
                ((IMM8 as u32 >> 2) & 0b11) + 20,
                ((IMM8 as u32 >> 4) & 0b11) + 20,
                ((IMM8 as u32 >> 6) & 0b11) + 20,
                24,
                25,
                26,
                27,
                (IMM8 as u32 & 0b11) + 28,
                ((IMM8 as u32 >> 2) & 0b11) + 28,
                ((IMM8 as u32 >> 4) & 0b11) + 28,
                ((IMM8 as u32 >> 6) & 0b11) + 28,
            ],
        );
        transmute(r)
    }
}
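
// The IMM8 encoding mirrors _mm512_shufflelo_epi16, but each 2-bit field
// selects among the four high words (4..=7) of the same 128-bit lane, so
// IMM8 = 0b00_01_10_11 reverses words 4..=7 while words 0..=3 pass
// through unchanged.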

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflehi_epi16&expand=5210)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_shufflehi_epi16<const IMM8: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_shufflehi_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflehi_epi16&expand=5211)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_shufflehi_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO))
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflehi_epi16&expand=5207)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_shufflehi_epi16<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflehi_epi16&expand=5208)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO))
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflehi_epi16&expand=5204)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_shufflehi_epi16<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
    }
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflehi_epi16&expand=5205)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO))
    }
}

/// Shuffle packed 8-bit integers in a within 128-bit lanes according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi8&expand=5159)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm512_shuffle_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpshufb(a.as_i8x64(), b.as_i8x64())) }
}
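
// Per-byte semantics of vpshufb within each 128-bit lane, sketched in
// pseudo-Rust (`lane_of_a` stands for the 16-byte lane of `a` that byte
// i falls in):
//
//     dst[i] = if b[i] & 0x80 != 0 { 0 } else { lane_of_a[(b[i] & 0x0F) as usize] }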

/// Shuffle packed 8-bit integers in a within 128-bit lanes according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi8&expand=5157)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm512_mask_shuffle_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, shuffle, src.as_i8x64()))
    }
}

/// Shuffle packed 8-bit integers in a within 128-bit lanes according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi8&expand=5158)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm512_maskz_shuffle_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, shuffle, i8x64::ZERO))
    }
}

/// Shuffle packed 8-bit integers in a within 128-bit lanes according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi8&expand=5154)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm256_mask_shuffle_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, shuffle, src.as_i8x32()))
    }
}

/// Shuffle packed 8-bit integers in a within 128-bit lanes according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi8&expand=5155)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm256_maskz_shuffle_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, shuffle, i8x32::ZERO))
    }
}

/// Shuffle packed 8-bit integers in a within 128-bit lanes according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi8&expand=5151)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm_mask_shuffle_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, shuffle, src.as_i8x16()))
    }
}

/// Shuffle packed 8-bit integers in a within 128-bit lanes according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi8&expand=5152)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm_maskz_shuffle_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, shuffle, i8x16::ZERO))
    }
}
9425
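// Illustrative sketch, not part of the module: the masked shuffles above
// compose `vpshufb` with a per-byte mask. The helper below is hypothetical
// and assumes it only runs on an AVX-512BW-capable CPU.
//
//     #[target_feature(enable = "avx512bw")]
//     fn keep_even_shuffled_bytes(a: __m512i, ctrl: __m512i) -> __m512i {
//         // Bit i of k governs result byte i; with only even bits set,
//         // every odd-indexed destination byte is zeroed.
//         let k: __mmask64 = 0x5555_5555_5555_5555;
//         _mm512_maskz_shuffle_epi8(k, a, ctrl)
//     }
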
/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi16_mask&expand=5884)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub fn _mm512_test_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpneq_epi16_mask(and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi16_mask&expand=5883)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub fn _mm512_mask_test_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi16_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi16_mask&expand=5882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub fn _mm256_test_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpneq_epi16_mask(and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi16_mask&expand=5881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub fn _mm256_mask_test_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi16_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi16_mask&expand=5880)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub fn _mm_test_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpneq_epi16_mask(and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi16_mask&expand=5879)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub fn _mm_mask_test_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpneq_epi16_mask(k, and, zero)
}

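// Illustrative sketch, not part of the module: `_mm512_test_epi16_mask` is the
// per-lane analogue of `(a & b) != 0`. The helper is hypothetical and assumes
// an AVX-512BW-capable CPU.
//
//     #[target_feature(enable = "avx512bw")]
//     fn lanes_with_low_bit_set(v: __m512i) -> __mmask32 {
//         // Mask bit i is set iff 16-bit lane i of `v` has bit 0 set.
//         _mm512_test_epi16_mask(v, _mm512_set1_epi16(1))
//     }
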
/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi8_mask&expand=5902)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestmb))]
pub fn _mm512_test_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpneq_epi8_mask(and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi8_mask&expand=5901)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestmb))]
pub fn _mm512_mask_test_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi8_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi8_mask&expand=5900)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestmb))]
pub fn _mm256_test_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpneq_epi8_mask(and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi8_mask&expand=5899)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestmb))]
pub fn _mm256_mask_test_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi8_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi8_mask&expand=5898)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestmb))]
pub fn _mm_test_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpneq_epi8_mask(and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi8_mask&expand=5897)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestmb))]
pub fn _mm_mask_test_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpneq_epi8_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi16_mask&expand=5915)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub fn _mm512_testn_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpeq_epi16_mask(and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi16_mask&expand=5914)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub fn _mm512_mask_testn_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi16_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi16_mask&expand=5913)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub fn _mm256_testn_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpeq_epi16_mask(and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi16_mask&expand=5912)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub fn _mm256_mask_testn_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpeq_epi16_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi16_mask&expand=5911)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub fn _mm_testn_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpeq_epi16_mask(and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi16_mask&expand=5910)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub fn _mm_mask_testn_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpeq_epi16_mask(k, and, zero)
}

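// Illustrative sketch, not part of the module: `testn` is the per-lane
// negation of `test`, so over the same inputs the two masks are exact
// complements. Hypothetical helper, assuming AVX-512BW.
//
//     #[target_feature(enable = "avx512bw")]
//     fn partition_lanes(a: __m512i, b: __m512i) -> (__mmask32, __mmask32) {
//         let hit = _mm512_test_epi16_mask(a, b); // (a & b) != 0 per lane
//         let miss = _mm512_testn_epi16_mask(a, b); // (a & b) == 0 per lane
//         debug_assert_eq!(hit ^ miss, u32::MAX); // complementary masks
//         (hit, miss)
//     }
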
/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi8_mask&expand=5933)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestnmb))]
pub fn _mm512_testn_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpeq_epi8_mask(and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi8_mask&expand=5932)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestnmb))]
pub fn _mm512_mask_testn_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi8_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi8_mask&expand=5931)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestnmb))]
pub fn _mm256_testn_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpeq_epi8_mask(and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi8_mask&expand=5930)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestnmb))]
pub fn _mm256_mask_testn_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpeq_epi8_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi8_mask&expand=5929)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestnmb))]
pub fn _mm_testn_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpeq_epi8_mask(and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi8_mask&expand=5928)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestnmb))]
pub fn _mm_mask_testn_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpeq_epi8_mask(k, and, zero)
}

/// Store 64-bit mask from a into memory.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask64&expand=5578)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] // should be kmovq
pub unsafe fn _store_mask64(mem_addr: *mut __mmask64, a: __mmask64) {
    ptr::write(mem_addr, a);
}

/// Store 32-bit mask from a into memory.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask32&expand=5577)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] // should be kmovd
pub unsafe fn _store_mask32(mem_addr: *mut __mmask32, a: __mmask32) {
    ptr::write(mem_addr, a);
}

/// Load 64-bit mask from memory into k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask64&expand=3318)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] // should be kmovq
pub unsafe fn _load_mask64(mem_addr: *const __mmask64) -> __mmask64 {
    ptr::read(mem_addr)
}

/// Load 32-bit mask from memory into k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask32&expand=3317)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] // should be kmovd
pub unsafe fn _load_mask32(mem_addr: *const __mmask32) -> __mmask32 {
    ptr::read(mem_addr)
}

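// Illustrative sketch, not part of the module: the mask store/load intrinsics
// are plain moves through memory, so a round trip preserves the mask.
// Hypothetical helper, assuming AVX-512BW.
//
//     #[target_feature(enable = "avx512bw")]
//     unsafe fn roundtrip_mask64(k: __mmask64) -> __mmask64 {
//         let mut slot: __mmask64 = 0;
//         _store_mask64(&mut slot, k);
//         _load_mask64(&slot)
//     }
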
/// Compute the absolute differences of packed unsigned 8-bit integers in a and b, then horizontally sum each consecutive 8 differences to produce eight unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sad_epu8&expand=4855)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsadbw))]
pub fn _mm512_sad_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpsadbw(a.as_u8x64(), b.as_u8x64())) }
}

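// Illustrative sketch, not part of the module: each 64-bit lane of the result
// holds the sum of eight absolute byte differences in its low 16 bits, which
// makes `vpsadbw` a cheap building block for an L1 distance over 64 bytes.
// Hypothetical helper; `_mm512_reduce_add_epi64` is the avx512f horizontal
// reduction, which avx512bw-capable CPUs also provide.
//
//     #[target_feature(enable = "avx512bw,avx512f")]
//     fn block_l1_distance(a: __m512i, b: __m512i) -> i64 {
//         let partial = _mm512_sad_epu8(a, b); // eight 64-bit partial sums
//         _mm512_reduce_add_epi64(partial)
//     }
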
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dbsad_epu8&expand=2114)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm512_dbsad_epu8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        let r = vdbpsadbw(a, b, IMM8);
        transmute(r)
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dbsad_epu8&expand=2115)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm512_mask_dbsad_epu8<const IMM8: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        let r = vdbpsadbw(a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_u16x32()))
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dbsad_epu8&expand=2116)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm512_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        let r = vdbpsadbw(a, b, IMM8);
        transmute(simd_select_bitmask(k, r, u16x32::ZERO))
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dbsad_epu8&expand=2111)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm256_dbsad_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        let r = vdbpsadbw256(a, b, IMM8);
        transmute(r)
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dbsad_epu8&expand=2112)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm256_mask_dbsad_epu8<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        let r = vdbpsadbw256(a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_u16x16()))
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dbsad_epu8&expand=2113)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm256_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        let r = vdbpsadbw256(a, b, IMM8);
        transmute(simd_select_bitmask(k, r, u16x16::ZERO))
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dbsad_epu8&expand=2108)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm_dbsad_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        let r = vdbpsadbw128(a, b, IMM8);
        transmute(r)
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dbsad_epu8&expand=2109)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm_mask_dbsad_epu8<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        let r = vdbpsadbw128(a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_u16x8()))
    }
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dbsad_epu8&expand=2110)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub fn _mm_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        let r = vdbpsadbw128(a, b, IMM8);
        transmute(simd_select_bitmask(k, r, u16x8::ZERO))
    }
}

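// Note on the immediate: IMM8 selects, two bits per 32-bit slot, which dword
// of `b` (within each 128-bit lane) feeds each group of SADs; the four SADs
// per 64-bit lane then compare quadruplets of `a` at one-byte offsets.
// Hypothetical helper, assuming AVX-512BW; 0b11_10_01_00 keeps every dword of
// `b` in place.
//
//     #[target_feature(enable = "avx512bw")]
//     fn dbsad_identity_selection(a: __m512i, b: __m512i) -> __m512i {
//         _mm512_dbsad_epu8::<0b11100100>(a, b)
//     }
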
/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi16_mask&expand=3873)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovw2m))]
pub fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 {
    let filter = _mm512_set1_epi16(1 << 15);
    let a = _mm512_and_si512(a, filter);
    _mm512_cmpeq_epi16_mask(a, filter)
}

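// Illustrative sketch, not part of the module: collecting sign bits doubles as
// a "which lanes are negative?" test. Hypothetical helper, assuming AVX-512BW.
//
//     #[target_feature(enable = "avx512bw")]
//     fn negative_lanes(v: __m512i) -> __mmask32 {
//         // Bit i is set iff signed 16-bit lane i of `v` is negative.
//         _mm512_movepi16_mask(v)
//     }
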
/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi16_mask&expand=3872)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovw2m))]
pub fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 {
    let filter = _mm256_set1_epi16(1 << 15);
    let a = _mm256_and_si256(a, filter);
    _mm256_cmpeq_epi16_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi16_mask&expand=3871)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovw2m))]
pub fn _mm_movepi16_mask(a: __m128i) -> __mmask8 {
    let filter = _mm_set1_epi16(1 << 15);
    let a = _mm_and_si128(a, filter);
    _mm_cmpeq_epi16_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi8_mask&expand=3883)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovb2m))]
pub fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 {
    let filter = _mm512_set1_epi8(1 << 7);
    let a = _mm512_and_si512(a, filter);
    _mm512_cmpeq_epi8_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi8_mask&expand=3882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes fewer cycles than
// using vpmovb2m plus converting the mask register to a standard register.
pub fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 {
    let filter = _mm256_set1_epi8(1 << 7);
    let a = _mm256_and_si256(a, filter);
    _mm256_cmpeq_epi8_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi8_mask&expand=3881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes fewer cycles than
// using vpmovb2m plus converting the mask register to a standard register.
pub fn _mm_movepi8_mask(a: __m128i) -> __mmask16 {
    let filter = _mm_set1_epi8(1 << 7);
    let a = _mm_and_si128(a, filter);
    _mm_cmpeq_epi8_mask(a, filter)
}

/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi16&expand=3886)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovm2w))]
pub fn _mm512_movm_epi16(k: __mmask32) -> __m512i {
    unsafe {
        // -1 has all 16 bits set, i.e. 1 << 15 | 1 << 14 | ... | 1 << 0.
        let ones = _mm512_set1_epi16(-1).as_i16x32();
        transmute(simd_select_bitmask(k, ones, i16x32::ZERO))
    }
}

/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi16&expand=3885)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovm2w))]
pub fn _mm256_movm_epi16(k: __mmask16) -> __m256i {
    unsafe {
        // -1 has all 16 bits set, i.e. 1 << 15 | 1 << 14 | ... | 1 << 0.
        let ones = _mm256_set1_epi16(-1).as_i16x16();
        transmute(simd_select_bitmask(k, ones, i16x16::ZERO))
    }
}

/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi16&expand=3884)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovm2w))]
pub fn _mm_movm_epi16(k: __mmask8) -> __m128i {
    unsafe {
        // -1 has all 16 bits set, i.e. 1 << 15 | 1 << 14 | ... | 1 << 0.
        let ones = _mm_set1_epi16(-1).as_i16x8();
        transmute(simd_select_bitmask(k, ones, i16x8::ZERO))
    }
}

/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi8&expand=3895)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovm2b))]
pub fn _mm512_movm_epi8(k: __mmask64) -> __m512i {
    unsafe {
        // -1 has all 8 bits set, i.e. 1 << 7 | 1 << 6 | ... | 1 << 0.
        let ones = _mm512_set1_epi8(-1).as_i8x64();
        transmute(simd_select_bitmask(k, ones, i8x64::ZERO))
    }
}

/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi8&expand=3894)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovm2b))]
pub fn _mm256_movm_epi8(k: __mmask32) -> __m256i {
    unsafe {
        // -1 has all 8 bits set, i.e. 1 << 7 | 1 << 6 | ... | 1 << 0.
        let ones = _mm256_set1_epi8(-1).as_i8x32();
        transmute(simd_select_bitmask(k, ones, i8x32::ZERO))
    }
}

/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi8&expand=3893)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovm2b))]
pub fn _mm_movm_epi8(k: __mmask16) -> __m128i {
    unsafe {
        // -1 has all 8 bits set, i.e. 1 << 7 | 1 << 6 | ... | 1 << 0.
        let ones = _mm_set1_epi8(-1).as_i8x16();
        transmute(simd_select_bitmask(k, ones, i8x16::ZERO))
    }
}

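// Illustrative sketch, not part of the module: `movm_epi*` and `movepi*_mask`
// are inverses on canonical inputs, so expanding a mask to all-ones/all-zeros
// lanes and collapsing the sign bits returns the original mask. Hypothetical
// helper, assuming AVX-512BW.
//
//     #[target_feature(enable = "avx512bw")]
//     fn movm_roundtrip(k: __mmask32) -> __mmask32 {
//         let v = _mm512_movm_epi16(k); // lane i = 0xFFFF if bit i of k, else 0
//         _mm512_movepi16_mask(v) // recover k from the sign bits
//     }
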
/// Convert 32-bit mask a into an integer value, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#_cvtmask32_u32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _cvtmask32_u32(a: __mmask32) -> u32 {
    a
}

/// Convert integer value a into a 32-bit mask, and store the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _cvtu32_mask32(a: u32) -> __mmask32 {
    a
}

/// Add 32-bit masks in a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask32&expand=3207)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    // `kaddd` wraps on overflow; avoid the debug-mode overflow panic of `+`.
    a.wrapping_add(b)
}

/// Add 64-bit masks in a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask64&expand=3208)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    // `kaddq` wraps on overflow; avoid the debug-mode overflow panic of `+`.
    a.wrapping_add(b)
}

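// Illustrative sketch, not part of the module: like the `kaddd`/`kaddq`
// instructions, the additions above wrap on overflow instead of trapping.
// Hypothetical helper, assuming AVX-512BW.
//
//     #[target_feature(enable = "avx512bw")]
//     fn kadd_wraps() {
//         debug_assert_eq!(_kadd_mask32(u32::MAX, 1), 0); // 0xFFFF_FFFF + 1 wraps to 0
//     }
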
/// Compute the bitwise AND of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask32&expand=3213)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    a & b
}

/// Compute the bitwise AND of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask64&expand=3214)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    a & b
}

/// Compute the bitwise NOT of 32-bit mask a, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask32&expand=3234)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _knot_mask32(a: __mmask32) -> __mmask32 {
    !a
}

/// Compute the bitwise NOT of 64-bit mask a, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask64&expand=3235)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _knot_mask64(a: __mmask64) -> __mmask64 {
    !a
}

/// Compute the bitwise NOT of 32-bit mask a and then AND with b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask32&expand=3219)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    _knot_mask32(a) & b
}

/// Compute the bitwise NOT of 64-bit mask a and then AND with b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask64&expand=3220)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    _knot_mask64(a) & b
}

/// Compute the bitwise OR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask32&expand=3240)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    a | b
}

/// Compute the bitwise OR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask64&expand=3241)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    a | b
}

/// Compute the bitwise XOR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask32&expand=3292)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    a ^ b
}

/// Compute the bitwise XOR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask64&expand=3293)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    a ^ b
}

/// Compute the bitwise XNOR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask32&expand=3286)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    _knot_mask32(a ^ b)
}

/// Compute the bitwise XNOR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask64&expand=3287)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    _knot_mask64(a ^ b)
}

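// Illustrative sketch, not part of the module: the mask ops compose like
// ordinary integer bit-ops, e.g. "bits set in exactly one of a, b, c".
// Hypothetical helper, assuming AVX-512BW.
//
//     #[target_feature(enable = "avx512bw")]
//     fn exactly_one_of(a: __mmask32, b: __mmask32, c: __mmask32) -> __mmask32 {
//         let in_some_pair = _kor_mask32(
//             _kor_mask32(_kand_mask32(a, b), _kand_mask32(b, c)),
//             _kand_mask32(a, c),
//         );
//         // !in_some_pair & (a | b | c)
//         _kandn_mask32(in_some_pair, _kor_mask32(_kor_mask32(a, b), c))
//     }
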
/// Compute the bitwise OR of 32-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _kortest_mask32_u8(a: __mmask32, b: __mmask32, all_ones: *mut u8) -> u8 {
    let tmp = _kor_mask32(a, b);
    *all_ones = (tmp == 0xffffffff) as u8;
    (tmp == 0) as u8
}

/// Compute the bitwise OR of 64-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _kortest_mask64_u8(a: __mmask64, b: __mmask64, all_ones: *mut u8) -> u8 {
    let tmp = _kor_mask64(a, b);
    *all_ones = (tmp == 0xffffffff_ffffffff) as u8;
    (tmp == 0) as u8
}

/// Compute the bitwise OR of 32-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _kortestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
    (_kor_mask32(a, b) == 0xffffffff) as u8
}

/// Compute the bitwise OR of 64-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _kortestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
    (_kor_mask64(a, b) == 0xffffffff_ffffffff) as u8
}

/// Compute the bitwise OR of 32-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _kortestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
    (_kor_mask32(a, b) == 0) as u8
}

/// Compute the bitwise OR of 64-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _kortestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
    (_kor_mask64(a, b) == 0) as u8
}

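// Illustrative sketch, not part of the module: `kortest*` folds the OR of two
// masks into the two flag-style results "all zeros" (ZF) and "all ones" (CF).
// Hypothetical helper, assuming AVX-512BW.
//
//     #[target_feature(enable = "avx512bw")]
//     unsafe fn classify_or(a: __mmask32, b: __mmask32) -> (bool, bool) {
//         let mut all_ones: u8 = 0;
//         let all_zeros = _kortest_mask32_u8(a, b, &mut all_ones);
//         (all_zeros != 0, all_ones != 0)
//     }
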
10424/// Shift the bits of 32-bit mask a left by count while shifting in zeros, and store the least significant 32 bits of the result in k.
10425///
10426/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask32)
10427#[inline]
10428#[target_feature(enable = "avx512bw")]
10429#[rustc_legacy_const_generics(1)]
10430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10431pub fn _kshiftli_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
10432    a << COUNT
10433}
10434
10435/// Shift the bits of 64-bit mask a left by count while shifting in zeros, and store the least significant 32 bits of the result in k.
10436///
10437/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask64)
10438#[inline]
10439#[target_feature(enable = "avx512bw")]
10440#[rustc_legacy_const_generics(1)]
10441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10442pub fn _kshiftli_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
10443    a << COUNT
10444}
10445
10446/// Shift the bits of 32-bit mask a right by count while shifting in zeros, and store the least significant 32 bits of the result in k.
10447///
10448/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask32)
10449#[inline]
10450#[target_feature(enable = "avx512bw")]
10451#[rustc_legacy_const_generics(1)]
10452#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10453pub fn _kshiftri_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
10454    a >> COUNT
10455}
10456
10457/// Shift the bits of 64-bit mask a right by count while shifting in zeros, and store the least significant 32 bits of the result in k.
10458///
10459/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask64)
10460#[inline]
10461#[target_feature(enable = "avx512bw")]
10462#[rustc_legacy_const_generics(1)]
10463#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10464pub fn _kshiftri_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
10465    a >> COUNT
10466}
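
// Illustrative sketch (not part of the upstream source): the mask shifts act
// like plain integer shifts on the mask bits. `kshift_demo` is a hypothetical
// name used only for demonstration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn kshift_demo() {
    let k: __mmask32 = 0b0110;
    assert_eq!(_kshiftli_mask32::<4>(k), 0b0110_0000);
    assert_eq!(_kshiftri_mask32::<1>(k), 0b0011);
}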

/// Compute the bitwise AND of 32-bit masks a and b, and if the result is all zeros, store 1 in dst,
/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b; if the result is all
/// zeros, store 1 in and_not, otherwise store 0 in and_not.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _ktest_mask32_u8(a: __mmask32, b: __mmask32, and_not: *mut u8) -> u8 {
    *and_not = (_kandn_mask32(a, b) == 0) as u8;
    (_kand_mask32(a, b) == 0) as u8
}

/// Compute the bitwise AND of 64-bit masks a and b, and if the result is all zeros, store 1 in dst,
/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b; if the result is all
/// zeros, store 1 in and_not, otherwise store 0 in and_not.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _ktest_mask64_u8(a: __mmask64, b: __mmask64, and_not: *mut u8) -> u8 {
    *and_not = (_kandn_mask64(a, b) == 0) as u8;
    (_kand_mask64(a, b) == 0) as u8
}
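
// Illustrative sketch (not part of the upstream source): _ktest_mask32_u8
// reports both the AND-is-zero result (return value) and the ANDN-is-zero
// result (through the out-pointer) in one call. `ktest_demo` is a
// hypothetical name used only for demonstration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn ktest_demo() {
    let mut and_not = 0u8;
    // a & b == 0, so the return value is 1; !a & b == 0b0100 != 0, so
    // and_not is set to 0.
    let zf = unsafe { _ktest_mask32_u8(0b0011, 0b0100, &mut and_not) };
    assert_eq!(zf, 1);
    assert_eq!(and_not, 0);
}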

/// Compute the bitwise NOT of 32-bit mask a and then AND with 32-bit mask b; if the result is all
/// zeros, store 1 in dst, otherwise store 0 in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _ktestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
    (_kandn_mask32(a, b) == 0) as u8
}

/// Compute the bitwise NOT of 64-bit mask a and then AND with 64-bit mask b; if the result is all
/// zeros, store 1 in dst, otherwise store 0 in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _ktestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
    (_kandn_mask64(a, b) == 0) as u8
}

/// Compute the bitwise AND of 32-bit masks a and b; if the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _ktestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
    (_kand_mask32(a, b) == 0) as u8
}

/// Compute the bitwise AND of 64-bit masks a and b; if the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _ktestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
    (_kand_mask64(a, b) == 0) as u8
}
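
// Illustrative sketch (not part of the upstream source): the single-condition
// AND tests. `ktest_single_demo` is a hypothetical name used only for
// demonstration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn ktest_single_demo() {
    // b is a subset of a, so !a & b == 0 and the "carry" test reports 1.
    assert_eq!(_ktestc_mask32_u8(0b1110, 0b0110), 1);
    // Disjoint masks AND to zero, so the "zero" test reports 1.
    assert_eq!(_ktestz_mask64_u8(0b0101, 0b1010), 1);
}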

/// Unpack and interleave 16 bits from masks a and b, and store the 32-bit result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackw)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] // generates plain and/shift/or code instead of kunpckwd
pub fn _mm512_kunpackw(a: __mmask32, b: __mmask32) -> __mmask32 {
    ((a & 0xffff) << 16) | (b & 0xffff)
}

/// Unpack and interleave 32 bits from masks a and b, and store the 64-bit result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackd)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] // generates plain and/shift/or code instead of kunpckdq
pub fn _mm512_kunpackd(a: __mmask64, b: __mmask64) -> __mmask64 {
    ((a & 0xffffffff) << 32) | (b & 0xffffffff)
}
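
// Illustrative sketch (not part of the upstream source): kunpackw places the
// low 16 bits of `a` in the upper half of the result and the low 16 bits of
// `b` in the lower half. `kunpack_demo` is a hypothetical name used only for
// demonstration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn kunpack_demo() {
    let r = _mm512_kunpackw(0xffff_aaaa, 0xffff_5555);
    assert_eq!(r, 0xaaaa_5555);
}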

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi8&expand=1407)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm512_cvtepi16_epi8(a: __m512i) -> __m256i {
    unsafe {
        let a = a.as_i16x32();
        transmute::<i8x32, _>(simd_cast(a))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi8&expand=1408)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm512_mask_cvtepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    unsafe {
        let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, convert, src.as_i8x32()))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi8&expand=1409)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm512_maskz_cvtepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    unsafe {
        let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, convert, i8x32::ZERO))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi8&expand=1404)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm256_cvtepi16_epi8(a: __m256i) -> __m128i {
    unsafe {
        let a = a.as_i16x16();
        transmute::<i8x16, _>(simd_cast(a))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi8&expand=1405)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm256_mask_cvtepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
    unsafe {
        let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi8&expand=1406)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
    unsafe {
        let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi8&expand=1401)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i16x8();
        let v256: i16x16 = simd_shuffle!(
            a,
            i16x8::ZERO,
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]
        );
        transmute::<i8x16, _>(simd_cast(v256))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi8&expand=1402)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi16_epi8(a).as_i8x16();
        // Force the upper eight mask bits on so the select keeps the zeroed
        // upper lanes of `convert`: the instruction zeroes the upper 64 bits
        // of the destination even in the writemask form.
        let k: __mmask16 = 0b11111111_00000000 | k as __mmask16;
        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
    }
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi8&expand=1403)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi16_epi8(a).as_i8x16();
        // The upper eight mask bits stay zero, so the select zeroes the
        // upper lanes as required.
        let k: __mmask16 = k as __mmask16;
        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
    }
}
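
// Illustrative sketch (not part of the upstream source): truncation simply
// drops the upper byte of each word, and the 128-bit variant only populates
// the low eight byte lanes. `cvt_truncate_demo` is a hypothetical name used
// only for demonstration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn cvt_truncate_demo() {
    let a = _mm_set1_epi16(0x0134);
    let r = _mm_cvtepi16_epi8(a);
    let bytes: [i8; 16] = unsafe { transmute(r) };
    assert_eq!(bytes[0], 0x34); // 0x0134 truncates to 0x34
    assert_eq!(bytes[8], 0); // the upper 64 bits are zero
}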

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi16_epi8&expand=1807)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i {
    unsafe {
        transmute(vpmovswb(
            a.as_i16x32(),
            i8x32::ZERO,
            0b11111111_11111111_11111111_11111111,
        ))
    }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_epi8&expand=1808)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovswb(a.as_i16x32(), src.as_i8x32(), k)) }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi16_epi8&expand=1809)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovswb(a.as_i16x32(), i8x32::ZERO, k)) }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi16_epi8&expand=1804)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, 0b11111111_11111111)) }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_epi8&expand=1805)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovswb256(a.as_i16x16(), src.as_i8x16(), k)) }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi16_epi8&expand=1806)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, k)) }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi16_epi8&expand=1801)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm_cvtsepi16_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_epi8&expand=1802)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm_mask_cvtsepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovswb128(a.as_i16x8(), src.as_i8x16(), k)) }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi16_epi8&expand=1803)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, k)) }
}
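
// Illustrative sketch (not part of the upstream source): signed saturation
// clamps out-of-range words to i8::MIN/i8::MAX instead of truncating them.
// `cvt_saturate_demo` is a hypothetical name used only for demonstration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn cvt_saturate_demo() {
    let a = _mm_set1_epi16(300); // above i8::MAX
    let r = _mm_cvtsepi16_epi8(a);
    let bytes: [i8; 16] = unsafe { transmute(r) };
    assert_eq!(bytes[0], i8::MAX); // 300 saturates to 127
}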

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi16_epi8&expand=2042)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i {
    unsafe {
        transmute(vpmovuswb(
            a.as_u16x32(),
            u8x32::ZERO,
            0b11111111_11111111_11111111_11111111,
        ))
    }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_epi8&expand=2043)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovuswb(a.as_u16x32(), src.as_u8x32(), k)) }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi16_epi8&expand=2044)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovuswb(a.as_u16x32(), u8x32::ZERO, k)) }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi16_epi8&expand=2039)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i {
    unsafe {
        transmute(vpmovuswb256(
            a.as_u16x16(),
            u8x16::ZERO,
            0b11111111_11111111,
        ))
    }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_epi8&expand=2040)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovuswb256(a.as_u16x16(), src.as_u8x16(), k)) }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi16_epi8&expand=2041)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovuswb256(a.as_u16x16(), u8x16::ZERO, k)) }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi16_epi8&expand=2036)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm_cvtusepi16_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_epi8&expand=2037)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm_mask_cvtusepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovuswb128(a.as_u16x8(), src.as_u8x16(), k)) }
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi16_epi8&expand=2038)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm_maskz_cvtusepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, k)) }
}
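
// Illustrative sketch (not part of the upstream source): unsigned saturation
// clamps every word above 255 to u8::MAX. `cvt_usaturate_demo` is a
// hypothetical name used only for demonstration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
fn cvt_usaturate_demo() {
    let a = _mm_set1_epi16(0x0301); // 769, above u8::MAX
    let r = _mm_cvtusepi16_epi8(a);
    let bytes: [u8; 16] = unsafe { transmute(r) };
    assert_eq!(bytes[0], u8::MAX);
}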

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi16&expand=1526)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub fn _mm512_cvtepi8_epi16(a: __m256i) -> __m512i {
    unsafe {
        let a = a.as_i8x32();
        transmute::<i16x32, _>(simd_cast(a))
    }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi16&expand=1527)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub fn _mm512_mask_cvtepi8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
    }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi16&expand=1528)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub fn _mm512_maskz_cvtepi8_epi16(k: __mmask32, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, convert, i16x32::ZERO))
    }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi16&expand=1524)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub fn _mm256_mask_cvtepi8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
    }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi16&expand=1525)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub fn _mm256_maskz_cvtepi8_epi16(k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
    }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi16&expand=1521)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub fn _mm_mask_cvtepi8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi8_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
    }
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi16&expand=1522)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub fn _mm_maskz_cvtepi8_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi8_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
    }
}
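
// Illustrative sketch (not part of the upstream source): sign extension
// widens each byte preserving its value, so negative bytes stay negative.
// `cvt_sext_demo` is a hypothetical name used only for demonstration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn cvt_sext_demo() {
    let a = _mm256_set1_epi8(-1);
    let r = _mm512_cvtepi8_epi16(a);
    let words: [i16; 32] = unsafe { transmute(r) };
    assert_eq!(words[0], -1);
}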

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi16&expand=1612)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub fn _mm512_cvtepu8_epi16(a: __m256i) -> __m512i {
    unsafe {
        let a = a.as_u8x32();
        transmute::<i16x32, _>(simd_cast(a))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi16&expand=1613)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub fn _mm512_mask_cvtepu8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi16&expand=1614)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub fn _mm512_maskz_cvtepu8_epi16(k: __mmask32, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, convert, i16x32::ZERO))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi16&expand=1610)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub fn _mm256_mask_cvtepu8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi16&expand=1611)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub fn _mm256_maskz_cvtepu8_epi16(k: __mmask16, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi16&expand=1607)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub fn _mm_mask_cvtepu8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu8_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi16&expand=1608)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu8_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
    }
}
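
// Illustrative sketch (not part of the upstream source): zero extension
// treats each byte as unsigned, so 0xff becomes 0x00ff rather than -1.
// `cvt_zext_demo` is a hypothetical name used only for demonstration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn cvt_zext_demo() {
    let a = _mm256_set1_epi8(-1); // every byte is 0xff
    let r = _mm512_cvtepu8_epi16(a);
    let words: [u16; 32] = unsafe { transmute(r) };
    assert_eq!(words[0], 0x00ff);
}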

/// Shift 128-bit lanes in a left by imm8 bytes while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bslli_epi128&expand=591)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_bslli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Byte i of each 128-bit lane receives a[i - shift]; bytes with no
        // source (i % 16 < shift, or a shift of more than 15) receive zero.
        const fn mask(shift: i32, i: u32) -> u32 {
            let shift = shift as u32 & 0xff;
            if shift > 15 || i % 16 < shift {
                0
            } else {
                64 + (i - shift)
            }
        }
        let a = a.as_i8x64();
        let zero = i8x64::ZERO;
        let r: i8x64 = simd_shuffle!(
            zero,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
                mask(IMM8, 32),
                mask(IMM8, 33),
                mask(IMM8, 34),
                mask(IMM8, 35),
                mask(IMM8, 36),
                mask(IMM8, 37),
                mask(IMM8, 38),
                mask(IMM8, 39),
                mask(IMM8, 40),
                mask(IMM8, 41),
                mask(IMM8, 42),
                mask(IMM8, 43),
                mask(IMM8, 44),
                mask(IMM8, 45),
                mask(IMM8, 46),
                mask(IMM8, 47),
                mask(IMM8, 48),
                mask(IMM8, 49),
                mask(IMM8, 50),
                mask(IMM8, 51),
                mask(IMM8, 52),
                mask(IMM8, 53),
                mask(IMM8, 54),
                mask(IMM8, 55),
                mask(IMM8, 56),
                mask(IMM8, 57),
                mask(IMM8, 58),
                mask(IMM8, 59),
                mask(IMM8, 60),
                mask(IMM8, 61),
                mask(IMM8, 62),
                mask(IMM8, 63),
            ],
        );
        transmute(r)
    }
}
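
// Illustrative sketch (not part of the upstream source): each 128-bit lane
// shifts independently, so byte 0 of every lane becomes zero and nothing
// crosses a lane boundary. `bslli_demo` is a hypothetical name used only for
// demonstration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn bslli_demo() {
    let a = _mm512_set1_epi8(1);
    let r = _mm512_bslli_epi128::<1>(a);
    let bytes: [i8; 64] = unsafe { transmute(r) };
    assert_eq!(bytes[0], 0); // shifted-in zero at the bottom of lane 0
    assert_eq!(bytes[16], 0); // and at the bottom of lane 1
    assert_eq!(bytes[1], 1); // old byte 0 moved up by one
}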

/// Shift 128-bit lanes in a right by imm8 bytes while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bsrli_epi128&expand=594)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i8x64();
        let zero = i8x64::ZERO;
        // Shift counts of 16..=255 move every byte out of its lane; the `_`
        // arm handles them by producing zero.
        let r: i8x64 = match IMM8 {
            0 => {
                simd_shuffle!(
                    a,
                    zero,
                    [
                        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
                        21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
                        40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
                        59, 60, 61, 62, 63,
                    ],
                )
            }
            1 => {
                simd_shuffle!(
                    a,
                    zero,
                    [
                        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 17, 18, 19, 20, 21,
                        22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 80, 33, 34, 35, 36, 37, 38, 39, 40,
                        41, 42, 43, 44, 45, 46, 47, 96, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
                        60, 61, 62, 63, 112,
                    ],
                )
            }
            2 => {
                simd_shuffle!(
                    a,
                    zero,
                    [
                        2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 18, 19, 20, 21, 22,
                        23, 24, 25, 26, 27, 28, 29, 30, 31, 80, 81, 34, 35, 36, 37, 38, 39, 40, 41,
                        42, 43, 44, 45, 46, 47, 96, 97, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
                        61, 62, 63, 112, 113,
                    ],
                )
            }
            3 => {
                simd_shuffle!(
                    a,
                    zero,
                    [
                        3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 19, 20, 21, 22,
                        23, 24, 25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 35, 36, 37, 38, 39, 40, 41,
                        42, 43, 44, 45, 46, 47, 96, 97, 98, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
                        61, 62, 63, 112, 113, 114,
                    ],
                )
            }
            4 => {
                simd_shuffle!(
                    a,
                    zero,
                    [
                        4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 20, 21, 22, 23,
                        24, 25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 36, 37, 38, 39, 40, 41, 42,
                        43, 44, 45, 46, 47, 96, 97, 98, 99, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
                        62, 63, 112, 113, 114, 115,
                    ],
                )
            }
            5 => {
                simd_shuffle!(
                    a,
                    zero,
                    [
                        5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 21, 22, 23, 24,
                        25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 37, 38, 39, 40, 41, 42, 43,
                        44, 45, 46, 47, 96, 97, 98, 99, 100, 53, 54, 55, 56, 57, 58, 59, 60, 61,
                        62, 63, 112, 113, 114, 115, 116,
                    ],
                )
            }
            6 => {
                simd_shuffle!(
                    a,
                    zero,
                    [
                        6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 22, 23, 24, 25,
                        26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 38, 39, 40, 41, 42, 43, 44,
                        45, 46, 47, 96, 97, 98, 99, 100, 101, 54, 55, 56, 57, 58, 59, 60, 61, 62,
                        63, 112, 113, 114, 115, 116, 117,
                    ],
                )
            }
            7 => {
                simd_shuffle!(
                    a,
                    zero,
                    [
                        7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 23, 24, 25,
                        26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 39, 40, 41, 42, 43, 44,
                        45, 46, 47, 96, 97, 98, 99, 100, 101, 102, 55, 56, 57, 58, 59, 60, 61, 62,
                        63, 112, 113, 114, 115, 116, 117, 118,
                    ],
                )
            }
            8 => {
                simd_shuffle!(
                    a,
                    zero,
                    [
                        8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 24, 25, 26,
                        27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 40, 41, 42, 43, 44, 45,
                        46, 47, 96, 97, 98, 99, 100, 101, 102, 103, 56, 57, 58, 59, 60, 61, 62, 63,
                        112, 113, 114, 115, 116, 117, 118, 119,
                    ],
                )
            }
            9 => {
                simd_shuffle!(
                    a,
                    zero,
                    [
                        9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 25, 26, 27,
                        28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 41, 42, 43, 44, 45, 46,
                        47, 96, 97, 98, 99, 100, 101, 102, 103, 104, 57, 58, 59, 60, 61, 62, 63,
                        112, 113, 114, 115, 116, 117, 118, 119, 120,
                    ],
                )
            }
            10 => {
                simd_shuffle!(
                    a,
                    zero,
                    [
                        10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 26, 27, 28,
                        29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 42, 43, 44, 45, 46, 47,
                        96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 58, 59, 60, 61, 62, 63, 112,
                        113, 114, 115, 116, 117, 118, 119, 120, 121,
                    ],
                )
            }
            11 => {
                simd_shuffle!(
                    a,
                    zero,
                    [
                        11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 27, 28, 29,
                        30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 43, 44, 45, 46, 47, 96,
                        97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 59, 60, 61, 62, 63, 112,
                        113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
                    ],
                )
            }
            12 => {
                simd_shuffle!(
                    a,
                    zero,
                    [
                        12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 28, 29, 30,
                        31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 44, 45, 46, 47, 96, 97,
                        98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 60, 61, 62, 63, 112, 113,
                        114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
                    ],
                )
            }
            13 => {
                simd_shuffle!(
                    a,
                    zero,
                    [
                        13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 29, 30, 31,
                        80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 45, 46, 47, 96, 97, 98,
                        99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 61, 62, 63, 112, 113, 114,
                        115, 116, 117, 118, 119, 120, 121, 122, 123, 124,
                    ],
                )
            }
            14 => {
                simd_shuffle!(
                    a,
                    zero,
                    [
                        14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 30, 31, 80,
                        81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 46, 47, 96, 97, 98, 99,
                        100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 62, 63, 112, 113, 114,
                        115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125,
                    ],
                )
            }
            15 => {
                simd_shuffle!(
                    a,
                    zero,
                    [
                        15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 31, 80, 81,
                        82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 47, 96, 97, 98, 99,
                        100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 63, 112, 113, 114,
                        115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
                    ],
                )
            }
            _ => zero,
        };
        transmute(r)
    }
}
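
// Illustrative sketch (not part of the upstream source): the right-shift
// counterpart pulls zeros into the top of each 128-bit lane. `bsrli_demo` is
// a hypothetical name used only for demonstration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
fn bsrli_demo() {
    let a = _mm512_set1_epi8(1);
    let r = _mm512_bsrli_epi128::<1>(a);
    let bytes: [i8; 64] = unsafe { transmute(r) };
    assert_eq!(bytes[14], 1); // old byte 15 moved down by one
    assert_eq!(bytes[15], 0); // shifted-in zero at the top of lane 0
}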

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst.
/// Unlike the [`_mm_alignr_epi8`] and [`_mm256_alignr_epi8`] functions, where the entire input vectors are concatenated into the temporary result,
/// this concatenation happens in 4 independent steps, where each step builds a 32-byte temporary result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi8&expand=263)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // If palignr is shifting the pair of vectors more than the size of two
        // lanes, emit zero.
        if IMM8 >= 32 {
            return _mm512_setzero_si512();
        }
        // If palignr is shifting the pair of input vectors more than one lane,
        // but less than two lanes, convert to shifting in zeroes.
        let (a, b) = if IMM8 > 16 {
            (_mm512_setzero_si512(), a)
        } else {
            (a, b)
        };
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        if IMM8 == 16 {
            return transmute(a);
        }
        let r: i8x64 = match IMM8 % 16 {
            0 => {
                simd_shuffle!(
                    b,
                    a,
                    [
                        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
                        21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
                        40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
                        59, 60, 61, 62, 63,
                    ],
                )
            }
            1 => {
                simd_shuffle!(
                    b,
                    a,
                    [
                        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 17, 18, 19, 20, 21,
                        22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 80, 33, 34, 35, 36, 37, 38, 39, 40,
                        41, 42, 43, 44, 45, 46, 47, 96, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
                        60, 61, 62, 63, 112,
                    ],
                )
            }
            2 => {
                simd_shuffle!(
                    b,
                    a,
                    [
                        2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 18, 19, 20, 21, 22,
                        23, 24, 25, 26, 27, 28, 29, 30, 31, 80, 81, 34, 35, 36, 37, 38, 39, 40, 41,
                        42, 43, 44, 45, 46, 47, 96, 97, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
                        61, 62, 63, 112, 113,
                    ],
                )
            }
            3 => {
                simd_shuffle!(
                    b,
                    a,
                    [
                        3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 19, 20, 21, 22,
                        23, 24, 25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 35, 36, 37, 38, 39, 40, 41,
                        42, 43, 44, 45, 46, 47, 96, 97, 98, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
                        61, 62, 63, 112, 113, 114,
                    ],
                )
            }
            4 => {
                simd_shuffle!(
                    b,
                    a,
                    [
                        4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 20, 21, 22, 23,
                        24, 25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 36, 37, 38, 39, 40, 41, 42,
                        43, 44, 45, 46, 47, 96, 97, 98, 99, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
                        62, 63, 112, 113, 114, 115,
                    ],
                )
            }
            5 => {
                simd_shuffle!(
                    b,
                    a,
                    [
                        5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 21, 22, 23, 24,
                        25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 37, 38, 39, 40, 41, 42, 43,
                        44, 45, 46, 47, 96, 97, 98, 99, 100, 53, 54, 55, 56, 57, 58, 59, 60, 61,
                        62, 63, 112, 113, 114, 115, 116,
                    ],
                )
            }
            6 => {
                simd_shuffle!(
                    b,
                    a,
                    [
                        6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 22, 23, 24, 25,
                        26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 38, 39, 40, 41, 42, 43, 44,
                        45, 46, 47, 96, 97, 98, 99, 100, 101, 54, 55, 56, 57, 58, 59, 60, 61, 62,
                        63, 112, 113, 114, 115, 116, 117,
                    ],
                )
            }
            7 => {
                simd_shuffle!(
                    b,
                    a,
                    [
                        7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 23, 24, 25,
                        26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 39, 40, 41, 42, 43, 44,
                        45, 46, 47, 96, 97, 98, 99, 100, 101, 102, 55, 56, 57, 58, 59, 60, 61, 62,
                        63, 112, 113, 114, 115, 116, 117, 118,
                    ],
                )
            }
            8 => {
                simd_shuffle!(
                    b,
                    a,
                    [
                        8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 24, 25, 26,
                        27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 40, 41, 42, 43, 44, 45,
                        46, 47, 96, 97, 98, 99, 100, 101, 102, 103, 56, 57, 58, 59, 60, 61, 62, 63,
                        112, 113, 114, 115, 116, 117, 118, 119,
                    ],
                )
            }
            9 => {
                simd_shuffle!(
                    b,
                    a,
                    [
                        9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 25, 26, 27,
                        28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 41, 42, 43, 44, 45, 46,
                        47, 96, 97, 98, 99, 100, 101, 102, 103, 104, 57, 58, 59, 60, 61, 62, 63,
                        112, 113, 114, 115, 116, 117, 118, 119, 120,
                    ],
                )
            }
            10 => {
                simd_shuffle!(
                    b,
                    a,
                    [
                        10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 26, 27, 28,
                        29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 42, 43, 44, 45, 46, 47,
                        96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 58, 59, 60, 61, 62, 63, 112,
                        113, 114, 115, 116, 117, 118, 119, 120, 121,
                    ],
                )
            }
            11 => {
                simd_shuffle!(
                    b,
                    a,
                    [
                        11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 27, 28, 29,
                        30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 43, 44, 45, 46, 47, 96,
                        97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 59, 60, 61, 62, 63, 112,
                        113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
                    ],
                )
            }
            12 => {
                simd_shuffle!(
                    b,
                    a,
                    [
                        12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 28, 29, 30,
                        31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 44, 45, 46, 47, 96, 97,
                        98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 60, 61, 62, 63, 112, 113,
                        114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
                    ],
                )
            }
            13 => {
                simd_shuffle!(
                    b,
                    a,
                    [
                        13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 29, 30, 31,
                        80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 45, 46, 47, 96, 97, 98,
                        99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 61, 62, 63, 112, 113, 114,
                        115, 116, 117, 118, 119, 120, 121, 122, 123, 124,
                    ],
                )
            }
            14 => {
                simd_shuffle!(
                    b,
                    a,
                    [
                        14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 30, 31, 80,
                        81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 46, 47, 96, 97, 98, 99,
                        100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 62, 63, 112, 113, 114,
                        115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125,
                    ],
                )
            }
            15 => {
                simd_shuffle!(
                    b,
                    a,
                    [
                        15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 31, 80, 81,
                        82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 47, 96, 97, 98, 99,
                        100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 63, 112, 113, 114,
                        115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
11631                    ],
11632                )
11633            }
11634            _ => unreachable_unchecked(),
11635        };
11636        transmute(r)
11637    }
11638}
11639
/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi8&expand=264)
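///
/// A minimal usage sketch (illustrative only, not a doc-test; it assumes
/// `avx512bw` support has already been verified at runtime):
///
/// ```ignore
/// // With IMM8 = 2, each pair of 16-byte blocks from `a` and `b` is
/// // concatenated and shifted right by 2 bytes; lanes whose mask bit
/// // is clear are copied from `src` instead.
/// let src = _mm512_set1_epi8(-1);
/// let a = _mm512_set1_epi8(1);
/// let b = _mm512_set1_epi8(2);
/// let r = _mm512_mask_alignr_epi8::<2>(src, !0, a, b);
/// ```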
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_alignr_epi8<const IMM8: i32>(
    src: __m512i,
    k: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_alignr_epi8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i8x64(), src.as_i8x64()))
    }
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi8&expand=265)
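///
/// A minimal usage sketch (illustrative only, not a doc-test): the same
/// operation as the write-masked form above, except that lanes whose mask
/// bit is clear are zeroed rather than copied from a `src` vector:
///
/// ```ignore
/// let a = _mm512_set1_epi8(1);
/// let b = _mm512_set1_epi8(2);
/// // Keep only the low 32 result lanes; the upper 32 become 0.
/// let r = _mm512_maskz_alignr_epi8::<2>(0xFFFF_FFFF, a, b);
/// ```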
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_alignr_epi8<const IMM8: i32>(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_alignr_epi8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i8x64(), i8x64::ZERO))
    }
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi8&expand=261)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
pub fn _mm256_mask_alignr_epi8<const IMM8: i32>(
    src: __m256i,
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm256_alignr_epi8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i8x32(), src.as_i8x32()))
    }
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi8&expand=262)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
pub fn _mm256_maskz_alignr_epi8<const IMM8: i32>(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm256_alignr_epi8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i8x32(), i8x32::ZERO))
    }
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi8&expand=258)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
pub fn _mm_mask_alignr_epi8<const IMM8: i32>(
    src: __m128i,
    k: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm_alignr_epi8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i8x16(), src.as_i8x16()))
    }
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi8&expand=259)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
pub fn _mm_maskz_alignr_epi8<const IMM8: i32>(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm_alignr_epi8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i8x16(), i8x16::ZERO))
    }
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_storeu_epi8&expand=1812)
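///
/// A minimal usage sketch (illustrative only, not a doc-test; `buf` is a
/// hypothetical destination buffer). Only bytes whose mask bit is set are
/// written, so unselected bytes of `buf` keep their previous contents:
///
/// ```ignore
/// let a = _mm512_set1_epi16(300); // 300 saturates to i8::MAX (127)
/// let mut buf = [0i8; 32];
/// _mm512_mask_cvtsepi16_storeu_epi8(buf.as_mut_ptr(), !0, a);
/// assert!(buf.iter().all(|&x| x == i8::MAX));
/// ```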
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm512_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    vpmovswbmem(mem_addr, a.as_i16x32(), k);
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_storeu_epi8&expand=1811)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm256_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
    vpmovswbmem256(mem_addr, a.as_i16x16(), k);
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_storeu_epi8&expand=1810)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovswbmem128(mem_addr, a.as_i16x8(), k);
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_storeu_epi8&expand=1412)
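///
/// A minimal usage sketch (illustrative only, not a doc-test; `buf` is a
/// hypothetical destination buffer). Truncation keeps only the low 8 bits
/// of each 16-bit lane:
///
/// ```ignore
/// let a = _mm512_set1_epi16(0x0180);
/// let mut buf = [0i8; 32];
/// _mm512_mask_cvtepi16_storeu_epi8(buf.as_mut_ptr(), !0, a);
/// // 0x0180 truncates to 0x80, i.e. -128 as an i8.
/// assert!(buf.iter().all(|&x| x == -128));
/// ```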
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    vpmovwbmem(mem_addr, a.as_i16x32(), k);
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_storeu_epi8&expand=1411)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
    vpmovwbmem256(mem_addr, a.as_i16x16(), k);
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_storeu_epi8&expand=1410)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovwbmem128(mem_addr, a.as_i16x8(), k);
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_storeu_epi8&expand=2047)
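///
/// A minimal usage sketch (illustrative only, not a doc-test; `buf` is a
/// hypothetical destination buffer). Unsigned saturation clamps each lane
/// to `u8::MAX`:
///
/// ```ignore
/// let a = _mm512_set1_epi16(0x0400); // 1024 saturates to u8::MAX (255)
/// let mut buf = [0u8; 32];
/// _mm512_mask_cvtusepi16_storeu_epi8(buf.as_mut_ptr().cast(), !0, a);
/// assert!(buf.iter().all(|&x| x == u8::MAX));
/// ```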
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm512_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    vpmovuswbmem(mem_addr, a.as_i16x32(), k);
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_storeu_epi8&expand=2046)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm256_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
    vpmovuswbmem256(mem_addr, a.as_i16x16(), k);
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_storeu_epi8&expand=2045)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovuswbmem128(mem_addr, a.as_i16x8(), k);
}

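// Raw bindings to the LLVM intrinsics that back the wrappers above. The
// `link_name` strings are LLVM's internal names for the corresponding
// AVX-512BW instructions and are not a stable interface.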
#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.avx512.pmul.hr.sw.512"]
    fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;

    #[link_name = "llvm.x86.avx512.pmaddw.d.512"]
    fn vpmaddwd(a: i16x32, b: i16x32) -> i32x16;
    #[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
    fn vpmaddubsw(a: i8x64, b: i8x64) -> i16x32;

    #[link_name = "llvm.x86.avx512.packssdw.512"]
    fn vpackssdw(a: i32x16, b: i32x16) -> i16x32;
    #[link_name = "llvm.x86.avx512.packsswb.512"]
    fn vpacksswb(a: i16x32, b: i16x32) -> i8x64;
    #[link_name = "llvm.x86.avx512.packusdw.512"]
    fn vpackusdw(a: i32x16, b: i32x16) -> u16x32;
    #[link_name = "llvm.x86.avx512.packuswb.512"]
    fn vpackuswb(a: i16x32, b: i16x32) -> u8x64;

    #[link_name = "llvm.x86.avx512.psll.w.512"]
    fn vpsllw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.psllv.w.512"]
    fn vpsllvw(a: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.psllv.w.256"]
    fn vpsllvw256(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.psllv.w.128"]
    fn vpsllvw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.psrl.w.512"]
    fn vpsrlw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.psrlv.w.512"]
    fn vpsrlvw(a: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.psrlv.w.256"]
    fn vpsrlvw256(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.psrlv.w.128"]
    fn vpsrlvw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.psra.w.512"]
    fn vpsraw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.psrav.w.512"]
    fn vpsravw(a: i16x32, count: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.psrav.w.256"]
    fn vpsravw256(a: i16x16, count: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.psrav.w.128"]
    fn vpsravw128(a: i16x8, count: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.vpermi2var.hi.512"]
    fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.256"]
    fn vpermi2w256(a: i16x16, idx: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.128"]
    fn vpermi2w128(a: i16x8, idx: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.permvar.hi.512"]
    fn vpermw(a: i16x32, idx: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.permvar.hi.256"]
    fn vpermw256(a: i16x16, idx: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.permvar.hi.128"]
    fn vpermw128(a: i16x8, idx: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.pshuf.b.512"]
    fn vpshufb(a: i8x64, b: i8x64) -> i8x64;

    #[link_name = "llvm.x86.avx512.psad.bw.512"]
    fn vpsadbw(a: u8x64, b: u8x64) -> u64x8;

    #[link_name = "llvm.x86.avx512.dbpsadbw.512"]
    fn vdbpsadbw(a: u8x64, b: u8x64, imm8: i32) -> u16x32;
    #[link_name = "llvm.x86.avx512.dbpsadbw.256"]
    fn vdbpsadbw256(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
    #[link_name = "llvm.x86.avx512.dbpsadbw.128"]
    fn vdbpsadbw128(a: u8x16, b: u8x16, imm8: i32) -> u16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.512"]
    fn vpmovswb(a: i16x32, src: i8x32, mask: u32) -> i8x32;
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.256"]
    fn vpmovswb256(a: i16x16, src: i8x16, mask: u16) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.128"]
    fn vpmovswb128(a: i16x8, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.512"]
    fn vpmovuswb(a: u16x32, src: u8x32, mask: u32) -> u8x32;
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.256"]
    fn vpmovuswb256(a: u16x16, src: u8x16, mask: u16) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.128"]
    fn vpmovuswb128(a: u16x8, src: u8x16, mask: u8) -> u8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.512"]
    fn vpmovswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.256"]
    fn vpmovswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.128"]
    fn vpmovswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.512"]
    fn vpmovwbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.256"]
    fn vpmovwbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.128"]
    fn vpmovwbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.512"]
    fn vpmovuswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.256"]
    fn vpmovuswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.128"]
    fn vpmovuswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.loadu.b.128"]
    fn loaddqu8_128(mem_addr: *const i8, a: i8x16, mask: u16) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.loadu.w.128"]
    fn loaddqu16_128(mem_addr: *const i16, a: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.loadu.b.256"]
    fn loaddqu8_256(mem_addr: *const i8, a: i8x32, mask: u32) -> i8x32;
    #[link_name = "llvm.x86.avx512.mask.loadu.w.256"]
    fn loaddqu16_256(mem_addr: *const i16, a: i16x16, mask: u16) -> i16x16;
    #[link_name = "llvm.x86.avx512.mask.loadu.b.512"]
    fn loaddqu8_512(mem_addr: *const i8, a: i8x64, mask: u64) -> i8x64;
    #[link_name = "llvm.x86.avx512.mask.loadu.w.512"]
    fn loaddqu16_512(mem_addr: *const i16, a: i16x32, mask: u32) -> i16x32;

    #[link_name = "llvm.x86.avx512.mask.storeu.b.128"]
    fn storedqu8_128(mem_addr: *mut i8, a: i8x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.storeu.w.128"]
    fn storedqu16_128(mem_addr: *mut i16, a: i16x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.b.256"]
    fn storedqu8_256(mem_addr: *mut i8, a: i8x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.storeu.w.256"]
    fn storedqu16_256(mem_addr: *mut i16, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.storeu.b.512"]
    fn storedqu8_512(mem_addr: *mut i8, a: i8x64, mask: u64);
    #[link_name = "llvm.x86.avx512.mask.storeu.w.512"]
    fn storedqu16_512(mem_addr: *mut i16, a: i16x32, mask: u32);
}

#[cfg(test)]
mod tests {

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;
    use crate::hint::black_box;
    use crate::mem;
11997
11998    #[simd_test(enable = "avx512bw")]
11999    unsafe fn test_mm512_abs_epi16() {
12000        let a = _mm512_set1_epi16(-1);
12001        let r = _mm512_abs_epi16(a);
12002        let e = _mm512_set1_epi16(1);
12003        assert_eq_m512i(r, e);
12004    }
12005
12006    #[simd_test(enable = "avx512bw")]
12007    unsafe fn test_mm512_mask_abs_epi16() {
12008        let a = _mm512_set1_epi16(-1);
12009        let r = _mm512_mask_abs_epi16(a, 0, a);
12010        assert_eq_m512i(r, a);
12011        let r = _mm512_mask_abs_epi16(a, 0b00000000_11111111_00000000_11111111, a);
12012        #[rustfmt::skip]
12013        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
12014                                 -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
12015        assert_eq_m512i(r, e);
12016    }
12017
12018    #[simd_test(enable = "avx512bw")]
12019    unsafe fn test_mm512_maskz_abs_epi16() {
12020        let a = _mm512_set1_epi16(-1);
12021        let r = _mm512_maskz_abs_epi16(0, a);
12022        assert_eq_m512i(r, _mm512_setzero_si512());
12023        let r = _mm512_maskz_abs_epi16(0b00000000_11111111_00000000_11111111, a);
12024        #[rustfmt::skip]
12025        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
12026                                  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
12027        assert_eq_m512i(r, e);
12028    }
12029
12030    #[simd_test(enable = "avx512bw,avx512vl")]
12031    unsafe fn test_mm256_mask_abs_epi16() {
12032        let a = _mm256_set1_epi16(-1);
12033        let r = _mm256_mask_abs_epi16(a, 0, a);
12034        assert_eq_m256i(r, a);
12035        let r = _mm256_mask_abs_epi16(a, 0b00000000_11111111, a);
12036        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
12037        assert_eq_m256i(r, e);
12038    }
12039
12040    #[simd_test(enable = "avx512bw,avx512vl")]
12041    unsafe fn test_mm256_maskz_abs_epi16() {
12042        let a = _mm256_set1_epi16(-1);
12043        let r = _mm256_maskz_abs_epi16(0, a);
12044        assert_eq_m256i(r, _mm256_setzero_si256());
12045        let r = _mm256_maskz_abs_epi16(0b00000000_11111111, a);
12046        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
12047        assert_eq_m256i(r, e);
12048    }
12049
12050    #[simd_test(enable = "avx512bw,avx512vl")]
12051    unsafe fn test_mm_mask_abs_epi16() {
12052        let a = _mm_set1_epi16(-1);
12053        let r = _mm_mask_abs_epi16(a, 0, a);
12054        assert_eq_m128i(r, a);
12055        let r = _mm_mask_abs_epi16(a, 0b00001111, a);
12056        let e = _mm_set_epi16(-1, -1, -1, -1, 1, 1, 1, 1);
12057        assert_eq_m128i(r, e);
12058    }
12059
12060    #[simd_test(enable = "avx512bw,avx512vl")]
12061    unsafe fn test_mm_maskz_abs_epi16() {
12062        let a = _mm_set1_epi16(-1);
12063        let r = _mm_maskz_abs_epi16(0, a);
12064        assert_eq_m128i(r, _mm_setzero_si128());
12065        let r = _mm_maskz_abs_epi16(0b00001111, a);
12066        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
12067        assert_eq_m128i(r, e);
12068    }
12069
12070    #[simd_test(enable = "avx512bw")]
12071    unsafe fn test_mm512_abs_epi8() {
12072        let a = _mm512_set1_epi8(-1);
12073        let r = _mm512_abs_epi8(a);
12074        let e = _mm512_set1_epi8(1);
12075        assert_eq_m512i(r, e);
12076    }
12077
12078    #[simd_test(enable = "avx512bw")]
12079    unsafe fn test_mm512_mask_abs_epi8() {
12080        let a = _mm512_set1_epi8(-1);
12081        let r = _mm512_mask_abs_epi8(a, 0, a);
12082        assert_eq_m512i(r, a);
12083        let r = _mm512_mask_abs_epi8(
12084            a,
12085            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
12086            a,
12087        );
12088        #[rustfmt::skip]
12089        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
12090                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
12091                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
12092                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
12093        assert_eq_m512i(r, e);
12094    }
12095
12096    #[simd_test(enable = "avx512bw")]
12097    unsafe fn test_mm512_maskz_abs_epi8() {
12098        let a = _mm512_set1_epi8(-1);
12099        let r = _mm512_maskz_abs_epi8(0, a);
12100        assert_eq_m512i(r, _mm512_setzero_si512());
12101        let r = _mm512_maskz_abs_epi8(
12102            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
12103            a,
12104        );
12105        #[rustfmt::skip]
12106        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
12107                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
12108                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
12109                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
12110        assert_eq_m512i(r, e);
12111    }
12112
12113    #[simd_test(enable = "avx512bw,avx512vl")]
12114    unsafe fn test_mm256_mask_abs_epi8() {
12115        let a = _mm256_set1_epi8(-1);
12116        let r = _mm256_mask_abs_epi8(a, 0, a);
12117        assert_eq_m256i(r, a);
12118        let r = _mm256_mask_abs_epi8(a, 0b00000000_11111111_00000000_11111111, a);
12119        #[rustfmt::skip]
12120        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
12121                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
12122        assert_eq_m256i(r, e);
12123    }
12124
12125    #[simd_test(enable = "avx512bw,avx512vl")]
12126    unsafe fn test_mm256_maskz_abs_epi8() {
12127        let a = _mm256_set1_epi8(-1);
12128        let r = _mm256_maskz_abs_epi8(0, a);
12129        assert_eq_m256i(r, _mm256_setzero_si256());
12130        let r = _mm256_maskz_abs_epi8(0b00000000_11111111_00000000_11111111, a);
12131        #[rustfmt::skip]
12132        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
12133                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
12134        assert_eq_m256i(r, e);
12135    }
12136
12137    #[simd_test(enable = "avx512bw,avx512vl")]
12138    unsafe fn test_mm_mask_abs_epi8() {
12139        let a = _mm_set1_epi8(-1);
12140        let r = _mm_mask_abs_epi8(a, 0, a);
12141        assert_eq_m128i(r, a);
12142        let r = _mm_mask_abs_epi8(a, 0b00000000_11111111, a);
12143        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
12144        assert_eq_m128i(r, e);
12145    }
12146
12147    #[simd_test(enable = "avx512bw,avx512vl")]
12148    unsafe fn test_mm_maskz_abs_epi8() {
12149        let a = _mm_set1_epi8(-1);
12150        let r = _mm_maskz_abs_epi8(0, a);
12151        assert_eq_m128i(r, _mm_setzero_si128());
12152        let r = _mm_maskz_abs_epi8(0b00000000_11111111, a);
12153        #[rustfmt::skip]
12154        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
12155        assert_eq_m128i(r, e);
12156    }
12157
12158    #[simd_test(enable = "avx512bw")]
12159    unsafe fn test_mm512_add_epi16() {
12160        let a = _mm512_set1_epi16(1);
12161        let b = _mm512_set1_epi16(2);
12162        let r = _mm512_add_epi16(a, b);
12163        let e = _mm512_set1_epi16(3);
12164        assert_eq_m512i(r, e);
12165    }
12166
12167    #[simd_test(enable = "avx512bw")]
12168    unsafe fn test_mm512_mask_add_epi16() {
12169        let a = _mm512_set1_epi16(1);
12170        let b = _mm512_set1_epi16(2);
12171        let r = _mm512_mask_add_epi16(a, 0, a, b);
12172        assert_eq_m512i(r, a);
12173        let r = _mm512_mask_add_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
12174        #[rustfmt::skip]
12175        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
12176                                 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
12177        assert_eq_m512i(r, e);
12178    }
12179
12180    #[simd_test(enable = "avx512bw")]
12181    unsafe fn test_mm512_maskz_add_epi16() {
12182        let a = _mm512_set1_epi16(1);
12183        let b = _mm512_set1_epi16(2);
12184        let r = _mm512_maskz_add_epi16(0, a, b);
12185        assert_eq_m512i(r, _mm512_setzero_si512());
12186        let r = _mm512_maskz_add_epi16(0b00000000_11111111_00000000_11111111, a, b);
12187        #[rustfmt::skip]
12188        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
12189                                 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
12190        assert_eq_m512i(r, e);
12191    }
12192
12193    #[simd_test(enable = "avx512bw,avx512vl")]
12194    unsafe fn test_mm256_mask_add_epi16() {
12195        let a = _mm256_set1_epi16(1);
12196        let b = _mm256_set1_epi16(2);
12197        let r = _mm256_mask_add_epi16(a, 0, a, b);
12198        assert_eq_m256i(r, a);
12199        let r = _mm256_mask_add_epi16(a, 0b00000000_11111111, a, b);
12200        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
12201        assert_eq_m256i(r, e);
12202    }
12203
12204    #[simd_test(enable = "avx512bw,avx512vl")]
12205    unsafe fn test_mm256_maskz_add_epi16() {
12206        let a = _mm256_set1_epi16(1);
12207        let b = _mm256_set1_epi16(2);
12208        let r = _mm256_maskz_add_epi16(0, a, b);
12209        assert_eq_m256i(r, _mm256_setzero_si256());
12210        let r = _mm256_maskz_add_epi16(0b00000000_11111111, a, b);
12211        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
12212        assert_eq_m256i(r, e);
12213    }
12214
12215    #[simd_test(enable = "avx512bw,avx512vl")]
12216    unsafe fn test_mm_mask_add_epi16() {
12217        let a = _mm_set1_epi16(1);
12218        let b = _mm_set1_epi16(2);
12219        let r = _mm_mask_add_epi16(a, 0, a, b);
12220        assert_eq_m128i(r, a);
12221        let r = _mm_mask_add_epi16(a, 0b00001111, a, b);
12222        let e = _mm_set_epi16(1, 1, 1, 1, 3, 3, 3, 3);
12223        assert_eq_m128i(r, e);
12224    }
12225
12226    #[simd_test(enable = "avx512bw,avx512vl")]
12227    unsafe fn test_mm_maskz_add_epi16() {
12228        let a = _mm_set1_epi16(1);
12229        let b = _mm_set1_epi16(2);
12230        let r = _mm_maskz_add_epi16(0, a, b);
12231        assert_eq_m128i(r, _mm_setzero_si128());
12232        let r = _mm_maskz_add_epi16(0b00001111, a, b);
12233        let e = _mm_set_epi16(0, 0, 0, 0, 3, 3, 3, 3);
12234        assert_eq_m128i(r, e);
12235    }
12236
12237    #[simd_test(enable = "avx512bw")]
12238    unsafe fn test_mm512_add_epi8() {
12239        let a = _mm512_set1_epi8(1);
12240        let b = _mm512_set1_epi8(2);
12241        let r = _mm512_add_epi8(a, b);
12242        let e = _mm512_set1_epi8(3);
12243        assert_eq_m512i(r, e);
12244    }
12245
12246    #[simd_test(enable = "avx512bw")]
12247    unsafe fn test_mm512_mask_add_epi8() {
12248        let a = _mm512_set1_epi8(1);
12249        let b = _mm512_set1_epi8(2);
12250        let r = _mm512_mask_add_epi8(a, 0, a, b);
12251        assert_eq_m512i(r, a);
12252        let r = _mm512_mask_add_epi8(
12253            a,
12254            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
12255            a,
12256            b,
12257        );
12258        #[rustfmt::skip]
12259        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
12260                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
12261                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
12262                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
12263        assert_eq_m512i(r, e);
12264    }
12265
12266    #[simd_test(enable = "avx512bw")]
12267    unsafe fn test_mm512_maskz_add_epi8() {
12268        let a = _mm512_set1_epi8(1);
12269        let b = _mm512_set1_epi8(2);
12270        let r = _mm512_maskz_add_epi8(0, a, b);
12271        assert_eq_m512i(r, _mm512_setzero_si512());
12272        let r = _mm512_maskz_add_epi8(
12273            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
12274            a,
12275            b,
12276        );
12277        #[rustfmt::skip]
12278        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
12279                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
12280                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
12281                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
12282        assert_eq_m512i(r, e);
12283    }
12284
12285    #[simd_test(enable = "avx512bw,avx512vl")]
12286    unsafe fn test_mm256_mask_add_epi8() {
12287        let a = _mm256_set1_epi8(1);
12288        let b = _mm256_set1_epi8(2);
12289        let r = _mm256_mask_add_epi8(a, 0, a, b);
12290        assert_eq_m256i(r, a);
12291        let r = _mm256_mask_add_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
12292        #[rustfmt::skip]
12293        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
12294                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
12295        assert_eq_m256i(r, e);
12296    }
12297
12298    #[simd_test(enable = "avx512bw,avx512vl")]
12299    unsafe fn test_mm256_maskz_add_epi8() {
12300        let a = _mm256_set1_epi8(1);
12301        let b = _mm256_set1_epi8(2);
12302        let r = _mm256_maskz_add_epi8(0, a, b);
12303        assert_eq_m256i(r, _mm256_setzero_si256());
12304        let r = _mm256_maskz_add_epi8(0b00000000_11111111_00000000_11111111, a, b);
12305        #[rustfmt::skip]
12306        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
12307                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
12308        assert_eq_m256i(r, e);
12309    }
12310
12311    #[simd_test(enable = "avx512bw,avx512vl")]
12312    unsafe fn test_mm_mask_add_epi8() {
12313        let a = _mm_set1_epi8(1);
12314        let b = _mm_set1_epi8(2);
12315        let r = _mm_mask_add_epi8(a, 0, a, b);
12316        assert_eq_m128i(r, a);
12317        let r = _mm_mask_add_epi8(a, 0b00000000_11111111, a, b);
12318        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
12319        assert_eq_m128i(r, e);
12320    }
12321
12322    #[simd_test(enable = "avx512bw,avx512vl")]
12323    unsafe fn test_mm_maskz_add_epi8() {
12324        let a = _mm_set1_epi8(1);
12325        let b = _mm_set1_epi8(2);
12326        let r = _mm_maskz_add_epi8(0, a, b);
12327        assert_eq_m128i(r, _mm_setzero_si128());
12328        let r = _mm_maskz_add_epi8(0b00000000_11111111, a, b);
12329        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
12330        assert_eq_m128i(r, e);
12331    }
12332
12333    #[simd_test(enable = "avx512bw")]
12334    unsafe fn test_mm512_adds_epu16() {
12335        let a = _mm512_set1_epi16(1);
12336        let b = _mm512_set1_epi16(u16::MAX as i16);
12337        let r = _mm512_adds_epu16(a, b);
12338        let e = _mm512_set1_epi16(u16::MAX as i16);
12339        assert_eq_m512i(r, e);
12340    }
12341
12342    #[simd_test(enable = "avx512bw")]
12343    unsafe fn test_mm512_mask_adds_epu16() {
12344        let a = _mm512_set1_epi16(1);
12345        let b = _mm512_set1_epi16(u16::MAX as i16);
12346        let r = _mm512_mask_adds_epu16(a, 0, a, b);
12347        assert_eq_m512i(r, a);
12348        let r = _mm512_mask_adds_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
12349        #[rustfmt::skip]
12350        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12351                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
12352        assert_eq_m512i(r, e);
12353    }
12354
12355    #[simd_test(enable = "avx512bw")]
12356    unsafe fn test_mm512_maskz_adds_epu16() {
12357        let a = _mm512_set1_epi16(1);
12358        let b = _mm512_set1_epi16(u16::MAX as i16);
12359        let r = _mm512_maskz_adds_epu16(0, a, b);
12360        assert_eq_m512i(r, _mm512_setzero_si512());
12361        let r = _mm512_maskz_adds_epu16(0b00000000_00000000_00000000_00001111, a, b);
12362        #[rustfmt::skip]
12363        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12364                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
12365        assert_eq_m512i(r, e);
12366    }
12367
12368    #[simd_test(enable = "avx512bw,avx512vl")]
12369    unsafe fn test_mm256_mask_adds_epu16() {
12370        let a = _mm256_set1_epi16(1);
12371        let b = _mm256_set1_epi16(u16::MAX as i16);
12372        let r = _mm256_mask_adds_epu16(a, 0, a, b);
12373        assert_eq_m256i(r, a);
12374        let r = _mm256_mask_adds_epu16(a, 0b00000000_00001111, a, b);
12375        #[rustfmt::skip]
12376        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
12377        assert_eq_m256i(r, e);
12378    }
12379
12380    #[simd_test(enable = "avx512bw,avx512vl")]
12381    unsafe fn test_mm256_maskz_adds_epu16() {
12382        let a = _mm256_set1_epi16(1);
12383        let b = _mm256_set1_epi16(u16::MAX as i16);
12384        let r = _mm256_maskz_adds_epu16(0, a, b);
12385        assert_eq_m256i(r, _mm256_setzero_si256());
12386        let r = _mm256_maskz_adds_epu16(0b00000000_00001111, a, b);
12387        #[rustfmt::skip]
12388        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
12389        assert_eq_m256i(r, e);
12390    }
12391
12392    #[simd_test(enable = "avx512bw,avx512vl")]
12393    unsafe fn test_mm_mask_adds_epu16() {
12394        let a = _mm_set1_epi16(1);
12395        let b = _mm_set1_epi16(u16::MAX as i16);
12396        let r = _mm_mask_adds_epu16(a, 0, a, b);
12397        assert_eq_m128i(r, a);
12398        let r = _mm_mask_adds_epu16(a, 0b00001111, a, b);
12399        #[rustfmt::skip]
12400        let e = _mm_set_epi16(1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
12401        assert_eq_m128i(r, e);
12402    }
12403
12404    #[simd_test(enable = "avx512bw,avx512vl")]
12405    unsafe fn test_mm_maskz_adds_epu16() {
12406        let a = _mm_set1_epi16(1);
12407        let b = _mm_set1_epi16(u16::MAX as i16);
12408        let r = _mm_maskz_adds_epu16(0, a, b);
12409        assert_eq_m128i(r, _mm_setzero_si128());
12410        let r = _mm_maskz_adds_epu16(0b00001111, a, b);
12411        #[rustfmt::skip]
12412        let e = _mm_set_epi16(0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
12413        assert_eq_m128i(r, e);
12414    }
12415
12416    #[simd_test(enable = "avx512bw")]
12417    unsafe fn test_mm512_adds_epu8() {
12418        let a = _mm512_set1_epi8(1);
12419        let b = _mm512_set1_epi8(u8::MAX as i8);
12420        let r = _mm512_adds_epu8(a, b);
12421        let e = _mm512_set1_epi8(u8::MAX as i8);
12422        assert_eq_m512i(r, e);
12423    }
12424
12425    #[simd_test(enable = "avx512bw")]
12426    unsafe fn test_mm512_mask_adds_epu8() {
12427        let a = _mm512_set1_epi8(1);
12428        let b = _mm512_set1_epi8(u8::MAX as i8);
12429        let r = _mm512_mask_adds_epu8(a, 0, a, b);
12430        assert_eq_m512i(r, a);
12431        let r = _mm512_mask_adds_epu8(
12432            a,
12433            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
12434            a,
12435            b,
12436        );
12437        #[rustfmt::skip]
12438        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12439                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12440                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12441                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
12442        assert_eq_m512i(r, e);
12443    }
12444
12445    #[simd_test(enable = "avx512bw")]
12446    unsafe fn test_mm512_maskz_adds_epu8() {
12447        let a = _mm512_set1_epi8(1);
12448        let b = _mm512_set1_epi8(u8::MAX as i8);
12449        let r = _mm512_maskz_adds_epu8(0, a, b);
12450        assert_eq_m512i(r, _mm512_setzero_si512());
12451        let r = _mm512_maskz_adds_epu8(
12452            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
12453            a,
12454            b,
12455        );
12456        #[rustfmt::skip]
12457        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12458                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12459                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12460                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
12461        assert_eq_m512i(r, e);
12462    }
12463
12464    #[simd_test(enable = "avx512bw,avx512vl")]
12465    unsafe fn test_mm256_mask_adds_epu8() {
12466        let a = _mm256_set1_epi8(1);
12467        let b = _mm256_set1_epi8(u8::MAX as i8);
12468        let r = _mm256_mask_adds_epu8(a, 0, a, b);
12469        assert_eq_m256i(r, a);
12470        let r = _mm256_mask_adds_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
12471        #[rustfmt::skip]
12472        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12473                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
12474        assert_eq_m256i(r, e);
12475    }
12476
12477    #[simd_test(enable = "avx512bw,avx512vl")]
12478    unsafe fn test_mm256_maskz_adds_epu8() {
12479        let a = _mm256_set1_epi8(1);
12480        let b = _mm256_set1_epi8(u8::MAX as i8);
12481        let r = _mm256_maskz_adds_epu8(0, a, b);
12482        assert_eq_m256i(r, _mm256_setzero_si256());
12483        let r = _mm256_maskz_adds_epu8(0b00000000_00000000_00000000_00001111, a, b);
12484        #[rustfmt::skip]
12485        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12486                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
12487        assert_eq_m256i(r, e);
12488    }
12489
12490    #[simd_test(enable = "avx512bw,avx512vl")]
12491    unsafe fn test_mm_mask_adds_epu8() {
12492        let a = _mm_set1_epi8(1);
12493        let b = _mm_set1_epi8(u8::MAX as i8);
12494        let r = _mm_mask_adds_epu8(a, 0, a, b);
12495        assert_eq_m128i(r, a);
12496        let r = _mm_mask_adds_epu8(a, 0b00000000_00001111, a, b);
12497        #[rustfmt::skip]
12498        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
12499        assert_eq_m128i(r, e);
12500    }
12501
12502    #[simd_test(enable = "avx512bw,avx512vl")]
12503    unsafe fn test_mm_maskz_adds_epu8() {
12504        let a = _mm_set1_epi8(1);
12505        let b = _mm_set1_epi8(u8::MAX as i8);
12506        let r = _mm_maskz_adds_epu8(0, a, b);
12507        assert_eq_m128i(r, _mm_setzero_si128());
12508        let r = _mm_maskz_adds_epu8(0b00000000_00001111, a, b);
12509        #[rustfmt::skip]
12510        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
12511        assert_eq_m128i(r, e);
12512    }
12513
12514    #[simd_test(enable = "avx512bw")]
12515    unsafe fn test_mm512_adds_epi16() {
12516        let a = _mm512_set1_epi16(1);
12517        let b = _mm512_set1_epi16(i16::MAX);
12518        let r = _mm512_adds_epi16(a, b);
12519        let e = _mm512_set1_epi16(i16::MAX);
12520        assert_eq_m512i(r, e);
12521    }
12522
12523    #[simd_test(enable = "avx512bw")]
12524    unsafe fn test_mm512_mask_adds_epi16() {
12525        let a = _mm512_set1_epi16(1);
12526        let b = _mm512_set1_epi16(i16::MAX);
12527        let r = _mm512_mask_adds_epi16(a, 0, a, b);
12528        assert_eq_m512i(r, a);
12529        let r = _mm512_mask_adds_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
12530        #[rustfmt::skip]
12531        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12532                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
12533        assert_eq_m512i(r, e);
12534    }
12535
12536    #[simd_test(enable = "avx512bw")]
12537    unsafe fn test_mm512_maskz_adds_epi16() {
12538        let a = _mm512_set1_epi16(1);
12539        let b = _mm512_set1_epi16(i16::MAX);
12540        let r = _mm512_maskz_adds_epi16(0, a, b);
12541        assert_eq_m512i(r, _mm512_setzero_si512());
12542        let r = _mm512_maskz_adds_epi16(0b00000000_00000000_00000000_00001111, a, b);
12543        #[rustfmt::skip]
12544        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12545                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
12546        assert_eq_m512i(r, e);
12547    }
12548
12549    #[simd_test(enable = "avx512bw,avx512vl")]
12550    unsafe fn test_mm256_mask_adds_epi16() {
12551        let a = _mm256_set1_epi16(1);
12552        let b = _mm256_set1_epi16(i16::MAX);
12553        let r = _mm256_mask_adds_epi16(a, 0, a, b);
12554        assert_eq_m256i(r, a);
12555        let r = _mm256_mask_adds_epi16(a, 0b00000000_00001111, a, b);
12556        #[rustfmt::skip]
12557        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
12558        assert_eq_m256i(r, e);
12559    }
12560
12561    #[simd_test(enable = "avx512bw,avx512vl")]
12562    unsafe fn test_mm256_maskz_adds_epi16() {
12563        let a = _mm256_set1_epi16(1);
12564        let b = _mm256_set1_epi16(i16::MAX);
12565        let r = _mm256_maskz_adds_epi16(0, a, b);
12566        assert_eq_m256i(r, _mm256_setzero_si256());
12567        let r = _mm256_maskz_adds_epi16(0b00000000_00001111, a, b);
12568        #[rustfmt::skip]
12569        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
12570        assert_eq_m256i(r, e);
12571    }
12572
12573    #[simd_test(enable = "avx512bw,avx512vl")]
12574    unsafe fn test_mm_mask_adds_epi16() {
12575        let a = _mm_set1_epi16(1);
12576        let b = _mm_set1_epi16(i16::MAX);
12577        let r = _mm_mask_adds_epi16(a, 0, a, b);
12578        assert_eq_m128i(r, a);
12579        let r = _mm_mask_adds_epi16(a, 0b00001111, a, b);
12580        let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
12581        assert_eq_m128i(r, e);
12582    }
12583
12584    #[simd_test(enable = "avx512bw,avx512vl")]
12585    unsafe fn test_mm_maskz_adds_epi16() {
12586        let a = _mm_set1_epi16(1);
12587        let b = _mm_set1_epi16(i16::MAX);
12588        let r = _mm_maskz_adds_epi16(0, a, b);
12589        assert_eq_m128i(r, _mm_setzero_si128());
12590        let r = _mm_maskz_adds_epi16(0b00001111, a, b);
12591        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
12592        assert_eq_m128i(r, e);
12593    }
12594
12595    #[simd_test(enable = "avx512bw")]
12596    unsafe fn test_mm512_adds_epi8() {
12597        let a = _mm512_set1_epi8(1);
12598        let b = _mm512_set1_epi8(i8::MAX);
12599        let r = _mm512_adds_epi8(a, b);
12600        let e = _mm512_set1_epi8(i8::MAX);
12601        assert_eq_m512i(r, e);
12602    }
12603
12604    #[simd_test(enable = "avx512bw")]
12605    unsafe fn test_mm512_mask_adds_epi8() {
12606        let a = _mm512_set1_epi8(1);
12607        let b = _mm512_set1_epi8(i8::MAX);
12608        let r = _mm512_mask_adds_epi8(a, 0, a, b);
12609        assert_eq_m512i(r, a);
12610        let r = _mm512_mask_adds_epi8(
12611            a,
12612            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
12613            a,
12614            b,
12615        );
12616        #[rustfmt::skip]
12617        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12618                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12619                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12620                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
12621        assert_eq_m512i(r, e);
12622    }
12623
12624    #[simd_test(enable = "avx512bw")]
12625    unsafe fn test_mm512_maskz_adds_epi8() {
12626        let a = _mm512_set1_epi8(1);
12627        let b = _mm512_set1_epi8(i8::MAX);
12628        let r = _mm512_maskz_adds_epi8(0, a, b);
12629        assert_eq_m512i(r, _mm512_setzero_si512());
12630        let r = _mm512_maskz_adds_epi8(
12631            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
12632            a,
12633            b,
12634        );
12635        #[rustfmt::skip]
12636        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12637                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12638                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12639                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
12640        assert_eq_m512i(r, e);
12641    }
12642
12643    #[simd_test(enable = "avx512bw,avx512vl")]
12644    unsafe fn test_mm256_mask_adds_epi8() {
12645        let a = _mm256_set1_epi8(1);
12646        let b = _mm256_set1_epi8(i8::MAX);
12647        let r = _mm256_mask_adds_epi8(a, 0, a, b);
12648        assert_eq_m256i(r, a);
12649        let r = _mm256_mask_adds_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
12650        #[rustfmt::skip]
12651        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12652                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
12653        assert_eq_m256i(r, e);
12654    }
12655
12656    #[simd_test(enable = "avx512bw,avx512vl")]
12657    unsafe fn test_mm256_maskz_adds_epi8() {
12658        let a = _mm256_set1_epi8(1);
12659        let b = _mm256_set1_epi8(i8::MAX);
12660        let r = _mm256_maskz_adds_epi8(0, a, b);
12661        assert_eq_m256i(r, _mm256_setzero_si256());
12662        let r = _mm256_maskz_adds_epi8(0b00000000_00000000_00000000_00001111, a, b);
12663        #[rustfmt::skip]
12664        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12665                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
12666        assert_eq_m256i(r, e);
12667    }
12668
12669    #[simd_test(enable = "avx512bw,avx512vl")]
12670    unsafe fn test_mm_mask_adds_epi8() {
12671        let a = _mm_set1_epi8(1);
12672        let b = _mm_set1_epi8(i8::MAX);
12673        let r = _mm_mask_adds_epi8(a, 0, a, b);
12674        assert_eq_m128i(r, a);
12675        let r = _mm_mask_adds_epi8(a, 0b00000000_00001111, a, b);
12676        #[rustfmt::skip]
12677        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
12678        assert_eq_m128i(r, e);
12679    }
12680
12681    #[simd_test(enable = "avx512bw,avx512vl")]
12682    unsafe fn test_mm_maskz_adds_epi8() {
12683        let a = _mm_set1_epi8(1);
12684        let b = _mm_set1_epi8(i8::MAX);
12685        let r = _mm_maskz_adds_epi8(0, a, b);
12686        assert_eq_m128i(r, _mm_setzero_si128());
12687        let r = _mm_maskz_adds_epi8(0b00000000_00001111, a, b);
12688        #[rustfmt::skip]
12689        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
12690        assert_eq_m128i(r, e);
12691    }
12692
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_sub_epi16(a, b);
        let e = _mm512_set1_epi16(-1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_sub_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sub_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_maskz_sub_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sub_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_sub_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_sub_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sub_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_sub_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_maskz_sub_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sub_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_sub_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_sub_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sub_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_sub_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_maskz_sub_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sub_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_sub_epi8(a, b);
        let e = _mm512_set1_epi8(-1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_sub_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sub_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_maskz_sub_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sub_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_sub_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_sub_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sub_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_sub_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_maskz_sub_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sub_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_sub_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_mask_sub_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sub_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_sub_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_maskz_sub_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sub_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

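    // `subs_epu16` is unsigned saturating subtraction: results clamp at 0
    // instead of wrapping, so 1 - u16::MAX saturates to 0 in every lane.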
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_subs_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_subs_epu16(a, b);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_subs_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_mask_subs_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_subs_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_subs_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_maskz_subs_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_subs_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_subs_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_mask_subs_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_subs_epu16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_subs_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_maskz_subs_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_subs_epu16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_subs_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_mask_subs_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_subs_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_subs_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_maskz_subs_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_subs_epu16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_subs_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_subs_epu8(a, b);
        let e = _mm512_set1_epi8(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_subs_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_mask_subs_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_subs_epu8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_subs_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_maskz_subs_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_subs_epu8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_subs_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_mask_subs_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_subs_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_subs_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_maskz_subs_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_subs_epu8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_subs_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_mask_subs_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_subs_epu8(a, 0b00000000_00001111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_subs_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_maskz_subs_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_subs_epu8(0b00000000_00001111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

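    // `subs_epi16` is signed saturating subtraction, clamping results to the
    // i16 range; -1 - i16::MAX lands exactly on i16::MIN.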
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_subs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_subs_epi16(a, b);
        let e = _mm512_set1_epi16(i16::MIN);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_subs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_mask_subs_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_subs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_subs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_maskz_subs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_subs_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_subs_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_mask_subs_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_subs_epi16(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_subs_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_maskz_subs_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_subs_epi16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_subs_epi16() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(i16::MAX);
        let r = _mm_mask_subs_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_subs_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(-1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_subs_epi16() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(i16::MAX);
        let r = _mm_maskz_subs_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_subs_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
        assert_eq_m128i(r, e);
    }

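    // Byte-wide counterparts: -1 - i8::MAX lands on i8::MIN.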
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_subs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_subs_epi8(a, b);
        let e = _mm512_set1_epi8(i8::MIN);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_subs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_mask_subs_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_subs_epi8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_subs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_maskz_subs_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_subs_epi8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_subs_epi8() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(i8::MAX);
        let r = _mm256_mask_subs_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_subs_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_subs_epi8() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(i8::MAX);
        let r = _mm256_maskz_subs_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_subs_epi8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_subs_epi8() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(i8::MAX);
        let r = _mm_mask_subs_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_subs_epi8(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_subs_epi8() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(i8::MAX);
        let r = _mm_maskz_subs_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_subs_epi8(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m128i(r, e);
    }

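    // `mulhi_epu16` returns the high 16 bits of each 32-bit unsigned product;
    // for 1 * 1 the high half is 0.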
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mulhi_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mulhi_epu16(a, b);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mulhi_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_mulhi_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_mulhi_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mulhi_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_mulhi_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mulhi_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_mulhi_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_mulhi_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_mulhi_epu16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_mulhi_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_mulhi_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mulhi_epu16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_mulhi_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_mulhi_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_mulhi_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_mulhi_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_mulhi_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mulhi_epu16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

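    // `mulhi_epi16` is the signed variant: the high 16 bits of each 32-bit
    // signed product.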
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mulhi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mulhi_epi16(a, b);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mulhi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_mulhi_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_mulhi_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mulhi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_mulhi_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mulhi_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_mulhi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_mulhi_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_mulhi_epi16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_mulhi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_mulhi_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mulhi_epi16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_mulhi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_mulhi_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_mulhi_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_mulhi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_mulhi_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mulhi_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

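    // `mulhrs_epi16` (vpmulhrsw) computes ((a * b) >> 14) + 1, then shifts
    // right by one more bit: a fixed-point multiply with rounding. For 1 * 1
    // that is ((1 >> 14) + 1) >> 1 == 0.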
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mulhrs_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mulhrs_epi16(a, b);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mulhrs_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_mulhrs_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_mulhrs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mulhrs_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_mulhrs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mulhrs_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_mulhrs_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_mulhrs_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_mulhrs_epi16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_mulhrs_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_mulhrs_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mulhrs_epi16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_mulhrs_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_mulhrs_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_mulhrs_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_mulhrs_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_mulhrs_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mulhrs_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

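    // `mullo_epi16` keeps the low 16 bits of each product, so 1 * 1 == 1.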
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mullo_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mullo_epi16(a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mullo_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_mullo_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_mullo_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mullo_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_mullo_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mullo_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_mullo_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_mullo_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_mullo_epi16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_mullo_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_mullo_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mullo_epi16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_mullo_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_mullo_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_mullo_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_mullo_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_mullo_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mullo_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

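    // The `max` tests pit an ascending lane pattern against its reverse, so
    // the expected maximum is symmetric: 15 down to 8, then 8 back up to 15.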
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_max_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epu16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_max_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_max_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epu16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_max_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_max_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_max_epu16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_max_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_max_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_max_epu16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_max_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_max_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_max_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_max_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_max_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_max_epu16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

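    // Unsigned byte maximum, same ascending-versus-descending pattern.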
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_max_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_max_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epu8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_max_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epu8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_max_epu8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_max_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_max_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_max_epu8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_max_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_max_epu8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_max_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_max_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_max_epu8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_max_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_max_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_max_epu8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

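    // Signed 16-bit maximum; the inputs are small and non-negative, so the
    // expected values match the unsigned case above.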
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_max_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_max_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_max_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_max_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_max_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_max_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_max_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_max_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_max_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_max_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_max_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_max_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_max_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_max_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_max_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

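    // Signed byte maximum; same inputs and expectations as the unsigned byte case.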
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_max_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_max_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_max_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_max_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_max_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_max_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
13943        #[rustfmt::skip]
13944        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13945                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13946        assert_eq_m256i(r, e);
13947    }
13948
13949    #[simd_test(enable = "avx512f,avx512vl")]
13950    unsafe fn test_mm256_maskz_max_epi8() {
13951        #[rustfmt::skip]
13952        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13953                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13954        #[rustfmt::skip]
13955        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13956                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13957        let r = _mm256_maskz_max_epi8(0, a, b);
13958        assert_eq_m256i(r, _mm256_setzero_si256());
13959        let r = _mm256_maskz_max_epi8(0b00000000_11111111_00000000_11111111, a, b);
13960        #[rustfmt::skip]
13961        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
13962                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
13963        assert_eq_m256i(r, e);
13964    }
13965
13966    #[simd_test(enable = "avx512f,avx512vl")]
13967    unsafe fn test_mm_mask_max_epi8() {
13968        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13969        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13970        let r = _mm_mask_max_epi8(a, 0, a, b);
13971        assert_eq_m128i(r, a);
13972        let r = _mm_mask_max_epi8(a, 0b00000000_11111111, a, b);
13973        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13974        assert_eq_m128i(r, e);
13975    }
13976
13977    #[simd_test(enable = "avx512f,avx512vl")]
13978    unsafe fn test_mm_maskz_max_epi8() {
13979        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13980        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13981        let r = _mm_maskz_max_epi8(0, a, b);
13982        assert_eq_m128i(r, _mm_setzero_si128());
13983        let r = _mm_maskz_max_epi8(0b00000000_11111111, a, b);
13984        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
13985        assert_eq_m128i(r, e);
13986    }
13987
13988    #[simd_test(enable = "avx512bw")]
13989    unsafe fn test_mm512_min_epu16() {
13990        #[rustfmt::skip]
13991        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13992                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13993        #[rustfmt::skip]
13994        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13995                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13996        let r = _mm512_min_epu16(a, b);
13997        #[rustfmt::skip]
13998        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
13999                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14000        assert_eq_m512i(r, e);
14001    }
14002
14003    #[simd_test(enable = "avx512f")]
14004    unsafe fn test_mm512_mask_min_epu16() {
14005        #[rustfmt::skip]
14006        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14007                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14008        #[rustfmt::skip]
14009        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14010                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14011        let r = _mm512_mask_min_epu16(a, 0, a, b);
14012        assert_eq_m512i(r, a);
14013        let r = _mm512_mask_min_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
14014        #[rustfmt::skip]
14015        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14016                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14017        assert_eq_m512i(r, e);
14018    }
14019
14020    #[simd_test(enable = "avx512f")]
14021    unsafe fn test_mm512_maskz_min_epu16() {
14022        #[rustfmt::skip]
14023        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14024                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14025        #[rustfmt::skip]
14026        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14027                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14028        let r = _mm512_maskz_min_epu16(0, a, b);
14029        assert_eq_m512i(r, _mm512_setzero_si512());
14030        let r = _mm512_maskz_min_epu16(0b00000000_11111111_00000000_11111111, a, b);
14031        #[rustfmt::skip]
14032        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14033                                 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14034        assert_eq_m512i(r, e);
14035    }
14036
14037    #[simd_test(enable = "avx512f,avx512vl")]
14038    unsafe fn test_mm256_mask_min_epu16() {
14039        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14040        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14041        let r = _mm256_mask_min_epu16(a, 0, a, b);
14042        assert_eq_m256i(r, a);
14043        let r = _mm256_mask_min_epu16(a, 0b00000000_11111111, a, b);
14044        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14045        assert_eq_m256i(r, e);
14046    }
14047
14048    #[simd_test(enable = "avx512f,avx512vl")]
14049    unsafe fn test_mm256_maskz_min_epu16() {
14050        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14051        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14052        let r = _mm256_maskz_min_epu16(0, a, b);
14053        assert_eq_m256i(r, _mm256_setzero_si256());
14054        let r = _mm256_maskz_min_epu16(0b00000000_11111111, a, b);
14055        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14056        assert_eq_m256i(r, e);
14057    }
14058
14059    #[simd_test(enable = "avx512f,avx512vl")]
14060    unsafe fn test_mm_mask_min_epu16() {
14061        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14062        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14063        let r = _mm_mask_min_epu16(a, 0, a, b);
14064        assert_eq_m128i(r, a);
14065        let r = _mm_mask_min_epu16(a, 0b00001111, a, b);
14066        let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
14067        assert_eq_m128i(r, e);
14068    }
14069
14070    #[simd_test(enable = "avx512f,avx512vl")]
14071    unsafe fn test_mm_maskz_min_epu16() {
14072        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14073        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14074        let r = _mm_maskz_min_epu16(0, a, b);
14075        assert_eq_m128i(r, _mm_setzero_si128());
14076        let r = _mm_maskz_min_epu16(0b00001111, a, b);
14077        let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
14078        assert_eq_m128i(r, e);
14079    }
14080
14081    #[simd_test(enable = "avx512bw")]
14082    unsafe fn test_mm512_min_epu8() {
14083        #[rustfmt::skip]
14084        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14085                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14086                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14087                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14088        #[rustfmt::skip]
14089        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14090                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14091                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14092                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14093        let r = _mm512_min_epu8(a, b);
14094        #[rustfmt::skip]
14095        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14096                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14097                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14098                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14099        assert_eq_m512i(r, e);
14100    }
14101
14102    #[simd_test(enable = "avx512f")]
14103    unsafe fn test_mm512_mask_min_epu8() {
14104        #[rustfmt::skip]
14105        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14106                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14107                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14108                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14109        #[rustfmt::skip]
14110        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14111                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14112                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14113                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14114        let r = _mm512_mask_min_epu8(a, 0, a, b);
14115        assert_eq_m512i(r, a);
14116        let r = _mm512_mask_min_epu8(
14117            a,
14118            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
14119            a,
14120            b,
14121        );
14122        #[rustfmt::skip]
14123        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14124                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14125                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14126                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14127        assert_eq_m512i(r, e);
14128    }
14129
14130    #[simd_test(enable = "avx512f")]
14131    unsafe fn test_mm512_maskz_min_epu8() {
14132        #[rustfmt::skip]
14133        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14134                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14135                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14136                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14137        #[rustfmt::skip]
14138        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14139                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14140                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14141                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14142        let r = _mm512_maskz_min_epu8(0, a, b);
14143        assert_eq_m512i(r, _mm512_setzero_si512());
14144        let r = _mm512_maskz_min_epu8(
14145            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
14146            a,
14147            b,
14148        );
14149        #[rustfmt::skip]
14150        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14151                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14152                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14153                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14154        assert_eq_m512i(r, e);
14155    }
14156
14157    #[simd_test(enable = "avx512f,avx512vl")]
14158    unsafe fn test_mm256_mask_min_epu8() {
14159        #[rustfmt::skip]
14160        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14161                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14162        #[rustfmt::skip]
14163        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14164                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14165        let r = _mm256_mask_min_epu8(a, 0, a, b);
14166        assert_eq_m256i(r, a);
14167        let r = _mm256_mask_min_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
14168        #[rustfmt::skip]
14169        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14170                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14171        assert_eq_m256i(r, e);
14172    }
14173
14174    #[simd_test(enable = "avx512f,avx512vl")]
14175    unsafe fn test_mm256_maskz_min_epu8() {
14176        #[rustfmt::skip]
14177        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14178                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14179        #[rustfmt::skip]
14180        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14181                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14182        let r = _mm256_maskz_min_epu8(0, a, b);
14183        assert_eq_m256i(r, _mm256_setzero_si256());
14184        let r = _mm256_maskz_min_epu8(0b00000000_11111111_00000000_11111111, a, b);
14185        #[rustfmt::skip]
14186        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14187                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14188        assert_eq_m256i(r, e);
14189    }
14190
14191    #[simd_test(enable = "avx512f,avx512vl")]
14192    unsafe fn test_mm_mask_min_epu8() {
14193        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14194        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14195        let r = _mm_mask_min_epu8(a, 0, a, b);
14196        assert_eq_m128i(r, a);
14197        let r = _mm_mask_min_epu8(a, 0b00000000_11111111, a, b);
14198        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14199        assert_eq_m128i(r, e);
14200    }
14201
14202    #[simd_test(enable = "avx512f,avx512vl")]
14203    unsafe fn test_mm_maskz_min_epu8() {
14204        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14205        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14206        let r = _mm_maskz_min_epu8(0, a, b);
14207        assert_eq_m128i(r, _mm_setzero_si128());
14208        let r = _mm_maskz_min_epu8(0b00000000_11111111, a, b);
14209        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14210        assert_eq_m128i(r, e);
14211    }
14212
14213    #[simd_test(enable = "avx512bw")]
14214    unsafe fn test_mm512_min_epi16() {
14215        #[rustfmt::skip]
14216        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14217                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14218        #[rustfmt::skip]
14219        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14220                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14221        let r = _mm512_min_epi16(a, b);
14222        #[rustfmt::skip]
14223        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14224                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14225        assert_eq_m512i(r, e);
14226    }
14227
14228    #[simd_test(enable = "avx512f")]
14229    unsafe fn test_mm512_mask_min_epi16() {
14230        #[rustfmt::skip]
14231        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14232                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14233        #[rustfmt::skip]
14234        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14235                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14236        let r = _mm512_mask_min_epi16(a, 0, a, b);
14237        assert_eq_m512i(r, a);
14238        let r = _mm512_mask_min_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
14239        #[rustfmt::skip]
14240        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14241                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14242        assert_eq_m512i(r, e);
14243    }
14244
14245    #[simd_test(enable = "avx512f")]
14246    unsafe fn test_mm512_maskz_min_epi16() {
14247        #[rustfmt::skip]
14248        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14249                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14250        #[rustfmt::skip]
14251        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14252                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14253        let r = _mm512_maskz_min_epi16(0, a, b);
14254        assert_eq_m512i(r, _mm512_setzero_si512());
14255        let r = _mm512_maskz_min_epi16(0b00000000_11111111_00000000_11111111, a, b);
14256        #[rustfmt::skip]
14257        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14258                                 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14259        assert_eq_m512i(r, e);
14260    }
14261
14262    #[simd_test(enable = "avx512f,avx512vl")]
14263    unsafe fn test_mm256_mask_min_epi16() {
14264        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14265        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14266        let r = _mm256_mask_min_epi16(a, 0, a, b);
14267        assert_eq_m256i(r, a);
14268        let r = _mm256_mask_min_epi16(a, 0b00000000_11111111, a, b);
14269        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14270        assert_eq_m256i(r, e);
14271    }
14272
14273    #[simd_test(enable = "avx512f,avx512vl")]
14274    unsafe fn test_mm256_maskz_min_epi16() {
14275        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14276        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14277        let r = _mm256_maskz_min_epi16(0, a, b);
14278        assert_eq_m256i(r, _mm256_setzero_si256());
14279        let r = _mm256_maskz_min_epi16(0b00000000_11111111, a, b);
14280        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14281        assert_eq_m256i(r, e);
14282    }
14283
14284    #[simd_test(enable = "avx512f,avx512vl")]
14285    unsafe fn test_mm_mask_min_epi16() {
14286        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14287        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14288        let r = _mm_mask_min_epi16(a, 0, a, b);
14289        assert_eq_m128i(r, a);
14290        let r = _mm_mask_min_epi16(a, 0b00001111, a, b);
14291        let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
14292        assert_eq_m128i(r, e);
14293    }
14294
14295    #[simd_test(enable = "avx512f,avx512vl")]
14296    unsafe fn test_mm_maskz_min_epi16() {
14297        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14298        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14299        let r = _mm_maskz_min_epi16(0, a, b);
14300        assert_eq_m128i(r, _mm_setzero_si128());
14301        let r = _mm_maskz_min_epi16(0b00001111, a, b);
14302        let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
14303        assert_eq_m128i(r, e);
14304    }
14305
14306    #[simd_test(enable = "avx512bw")]
14307    unsafe fn test_mm512_min_epi8() {
14308        #[rustfmt::skip]
14309        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14310                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14311                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14312                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14313        #[rustfmt::skip]
14314        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14315                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14316                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14317                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14318        let r = _mm512_min_epi8(a, b);
14319        #[rustfmt::skip]
14320        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14321                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14322                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14323                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14324        assert_eq_m512i(r, e);
14325    }
14326
14327    #[simd_test(enable = "avx512f")]
14328    unsafe fn test_mm512_mask_min_epi8() {
14329        #[rustfmt::skip]
14330        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14331                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14332                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14333                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14334        #[rustfmt::skip]
14335        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14336                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14337                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14338                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14339        let r = _mm512_mask_min_epi8(a, 0, a, b);
14340        assert_eq_m512i(r, a);
14341        let r = _mm512_mask_min_epi8(
14342            a,
14343            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
14344            a,
14345            b,
14346        );
14347        #[rustfmt::skip]
14348        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14349                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14350                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14351                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14352        assert_eq_m512i(r, e);
14353    }
14354
14355    #[simd_test(enable = "avx512f")]
14356    unsafe fn test_mm512_maskz_min_epi8() {
14357        #[rustfmt::skip]
14358        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14359                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14360                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14361                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14362        #[rustfmt::skip]
14363        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14364                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14365                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14366                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14367        let r = _mm512_maskz_min_epi8(0, a, b);
14368        assert_eq_m512i(r, _mm512_setzero_si512());
14369        let r = _mm512_maskz_min_epi8(
14370            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
14371            a,
14372            b,
14373        );
14374        #[rustfmt::skip]
14375        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14376                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14377                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14378                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14379        assert_eq_m512i(r, e);
14380    }
14381
14382    #[simd_test(enable = "avx512f,avx512vl")]
14383    unsafe fn test_mm256_mask_min_epi8() {
14384        #[rustfmt::skip]
14385        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14386                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14387        #[rustfmt::skip]
14388        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14389                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14390        let r = _mm256_mask_min_epi8(a, 0, a, b);
14391        assert_eq_m256i(r, a);
14392        let r = _mm256_mask_min_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
14393        #[rustfmt::skip]
14394        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14395                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14396        assert_eq_m256i(r, e);
14397    }
14398
14399    #[simd_test(enable = "avx512f,avx512vl")]
14400    unsafe fn test_mm256_maskz_min_epi8() {
14401        #[rustfmt::skip]
14402        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14403                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14404        #[rustfmt::skip]
14405        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14406                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14407        let r = _mm256_maskz_min_epi8(0, a, b);
14408        assert_eq_m256i(r, _mm256_setzero_si256());
14409        let r = _mm256_maskz_min_epi8(0b00000000_11111111_00000000_11111111, a, b);
14410        #[rustfmt::skip]
14411        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14412                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14413        assert_eq_m256i(r, e);
14414    }
14415
14416    #[simd_test(enable = "avx512f,avx512vl")]
14417    unsafe fn test_mm_mask_min_epi8() {
14418        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14419        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14420        let r = _mm_mask_min_epi8(a, 0, a, b);
14421        assert_eq_m128i(r, a);
14422        let r = _mm_mask_min_epi8(a, 0b00000000_11111111, a, b);
14423        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14424        assert_eq_m128i(r, e);
14425    }
14426
14427    #[simd_test(enable = "avx512f,avx512vl")]
14428    unsafe fn test_mm_maskz_min_epi8() {
14429        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14430        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14431        let r = _mm_maskz_min_epi8(0, a, b);
14432        assert_eq_m128i(r, _mm_setzero_si128());
14433        let r = _mm_maskz_min_epi8(0b00000000_11111111, a, b);
14434        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14435        assert_eq_m128i(r, e);
14436    }
14437
14438    #[simd_test(enable = "avx512bw")]
14439    unsafe fn test_mm512_cmplt_epu16_mask() {
14440        let a = _mm512_set1_epi16(-2);
14441        let b = _mm512_set1_epi16(-1);
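        // Interpreted as unsigned 16-bit integers, -2 is 0xFFFE and -1 is
        // 0xFFFF, so a < b holds in every lane and all 32 mask bits are set.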
        let m = _mm512_cmplt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmplt_epu16_mask() {
        let a = _mm512_set1_epi16(-2);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmplt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmplt_epu16_mask() {
        let a = _mm256_set1_epi16(-2);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmplt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmplt_epu16_mask() {
        let a = _mm256_set1_epi16(-2);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmplt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmplt_epu16_mask() {
        let a = _mm_set1_epi16(-2);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmplt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmplt_epu16_mask() {
        let a = _mm_set1_epi16(-2);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmplt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmplt_epu8_mask() {
        let a = _mm512_set1_epi8(-2);
        let b = _mm512_set1_epi8(-1);
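        // With 64 byte lanes the result is a 64-bit mask; as unsigned bytes,
        // 0xFE < 0xFF in every lane.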
        let m = _mm512_cmplt_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmplt_epu8_mask() {
        let a = _mm512_set1_epi8(-2);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmplt_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmplt_epu8_mask() {
        let a = _mm256_set1_epi8(-2);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmplt_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmplt_epu8_mask() {
        let a = _mm256_set1_epi8(-2);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmplt_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmplt_epu8_mask() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmplt_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmplt_epu8_mask() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmplt_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmplt_epi16_mask() {
        let a = _mm512_set1_epi16(-2);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmplt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmplt_epi16_mask() {
        let a = _mm512_set1_epi16(-2);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmplt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmplt_epi16_mask() {
        let a = _mm256_set1_epi16(-2);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmplt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmplt_epi16_mask() {
        let a = _mm256_set1_epi16(-2);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmplt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmplt_epi16_mask() {
        let a = _mm_set1_epi16(-2);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmplt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmplt_epi16_mask() {
        let a = _mm_set1_epi16(-2);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmplt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmplt_epi8_mask() {
        let a = _mm512_set1_epi8(-2);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmplt_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmplt_epi8_mask() {
        let a = _mm512_set1_epi8(-2);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmplt_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmplt_epi8_mask() {
        let a = _mm256_set1_epi8(-2);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmplt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmplt_epi8_mask() {
        let a = _mm256_set1_epi8(-2);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmplt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmplt_epi8_mask() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmplt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmplt_epi8_mask() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmplt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpgt_epu16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmpgt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpgt_epu16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpgt_epu16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmpgt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpgt_epu16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpgt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpgt_epu16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmpgt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpgt_epu16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpgt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpgt_epu8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmpgt_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpgt_epu8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpgt_epu8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmpgt_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpgt_epu8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpgt_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpgt_epu8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmpgt_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpgt_epu8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpgt_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpgt_epi16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(-1);
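        // The signed comparison 2 > -1 holds in every lane, so the full
        // 32-bit mask is set.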
        let m = _mm512_cmpgt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpgt_epi16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpgt_epi16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmpgt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpgt_epi16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpgt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpgt_epi16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmpgt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpgt_epi16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpgt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpgt_epi8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpgt_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpgt_epi8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpgt_epi8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpgt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpgt_epi8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpgt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpgt_epi8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpgt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpgt_epi8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpgt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmple_epu16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
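        // Equal operands satisfy `<=`, so every mask bit is set.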
        let m = _mm512_cmple_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmple_epu16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmple_epu16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmple_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmple_epu16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmple_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmple_epu16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmple_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmple_epu16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmple_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmple_epu8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmple_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmple_epu8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmple_epu8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmple_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmple_epu8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmple_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmple_epu8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmple_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmple_epu8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmple_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmple_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmple_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmple_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmple_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmple_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmple_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmple_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmple_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmple_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmple_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmple_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmple_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmple_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmple_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmple_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmple_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmple_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmple_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmple_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmple_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmple_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmple_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpge_epu16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmpge_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpge_epu16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpge_epu16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmpge_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpge_epu16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpge_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpge_epu16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmpge_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpge_epu16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpge_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpge_epu8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmpge_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpge_epu8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpge_epu8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmpge_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpge_epu8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
15173        let r = _mm256_mask_cmpge_epu8_mask(mask, a, b);
15174        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15175    }
15176
15177    #[simd_test(enable = "avx512bw,avx512vl")]
15178    unsafe fn test_mm_cmpge_epu8_mask() {
15179        let a = _mm_set1_epi8(1);
15180        let b = _mm_set1_epi8(1);
15181        let m = _mm_cmpge_epu8_mask(a, b);
15182        assert_eq!(m, 0b11111111_11111111);
15183    }
15184
15185    #[simd_test(enable = "avx512bw,avx512vl")]
15186    unsafe fn test_mm_mask_cmpge_epu8_mask() {
15187        let a = _mm_set1_epi8(1);
15188        let b = _mm_set1_epi8(1);
15189        let mask = 0b01010101_01010101;
15190        let r = _mm_mask_cmpge_epu8_mask(mask, a, b);
15191        assert_eq!(r, 0b01010101_01010101);
15192    }
15193
15194    #[simd_test(enable = "avx512bw")]
15195    unsafe fn test_mm512_cmpge_epi16_mask() {
15196        let a = _mm512_set1_epi16(-1);
15197        let b = _mm512_set1_epi16(-1);
15198        let m = _mm512_cmpge_epi16_mask(a, b);
15199        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15200    }
15201
15202    #[simd_test(enable = "avx512bw")]
15203    unsafe fn test_mm512_mask_cmpge_epi16_mask() {
15204        let a = _mm512_set1_epi16(-1);
15205        let b = _mm512_set1_epi16(-1);
15206        let mask = 0b01010101_01010101_01010101_01010101;
15207        let r = _mm512_mask_cmpge_epi16_mask(mask, a, b);
15208        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15209    }
15210
15211    #[simd_test(enable = "avx512bw,avx512vl")]
15212    unsafe fn test_mm256_cmpge_epi16_mask() {
15213        let a = _mm256_set1_epi16(-1);
15214        let b = _mm256_set1_epi16(-1);
15215        let m = _mm256_cmpge_epi16_mask(a, b);
15216        assert_eq!(m, 0b11111111_11111111);
15217    }
15218
15219    #[simd_test(enable = "avx512bw,avx512vl")]
15220    unsafe fn test_mm256_mask_cmpge_epi16_mask() {
15221        let a = _mm256_set1_epi16(-1);
15222        let b = _mm256_set1_epi16(-1);
15223        let mask = 0b01010101_01010101;
15224        let r = _mm256_mask_cmpge_epi16_mask(mask, a, b);
15225        assert_eq!(r, 0b01010101_01010101);
15226    }
15227
15228    #[simd_test(enable = "avx512bw,avx512vl")]
15229    unsafe fn test_mm_cmpge_epi16_mask() {
15230        let a = _mm_set1_epi16(-1);
15231        let b = _mm_set1_epi16(-1);
15232        let m = _mm_cmpge_epi16_mask(a, b);
15233        assert_eq!(m, 0b11111111);
15234    }
15235
15236    #[simd_test(enable = "avx512bw,avx512vl")]
15237    unsafe fn test_mm_mask_cmpge_epi16_mask() {
15238        let a = _mm_set1_epi16(-1);
15239        let b = _mm_set1_epi16(-1);
15240        let mask = 0b01010101;
15241        let r = _mm_mask_cmpge_epi16_mask(mask, a, b);
15242        assert_eq!(r, 0b01010101);
15243    }
15244
15245    #[simd_test(enable = "avx512bw")]
15246    unsafe fn test_mm512_cmpge_epi8_mask() {
15247        let a = _mm512_set1_epi8(-1);
15248        let b = _mm512_set1_epi8(-1);
15249        let m = _mm512_cmpge_epi8_mask(a, b);
15250        assert_eq!(
15251            m,
15252            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15253        );
15254    }
15255
15256    #[simd_test(enable = "avx512bw")]
15257    unsafe fn test_mm512_mask_cmpge_epi8_mask() {
15258        let a = _mm512_set1_epi8(-1);
15259        let b = _mm512_set1_epi8(-1);
15260        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15261        let r = _mm512_mask_cmpge_epi8_mask(mask, a, b);
15262        assert_eq!(
15263            r,
15264            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15265        );
15266    }
15267
15268    #[simd_test(enable = "avx512bw,avx512vl")]
15269    unsafe fn test_mm256_cmpge_epi8_mask() {
15270        let a = _mm256_set1_epi8(-1);
15271        let b = _mm256_set1_epi8(-1);
15272        let m = _mm256_cmpge_epi8_mask(a, b);
15273        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15274    }
15275
15276    #[simd_test(enable = "avx512bw,avx512vl")]
15277    unsafe fn test_mm256_mask_cmpge_epi8_mask() {
15278        let a = _mm256_set1_epi8(-1);
15279        let b = _mm256_set1_epi8(-1);
15280        let mask = 0b01010101_01010101_01010101_01010101;
15281        let r = _mm256_mask_cmpge_epi8_mask(mask, a, b);
15282        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15283    }
15284
15285    #[simd_test(enable = "avx512bw,avx512vl")]
15286    unsafe fn test_mm_cmpge_epi8_mask() {
15287        let a = _mm_set1_epi8(-1);
15288        let b = _mm_set1_epi8(-1);
15289        let m = _mm_cmpge_epi8_mask(a, b);
15290        assert_eq!(m, 0b11111111_11111111);
15291    }
15292
15293    #[simd_test(enable = "avx512bw,avx512vl")]
15294    unsafe fn test_mm_mask_cmpge_epi8_mask() {
15295        let a = _mm_set1_epi8(-1);
15296        let b = _mm_set1_epi8(-1);
15297        let mask = 0b01010101_01010101;
15298        let r = _mm_mask_cmpge_epi8_mask(mask, a, b);
15299        assert_eq!(r, 0b01010101_01010101);
15300    }
15301
15302    #[simd_test(enable = "avx512bw")]
15303    unsafe fn test_mm512_cmpeq_epu16_mask() {
15304        let a = _mm512_set1_epi16(1);
15305        let b = _mm512_set1_epi16(1);
15306        let m = _mm512_cmpeq_epu16_mask(a, b);
15307        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15308    }
15309
15310    #[simd_test(enable = "avx512bw")]
15311    unsafe fn test_mm512_mask_cmpeq_epu16_mask() {
15312        let a = _mm512_set1_epi16(1);
15313        let b = _mm512_set1_epi16(1);
15314        let mask = 0b01010101_01010101_01010101_01010101;
15315        let r = _mm512_mask_cmpeq_epu16_mask(mask, a, b);
15316        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15317    }
15318
15319    #[simd_test(enable = "avx512bw,avx512vl")]
15320    unsafe fn test_mm256_cmpeq_epu16_mask() {
15321        let a = _mm256_set1_epi16(1);
15322        let b = _mm256_set1_epi16(1);
15323        let m = _mm256_cmpeq_epu16_mask(a, b);
15324        assert_eq!(m, 0b11111111_11111111);
15325    }
15326
15327    #[simd_test(enable = "avx512bw,avx512vl")]
15328    unsafe fn test_mm256_mask_cmpeq_epu16_mask() {
15329        let a = _mm256_set1_epi16(1);
15330        let b = _mm256_set1_epi16(1);
15331        let mask = 0b01010101_01010101;
15332        let r = _mm256_mask_cmpeq_epu16_mask(mask, a, b);
15333        assert_eq!(r, 0b01010101_01010101);
15334    }
15335
15336    #[simd_test(enable = "avx512bw,avx512vl")]
15337    unsafe fn test_mm_cmpeq_epu16_mask() {
15338        let a = _mm_set1_epi16(1);
15339        let b = _mm_set1_epi16(1);
15340        let m = _mm_cmpeq_epu16_mask(a, b);
15341        assert_eq!(m, 0b11111111);
15342    }
15343
15344    #[simd_test(enable = "avx512bw,avx512vl")]
15345    unsafe fn test_mm_mask_cmpeq_epu16_mask() {
15346        let a = _mm_set1_epi16(1);
15347        let b = _mm_set1_epi16(1);
15348        let mask = 0b01010101;
15349        let r = _mm_mask_cmpeq_epu16_mask(mask, a, b);
15350        assert_eq!(r, 0b01010101);
15351    }
15352
15353    #[simd_test(enable = "avx512bw")]
15354    unsafe fn test_mm512_cmpeq_epu8_mask() {
15355        let a = _mm512_set1_epi8(1);
15356        let b = _mm512_set1_epi8(1);
15357        let m = _mm512_cmpeq_epu8_mask(a, b);
15358        assert_eq!(
15359            m,
15360            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15361        );
15362    }
15363
15364    #[simd_test(enable = "avx512bw")]
15365    unsafe fn test_mm512_mask_cmpeq_epu8_mask() {
15366        let a = _mm512_set1_epi8(1);
15367        let b = _mm512_set1_epi8(1);
15368        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15369        let r = _mm512_mask_cmpeq_epu8_mask(mask, a, b);
15370        assert_eq!(
15371            r,
15372            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15373        );
15374    }
15375
15376    #[simd_test(enable = "avx512bw,avx512vl")]
15377    unsafe fn test_mm256_cmpeq_epu8_mask() {
15378        let a = _mm256_set1_epi8(1);
15379        let b = _mm256_set1_epi8(1);
15380        let m = _mm256_cmpeq_epu8_mask(a, b);
15381        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15382    }
15383
15384    #[simd_test(enable = "avx512bw,avx512vl")]
15385    unsafe fn test_mm256_mask_cmpeq_epu8_mask() {
15386        let a = _mm256_set1_epi8(1);
15387        let b = _mm256_set1_epi8(1);
15388        let mask = 0b01010101_01010101_01010101_01010101;
15389        let r = _mm256_mask_cmpeq_epu8_mask(mask, a, b);
15390        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15391    }
15392
15393    #[simd_test(enable = "avx512bw,avx512vl")]
15394    unsafe fn test_mm_cmpeq_epu8_mask() {
15395        let a = _mm_set1_epi8(1);
15396        let b = _mm_set1_epi8(1);
15397        let m = _mm_cmpeq_epu8_mask(a, b);
15398        assert_eq!(m, 0b11111111_11111111);
15399    }
15400
15401    #[simd_test(enable = "avx512bw,avx512vl")]
15402    unsafe fn test_mm_mask_cmpeq_epu8_mask() {
15403        let a = _mm_set1_epi8(1);
15404        let b = _mm_set1_epi8(1);
15405        let mask = 0b01010101_01010101;
15406        let r = _mm_mask_cmpeq_epu8_mask(mask, a, b);
15407        assert_eq!(r, 0b01010101_01010101);
15408    }
15409
15410    #[simd_test(enable = "avx512bw")]
15411    unsafe fn test_mm512_cmpeq_epi16_mask() {
15412        let a = _mm512_set1_epi16(-1);
15413        let b = _mm512_set1_epi16(-1);
15414        let m = _mm512_cmpeq_epi16_mask(a, b);
15415        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15416    }
15417
15418    #[simd_test(enable = "avx512bw")]
15419    unsafe fn test_mm512_mask_cmpeq_epi16_mask() {
15420        let a = _mm512_set1_epi16(-1);
15421        let b = _mm512_set1_epi16(-1);
15422        let mask = 0b01010101_01010101_01010101_01010101;
15423        let r = _mm512_mask_cmpeq_epi16_mask(mask, a, b);
15424        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15425    }
15426
15427    #[simd_test(enable = "avx512bw,avx512vl")]
15428    unsafe fn test_mm256_cmpeq_epi16_mask() {
15429        let a = _mm256_set1_epi16(-1);
15430        let b = _mm256_set1_epi16(-1);
15431        let m = _mm256_cmpeq_epi16_mask(a, b);
15432        assert_eq!(m, 0b11111111_11111111);
15433    }
15434
15435    #[simd_test(enable = "avx512bw,avx512vl")]
15436    unsafe fn test_mm256_mask_cmpeq_epi16_mask() {
15437        let a = _mm256_set1_epi16(-1);
15438        let b = _mm256_set1_epi16(-1);
15439        let mask = 0b01010101_01010101;
15440        let r = _mm256_mask_cmpeq_epi16_mask(mask, a, b);
15441        assert_eq!(r, 0b01010101_01010101);
15442    }
15443
15444    #[simd_test(enable = "avx512bw,avx512vl")]
15445    unsafe fn test_mm_cmpeq_epi16_mask() {
15446        let a = _mm_set1_epi16(-1);
15447        let b = _mm_set1_epi16(-1);
15448        let m = _mm_cmpeq_epi16_mask(a, b);
15449        assert_eq!(m, 0b11111111);
15450    }
15451
15452    #[simd_test(enable = "avx512bw,avx512vl")]
15453    unsafe fn test_mm_mask_cmpeq_epi16_mask() {
15454        let a = _mm_set1_epi16(-1);
15455        let b = _mm_set1_epi16(-1);
15456        let mask = 0b01010101;
15457        let r = _mm_mask_cmpeq_epi16_mask(mask, a, b);
15458        assert_eq!(r, 0b01010101);
15459    }
15460
15461    #[simd_test(enable = "avx512bw")]
15462    unsafe fn test_mm512_cmpeq_epi8_mask() {
15463        let a = _mm512_set1_epi8(-1);
15464        let b = _mm512_set1_epi8(-1);
15465        let m = _mm512_cmpeq_epi8_mask(a, b);
15466        assert_eq!(
15467            m,
15468            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15469        );
15470    }
15471
15472    #[simd_test(enable = "avx512bw")]
15473    unsafe fn test_mm512_mask_cmpeq_epi8_mask() {
15474        let a = _mm512_set1_epi8(-1);
15475        let b = _mm512_set1_epi8(-1);
15476        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15477        let r = _mm512_mask_cmpeq_epi8_mask(mask, a, b);
15478        assert_eq!(
15479            r,
15480            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15481        );
15482    }
15483
15484    #[simd_test(enable = "avx512bw,avx512vl")]
15485    unsafe fn test_mm256_cmpeq_epi8_mask() {
15486        let a = _mm256_set1_epi8(-1);
15487        let b = _mm256_set1_epi8(-1);
15488        let m = _mm256_cmpeq_epi8_mask(a, b);
15489        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15490    }
15491
15492    #[simd_test(enable = "avx512bw,avx512vl")]
15493    unsafe fn test_mm256_mask_cmpeq_epi8_mask() {
15494        let a = _mm256_set1_epi8(-1);
15495        let b = _mm256_set1_epi8(-1);
15496        let mask = 0b01010101_01010101_01010101_01010101;
15497        let r = _mm256_mask_cmpeq_epi8_mask(mask, a, b);
15498        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15499    }
15500
15501    #[simd_test(enable = "avx512bw,avx512vl")]
15502    unsafe fn test_mm_cmpeq_epi8_mask() {
15503        let a = _mm_set1_epi8(-1);
15504        let b = _mm_set1_epi8(-1);
15505        let m = _mm_cmpeq_epi8_mask(a, b);
15506        assert_eq!(m, 0b11111111_11111111);
15507    }
15508
15509    #[simd_test(enable = "avx512bw,avx512vl")]
15510    unsafe fn test_mm_mask_cmpeq_epi8_mask() {
15511        let a = _mm_set1_epi8(-1);
15512        let b = _mm_set1_epi8(-1);
15513        let mask = 0b01010101_01010101;
15514        let r = _mm_mask_cmpeq_epi8_mask(mask, a, b);
15515        assert_eq!(r, 0b01010101_01010101);
15516    }
15517
15518    #[simd_test(enable = "avx512bw")]
15519    unsafe fn test_mm512_cmpneq_epu16_mask() {
15520        let a = _mm512_set1_epi16(2);
15521        let b = _mm512_set1_epi16(1);
15522        let m = _mm512_cmpneq_epu16_mask(a, b);
15523        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15524    }
15525
15526    #[simd_test(enable = "avx512bw")]
15527    unsafe fn test_mm512_mask_cmpneq_epu16_mask() {
15528        let a = _mm512_set1_epi16(2);
15529        let b = _mm512_set1_epi16(1);
15530        let mask = 0b01010101_01010101_01010101_01010101;
15531        let r = _mm512_mask_cmpneq_epu16_mask(mask, a, b);
15532        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15533    }
15534
15535    #[simd_test(enable = "avx512bw,avx512vl")]
15536    unsafe fn test_mm256_cmpneq_epu16_mask() {
15537        let a = _mm256_set1_epi16(2);
15538        let b = _mm256_set1_epi16(1);
15539        let m = _mm256_cmpneq_epu16_mask(a, b);
15540        assert_eq!(m, 0b11111111_11111111);
15541    }
15542
15543    #[simd_test(enable = "avx512bw,avx512vl")]
15544    unsafe fn test_mm256_mask_cmpneq_epu16_mask() {
15545        let a = _mm256_set1_epi16(2);
15546        let b = _mm256_set1_epi16(1);
15547        let mask = 0b01010101_01010101;
15548        let r = _mm256_mask_cmpneq_epu16_mask(mask, a, b);
15549        assert_eq!(r, 0b01010101_01010101);
15550    }
15551
15552    #[simd_test(enable = "avx512bw,avx512vl")]
15553    unsafe fn test_mm_cmpneq_epu16_mask() {
15554        let a = _mm_set1_epi16(2);
15555        let b = _mm_set1_epi16(1);
15556        let m = _mm_cmpneq_epu16_mask(a, b);
15557        assert_eq!(m, 0b11111111);
15558    }
15559
15560    #[simd_test(enable = "avx512bw,avx512vl")]
15561    unsafe fn test_mm_mask_cmpneq_epu16_mask() {
15562        let a = _mm_set1_epi16(2);
15563        let b = _mm_set1_epi16(1);
15564        let mask = 0b01010101;
15565        let r = _mm_mask_cmpneq_epu16_mask(mask, a, b);
15566        assert_eq!(r, 0b01010101);
15567    }
15568
15569    #[simd_test(enable = "avx512bw")]
15570    unsafe fn test_mm512_cmpneq_epu8_mask() {
15571        let a = _mm512_set1_epi8(2);
15572        let b = _mm512_set1_epi8(1);
15573        let m = _mm512_cmpneq_epu8_mask(a, b);
15574        assert_eq!(
15575            m,
15576            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15577        );
15578    }
15579
15580    #[simd_test(enable = "avx512bw")]
15581    unsafe fn test_mm512_mask_cmpneq_epu8_mask() {
15582        let a = _mm512_set1_epi8(2);
15583        let b = _mm512_set1_epi8(1);
15584        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15585        let r = _mm512_mask_cmpneq_epu8_mask(mask, a, b);
15586        assert_eq!(
15587            r,
15588            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15589        );
15590    }
15591
15592    #[simd_test(enable = "avx512bw,avx512vl")]
15593    unsafe fn test_mm256_cmpneq_epu8_mask() {
15594        let a = _mm256_set1_epi8(2);
15595        let b = _mm256_set1_epi8(1);
15596        let m = _mm256_cmpneq_epu8_mask(a, b);
15597        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15598    }
15599
15600    #[simd_test(enable = "avx512bw,avx512vl")]
15601    unsafe fn test_mm256_mask_cmpneq_epu8_mask() {
15602        let a = _mm256_set1_epi8(2);
15603        let b = _mm256_set1_epi8(1);
15604        let mask = 0b01010101_01010101_01010101_01010101;
15605        let r = _mm256_mask_cmpneq_epu8_mask(mask, a, b);
15606        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15607    }
15608
15609    #[simd_test(enable = "avx512bw,avx512vl")]
15610    unsafe fn test_mm_cmpneq_epu8_mask() {
15611        let a = _mm_set1_epi8(2);
15612        let b = _mm_set1_epi8(1);
15613        let m = _mm_cmpneq_epu8_mask(a, b);
15614        assert_eq!(m, 0b11111111_11111111);
15615    }
15616
15617    #[simd_test(enable = "avx512bw,avx512vl")]
15618    unsafe fn test_mm_mask_cmpneq_epu8_mask() {
15619        let a = _mm_set1_epi8(2);
15620        let b = _mm_set1_epi8(1);
15621        let mask = 0b01010101_01010101;
15622        let r = _mm_mask_cmpneq_epu8_mask(mask, a, b);
15623        assert_eq!(r, 0b01010101_01010101);
15624    }
15625
15626    #[simd_test(enable = "avx512bw")]
15627    unsafe fn test_mm512_cmpneq_epi16_mask() {
15628        let a = _mm512_set1_epi16(1);
15629        let b = _mm512_set1_epi16(-1);
15630        let m = _mm512_cmpneq_epi16_mask(a, b);
15631        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15632    }
15633
15634    #[simd_test(enable = "avx512bw")]
15635    unsafe fn test_mm512_mask_cmpneq_epi16_mask() {
15636        let a = _mm512_set1_epi16(1);
15637        let b = _mm512_set1_epi16(-1);
15638        let mask = 0b01010101_01010101_01010101_01010101;
15639        let r = _mm512_mask_cmpneq_epi16_mask(mask, a, b);
15640        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15641    }
15642
15643    #[simd_test(enable = "avx512bw,avx512vl")]
15644    unsafe fn test_mm256_cmpneq_epi16_mask() {
15645        let a = _mm256_set1_epi16(1);
15646        let b = _mm256_set1_epi16(-1);
15647        let m = _mm256_cmpneq_epi16_mask(a, b);
15648        assert_eq!(m, 0b11111111_11111111);
15649    }
15650
15651    #[simd_test(enable = "avx512bw,avx512vl")]
15652    unsafe fn test_mm256_mask_cmpneq_epi16_mask() {
15653        let a = _mm256_set1_epi16(1);
15654        let b = _mm256_set1_epi16(-1);
15655        let mask = 0b01010101_01010101;
15656        let r = _mm256_mask_cmpneq_epi16_mask(mask, a, b);
15657        assert_eq!(r, 0b01010101_01010101);
15658    }
15659
15660    #[simd_test(enable = "avx512bw,avx512vl")]
15661    unsafe fn test_mm_cmpneq_epi16_mask() {
15662        let a = _mm_set1_epi16(1);
15663        let b = _mm_set1_epi16(-1);
15664        let m = _mm_cmpneq_epi16_mask(a, b);
15665        assert_eq!(m, 0b11111111);
15666    }
15667
15668    #[simd_test(enable = "avx512bw,avx512vl")]
15669    unsafe fn test_mm_mask_cmpneq_epi16_mask() {
15670        let a = _mm_set1_epi16(1);
15671        let b = _mm_set1_epi16(-1);
15672        let mask = 0b01010101;
15673        let r = _mm_mask_cmpneq_epi16_mask(mask, a, b);
15674        assert_eq!(r, 0b01010101);
15675    }
15676
15677    #[simd_test(enable = "avx512bw")]
15678    unsafe fn test_mm512_cmpneq_epi8_mask() {
15679        let a = _mm512_set1_epi8(1);
15680        let b = _mm512_set1_epi8(-1);
15681        let m = _mm512_cmpneq_epi8_mask(a, b);
15682        assert_eq!(
15683            m,
15684            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15685        );
15686    }
15687
15688    #[simd_test(enable = "avx512bw")]
15689    unsafe fn test_mm512_mask_cmpneq_epi8_mask() {
15690        let a = _mm512_set1_epi8(1);
15691        let b = _mm512_set1_epi8(-1);
15692        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15693        let r = _mm512_mask_cmpneq_epi8_mask(mask, a, b);
15694        assert_eq!(
15695            r,
15696            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15697        );
15698    }
15699
15700    #[simd_test(enable = "avx512bw,avx512vl")]
15701    unsafe fn test_mm256_cmpneq_epi8_mask() {
15702        let a = _mm256_set1_epi8(1);
15703        let b = _mm256_set1_epi8(-1);
15704        let m = _mm256_cmpneq_epi8_mask(a, b);
15705        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15706    }
15707
15708    #[simd_test(enable = "avx512bw,avx512vl")]
15709    unsafe fn test_mm256_mask_cmpneq_epi8_mask() {
15710        let a = _mm256_set1_epi8(1);
15711        let b = _mm256_set1_epi8(-1);
15712        let mask = 0b01010101_01010101_01010101_01010101;
15713        let r = _mm256_mask_cmpneq_epi8_mask(mask, a, b);
15714        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15715    }
15716
15717    #[simd_test(enable = "avx512bw,avx512vl")]
15718    unsafe fn test_mm_cmpneq_epi8_mask() {
15719        let a = _mm_set1_epi8(1);
15720        let b = _mm_set1_epi8(-1);
15721        let m = _mm_cmpneq_epi8_mask(a, b);
15722        assert_eq!(m, 0b11111111_11111111);
15723    }
15724
15725    #[simd_test(enable = "avx512bw,avx512vl")]
15726    unsafe fn test_mm_mask_cmpneq_epi8_mask() {
15727        let a = _mm_set1_epi8(1);
15728        let b = _mm_set1_epi8(-1);
15729        let mask = 0b01010101_01010101;
15730        let r = _mm_mask_cmpneq_epi8_mask(mask, a, b);
15731        assert_eq!(r, 0b01010101_01010101);
15732    }
15733
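    // Orientation note for the tests below (an added comment, not original to
    // this file): the generic `_mm*_cmp_*_mask` intrinsics take the comparison
    // predicate as a const IMM8 parameter; `_MM_CMPINT_LT` selects "less
    // than", so with a = 0 and b = 1 every lane compares true.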
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmp_epu16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmp_epu16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmp_epu16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmp_epu16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmp_epu16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmp_epu16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmp_epu8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmp_epu8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmp_epu8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmp_epu8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmp_epu8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmp_epu8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmp_epi16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmp_epi16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmp_epi16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmp_epi16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmp_epi16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmp_epi16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmp_epi8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmp_epi8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmp_epi8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmp_epi8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmp_epi8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmp_epi8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

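    // Added note on the masked reduction tests below: masked-off lanes are
    // replaced by the identity element of the operation (0 for add/or, 1 for
    // mul, all-ones for and, the type's extremes for min/max). Mask bit i
    // selects lane i, while `_mm*_set_epi*` lists lanes from highest to
    // lowest, so a high-half mask picks the arguments written first.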
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let e = _mm256_reduce_add_epi16(a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let e = _mm256_mask_reduce_add_epi16(0b11111111_00000000, a);
        assert_eq!(8, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_add_epi16() {
        let a = _mm_set1_epi16(1);
        let e = _mm_reduce_add_epi16(a);
        assert_eq!(8, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_add_epi16() {
        let a = _mm_set1_epi16(1);
        let e = _mm_mask_reduce_add_epi16(0b11110000, a);
        assert_eq!(4, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let e = _mm256_reduce_add_epi8(a);
        assert_eq!(32, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let e = _mm256_mask_reduce_add_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_add_epi8() {
        let a = _mm_set1_epi8(1);
        let e = _mm_reduce_add_epi8(a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_add_epi8() {
        let a = _mm_set1_epi8(1);
        let e = _mm_mask_reduce_add_epi8(0b11111111_00000000, a);
        assert_eq!(8, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_and_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_reduce_and_epi16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_and_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_mask_reduce_and_epi16(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_and_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_reduce_and_epi16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_and_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_mask_reduce_and_epi16(0b11110000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_and_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_reduce_and_epi8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_and_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_mask_reduce_and_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_and_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_reduce_and_epi8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_and_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_mask_reduce_and_epi8(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_mul_epi16() {
        let a = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        let e = _mm256_reduce_mul_epi16(a);
        assert_eq!(256, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_mul_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_mask_reduce_mul_epi16(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_mul_epi16() {
        let a = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
        let e = _mm_reduce_mul_epi16(a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_mul_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_mask_reduce_mul_epi16(0b11110000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_mul_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            2, 2, 2,
        );
        let e = _mm256_reduce_mul_epi8(a);
        assert_eq!(64, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_mul_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            2, 2, 2,
        );
        let e = _mm256_mask_reduce_mul_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_mul_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
        let e = _mm_reduce_mul_epi8(a);
        assert_eq!(8, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_mul_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
        let e = _mm_mask_reduce_mul_epi8(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_max_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_reduce_max_epi16(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_max_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_mask_reduce_max_epi16(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_max_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_reduce_max_epi16(a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_max_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_mask_reduce_max_epi16(0b11110000, a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_max_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_reduce_max_epi8(a);
        assert_eq!(31, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_max_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_mask_reduce_max_epi8(0b1111111111111111_0000000000000000, a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_max_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_reduce_max_epi8(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_max_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_mask_reduce_max_epi8(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_max_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_reduce_max_epu16(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_max_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_mask_reduce_max_epu16(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_max_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_reduce_max_epu16(a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_max_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_mask_reduce_max_epu16(0b11110000, a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_max_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_reduce_max_epu8(a);
        assert_eq!(31, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_max_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_mask_reduce_max_epu8(0b1111111111111111_0000000000000000, a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_max_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_reduce_max_epu8(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_max_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_mask_reduce_max_epu8(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_min_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_reduce_min_epi16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_min_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_mask_reduce_min_epi16(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_min_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_reduce_min_epi16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_min_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_mask_reduce_min_epi16(0b11110000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_min_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_reduce_min_epi8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_min_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_mask_reduce_min_epi8(0b1111111111111111_0000000000000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_min_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_reduce_min_epi8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_min_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_mask_reduce_min_epi8(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_min_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_reduce_min_epu16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_min_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_mask_reduce_min_epu16(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_min_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_reduce_min_epu16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_min_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_mask_reduce_min_epu16(0b11110000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_min_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_reduce_min_epu8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_min_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_mask_reduce_min_epu8(0b1111111111111111_0000000000000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_min_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_reduce_min_epu8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_min_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_mask_reduce_min_epu8(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_or_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_reduce_or_epi16(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_or_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_mask_reduce_or_epi16(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_or_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_reduce_or_epi16(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_or_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_mask_reduce_or_epi16(0b11110000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_or_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_reduce_or_epi8(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_or_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_mask_reduce_or_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_or_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_reduce_or_epi8(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_or_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_mask_reduce_or_epi8(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

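    // Added note on the unaligned-load tests below: `_mm*_loadu_epi*` places
    // the element at the lowest address into lane 0, while `_mm*_set_epi*`
    // takes its arguments from the highest lane down, so the expected vectors
    // list the source arrays in reverse order.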
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_loadu_epi16() {
        #[rustfmt::skip]
        let a: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm512_loadu_epi16(&a[0]);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_loadu_epi16() {
        let a: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let r = _mm256_loadu_epi16(&a[0]);
        let e = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_loadu_epi16() {
        let a: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
        let r = _mm_loadu_epi16(&a[0]);
        let e = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_loadu_epi8() {
        #[rustfmt::skip]
        let a: [i8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                           1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm512_loadu_epi8(&a[0]);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
                                32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_loadu_epi8() {
        #[rustfmt::skip]
        let a: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm256_loadu_epi8(&a[0]);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_loadu_epi8() {
        let a: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let r = _mm_loadu_epi8(&a[0]);
        let e = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m128i(r, e);
    }

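    // The `storeu` tests round-trip a splatted vector through ordinary memory; the
    // destination vector serves only as raw storage for the unaligned store.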
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_storeu_epi16() {
        let a = _mm512_set1_epi16(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_storeu_epi16() {
        let a = _mm256_set1_epi16(9);
        let mut r = _mm256_set1_epi32(0);
        _mm256_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_storeu_epi16() {
        let a = _mm_set1_epi16(9);
        let mut r = _mm_set1_epi32(0);
        _mm_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_storeu_epi8() {
        let a = _mm512_set1_epi8(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_storeu_epi8() {
        let a = _mm256_set1_epi8(9);
        let mut r = _mm256_set1_epi32(0);
        _mm256_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_storeu_epi8() {
        let a = _mm_set1_epi8(9);
        let mut r = _mm_set1_epi32(0);
        _mm_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        assert_eq_m128i(r, a);
    }

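    // In the masked load/store tests, mask bit i governs element i (the lowest bit maps to
    // the lowest lane), and `black_box` keeps the source pointer opaque to the optimizer.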
    #[simd_test(enable = "avx512f,avx512bw")]
    unsafe fn test_mm512_mask_loadu_epi16() {
        let src = _mm512_set1_epi16(42);
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm512_mask_loadu_epi16(src, m, black_box(p));
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm512_loadu_epi16(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw")]
    unsafe fn test_mm512_maskz_loadu_epi16() {
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm512_maskz_loadu_epi16(m, black_box(p));
        let e = &[
            0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32,
        ];
        let e = _mm512_loadu_epi16(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw")]
    unsafe fn test_mm512_mask_storeu_epi16() {
        let mut r = [42_i16; 32];
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let a = _mm512_loadu_epi16(a.as_ptr());
        let m = 0b10101010_11001100_11101000_11001010;
        _mm512_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm512_loadu_epi16(e.as_ptr());
        assert_eq_m512i(_mm512_loadu_epi16(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512bw")]
    unsafe fn test_mm512_mask_loadu_epi8() {
        let src = _mm512_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        let r = _mm512_mask_loadu_epi8(src, m, black_box(p));
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
            45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
        ];
        let e = _mm512_loadu_epi8(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw")]
    unsafe fn test_mm512_maskz_loadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        let r = _mm512_maskz_loadu_epi8(m, black_box(p));
        let e = &[
            0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 44, 45, 46, 47, 48, 49,
            50, 51, 52, 53, 54, 55, 56, 0, 0, 0, 0, 0, 0, 0, 0,
        ];
        let e = _mm512_loadu_epi8(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw")]
    unsafe fn test_mm512_mask_storeu_epi8() {
        let mut r = [42_i8; 64];
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let a = _mm512_loadu_epi8(a.as_ptr());
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        _mm512_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
            45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
        ];
        let e = _mm512_loadu_epi8(e.as_ptr());
        assert_eq_m512i(_mm512_loadu_epi8(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_loadu_epi16() {
        let src = _mm256_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_mask_loadu_epi16(src, m, black_box(p));
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm256_loadu_epi16(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_loadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_maskz_loadu_epi16(m, black_box(p));
        let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
        let e = _mm256_loadu_epi16(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_storeu_epi16() {
        let mut r = [42_i16; 16];
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let a = _mm256_loadu_epi16(a.as_ptr());
        let m = 0b11101000_11001010;
        _mm256_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm256_loadu_epi16(e.as_ptr());
        assert_eq_m256i(_mm256_loadu_epi16(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_loadu_epi8() {
        let src = _mm256_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm256_mask_loadu_epi8(src, m, black_box(p));
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm256_loadu_epi8(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_loadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm256_maskz_loadu_epi8(m, black_box(p));
        let e = &[
            0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32,
        ];
        let e = _mm256_loadu_epi8(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_storeu_epi8() {
        let mut r = [42_i8; 32];
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let a = _mm256_loadu_epi8(a.as_ptr());
        let m = 0b10101010_11001100_11101000_11001010;
        _mm256_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm256_loadu_epi8(e.as_ptr());
        assert_eq_m256i(_mm256_loadu_epi8(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm_mask_loadu_epi16() {
        let src = _mm_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = _mm_mask_loadu_epi16(src, m, black_box(p));
        let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
        let e = _mm_loadu_epi16(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_loadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = _mm_maskz_loadu_epi16(m, black_box(p));
        let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8];
        let e = _mm_loadu_epi16(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm_mask_storeu_epi16() {
        let mut r = [42_i16; 8];
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let a = _mm_loadu_epi16(a.as_ptr());
        let m = 0b11001010;
        _mm_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
        let e = _mm_loadu_epi16(e.as_ptr());
        assert_eq_m128i(_mm_loadu_epi16(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm_mask_loadu_epi8() {
        let src = _mm_set1_epi8(42);
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_mask_loadu_epi8(src, m, black_box(p));
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm_loadu_epi8(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_loadu_epi8() {
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_maskz_loadu_epi8(m, black_box(p));
        let e = &[0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
        let e = _mm_loadu_epi8(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm_mask_storeu_epi8() {
        let mut r = [42_i8; 16];
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let a = _mm_loadu_epi8(a.as_ptr());
        let m = 0b11101000_11001010;
        _mm_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm_loadu_epi8(e.as_ptr());
        assert_eq_m128i(_mm_loadu_epi8(r.as_ptr()), e);
    }

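    // `madd_epi16` multiplies adjacent pairs of signed 16-bit lanes and adds each pair into
    // a 32-bit lane, so with all inputs set to 1 every 32-bit result is 1 * 1 + 1 * 1 = 2.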
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_madd_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_madd_epi16(a, b);
        let e = _mm512_set1_epi32(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_madd_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_madd_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_madd_epi16(a, 0b00000000_00001111, a, b);
        let e = _mm512_set_epi32(
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            2,
            2,
            2,
            2,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_madd_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_madd_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_madd_epi16(0b00000000_00001111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_madd_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_madd_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_madd_epi16(a, 0b00001111, a, b);
        let e = _mm256_set_epi32(
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            2,
            2,
            2,
            2,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_madd_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_madd_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_madd_epi16(0b00001111, a, b);
        let e = _mm256_set_epi32(0, 0, 0, 0, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_madd_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_madd_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_madd_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi32(2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_madd_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_madd_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_madd_epi16(0b00001111, a, b);
        let e = _mm_set_epi32(2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

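    // `maddubs_epi16` multiplies unsigned bytes of `a` by signed bytes of `b` and adds
    // horizontal pairs with signed saturation into 16-bit lanes; with all-ones inputs every
    // 16-bit result is 1 * 1 + 1 * 1 = 2.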
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maddubs_epi16() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maddubs_epi16(a, b);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_maddubs_epi16() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let src = _mm512_set1_epi16(1);
        let r = _mm512_mask_maddubs_epi16(src, 0, a, b);
        assert_eq_m512i(r, src);
        // With only mask bit 0 set, lane 0 becomes 1 * 1 + 1 * 1 = 2 and every other
        // lane keeps the value from `src`.
        let r = _mm512_mask_maddubs_epi16(src, 0b00000000_00000000_00000000_00000001, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_maddubs_epi16() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_maddubs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_maddubs_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2,
                                 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_maddubs_epi16() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let src = _mm256_set1_epi16(1);
        let r = _mm256_mask_maddubs_epi16(src, 0, a, b);
        assert_eq_m256i(r, src);
        // Mask bit 0 selects the maddubs result (2) for lane 0; the rest keep `src`.
        let r = _mm256_mask_maddubs_epi16(src, 0b00000000_00000001, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_maddubs_epi16() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_maddubs_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_maddubs_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_maddubs_epi16() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let src = _mm_set1_epi16(1);
        let r = _mm_mask_maddubs_epi16(src, 0, a, b);
        assert_eq_m128i(r, src);
        // Mask bit 0 selects the maddubs result (2) for lane 0; the rest keep `src`.
        let r = _mm_mask_maddubs_epi16(src, 0b00000001, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_maddubs_epi16() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_maddubs_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_maddubs_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

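    // `packs_epi32` narrows 32-bit lanes to 16 bits with signed saturation, interleaving
    // the two sources per 128-bit chunk (four lanes of `a`, then four of `b`), so
    // `i32::MAX` saturates to `i16::MAX`.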
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_packs_epi32() {
        let a = _mm512_set1_epi32(i32::MAX);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_packs_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX,
                                 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_packs_epi32() {
        let a = _mm512_set1_epi32(i32::MAX);
        let b = _mm512_set1_epi32(1 << 16 | 1);
        let r = _mm512_mask_packs_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_packs_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_packs_epi32() {
        let a = _mm512_set1_epi32(i32::MAX);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_maskz_packs_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_packs_epi32(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_packs_epi32() {
        let a = _mm256_set1_epi32(i32::MAX);
        let b = _mm256_set1_epi32(1 << 16 | 1);
        let r = _mm256_mask_packs_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_packs_epi32(b, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_packs_epi32() {
        let a = _mm256_set1_epi32(i32::MAX);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_maskz_packs_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_packs_epi32(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_packs_epi32() {
        let a = _mm_set1_epi32(i32::MAX);
        let b = _mm_set1_epi32(1 << 16 | 1);
        let r = _mm_mask_packs_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_packs_epi32(b, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_packs_epi32() {
        let a = _mm_set1_epi32(i32::MAX);
        let b = _mm_set1_epi32(1);
        let r = _mm_maskz_packs_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_packs_epi32(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }

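    // `packs_epi16` is the 16-to-8-bit analogue: eight lanes of `a` then eight of `b` per
    // 128-bit chunk, with `i16::MAX` saturating to `i8::MAX`.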
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_packs_epi16() {
        let a = _mm512_set1_epi16(i16::MAX);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_packs_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_packs_epi16() {
        let a = _mm512_set1_epi16(i16::MAX);
        let b = _mm512_set1_epi16(1 << 8 | 1);
        let r = _mm512_mask_packs_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_packs_epi16(
            b,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_packs_epi16() {
        let a = _mm512_set1_epi16(i16::MAX);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_packs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_packs_epi16(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_packs_epi16() {
        let a = _mm256_set1_epi16(i16::MAX);
        let b = _mm256_set1_epi16(1 << 8 | 1);
        let r = _mm256_mask_packs_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_packs_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_packs_epi16() {
        let a = _mm256_set1_epi16(i16::MAX);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_packs_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_packs_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_packs_epi16() {
        let a = _mm_set1_epi16(i16::MAX);
        let b = _mm_set1_epi16(1 << 8 | 1);
        let r = _mm_mask_packs_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_packs_epi16(b, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_packs_epi16() {
        let a = _mm_set1_epi16(i16::MAX);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_packs_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_packs_epi16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

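    // `packus_epi32` narrows with unsigned saturation, so the all-ones input (-1) clamps
    // to 0 while 1 is preserved.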
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_packus_epi32() {
        let a = _mm512_set1_epi32(-1);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_packus_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
                                 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_packus_epi32() {
        let a = _mm512_set1_epi32(-1);
        let b = _mm512_set1_epi32(1 << 16 | 1);
        let r = _mm512_mask_packus_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_packus_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_packus_epi32() {
        let a = _mm512_set1_epi32(-1);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_maskz_packus_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_packus_epi32(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_packus_epi32() {
        let a = _mm256_set1_epi32(-1);
        let b = _mm256_set1_epi32(1 << 16 | 1);
        let r = _mm256_mask_packus_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_packus_epi32(b, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_packus_epi32() {
        let a = _mm256_set1_epi32(-1);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_maskz_packus_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_packus_epi32(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_packus_epi32() {
        let a = _mm_set1_epi32(-1);
        let b = _mm_set1_epi32(1 << 16 | 1);
        let r = _mm_mask_packus_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_packus_epi32(b, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_packus_epi32() {
        let a = _mm_set1_epi32(-1);
        let b = _mm_set1_epi32(1);
        let r = _mm_maskz_packus_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_packus_epi32(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

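    // `packus_epi16` likewise clamps negative 16-bit lanes to 0 when narrowing to unsigned
    // 8-bit lanes.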
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_packus_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_packus_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_packus_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1 << 8 | 1);
        let r = _mm512_mask_packus_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_packus_epi16(
            b,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_packus_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_packus_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_packus_epi16(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_packus_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(1 << 8 | 1);
        let r = _mm256_mask_packus_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_packus_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_packus_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_packus_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_packus_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_packus_epi16() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(1 << 8 | 1);
        let r = _mm_mask_packus_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_packus_epi16(b, 0b00000000_00001111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_packus_epi16() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_packus_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_packus_epi16(0b00000000_00001111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

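    // `avg_epu16` computes the rounding average (a + b + 1) >> 1 of unsigned lanes, so
    // averaging 1 with 1 yields 1.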
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_avg_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_avg_epu16(a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_avg_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_avg_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_avg_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_avg_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_avg_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_avg_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_avg_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_avg_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_avg_epu16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_avg_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_avg_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_avg_epu16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_avg_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_avg_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_avg_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_avg_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_avg_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_avg_epu16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

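    // `avg_epu8` behaves the same per byte; only the lane count and mask width differ.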
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_avg_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_avg_epu8(a, b);
        let e = _mm512_set1_epi8(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_avg_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_avg_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_avg_epu8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_avg_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_avg_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_avg_epu8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_avg_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_avg_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_avg_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_avg_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_avg_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_avg_epu8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_avg_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_avg_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_avg_epu8(a, 0b00000000_00001111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_avg_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_avg_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_avg_epu8(0b00000000_00001111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

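    // `sll_epi16` shifts every lane left by the unsigned count held in the low 64 bits of
    // the `count` vector; a splatted 2 makes that 64-bit count exceed 15, so every lane is
    // zeroed (as it would be anyway when shifting 1 << 15 further left).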
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sll_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm512_sll_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_sll_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm512_mask_sll_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sll_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_sll_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm512_maskz_sll_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sll_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_sll_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm256_mask_sll_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sll_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_sll_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm256_maskz_sll_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sll_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_sll_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_sll_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sll_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_sll_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_sll_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sll_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

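    // `slli_epi16` takes the shift amount as a const generic; shifting the top bit
    // (1 << 15) left by 1 shifts it out entirely, giving 0.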
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_slli_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_slli_epi16::<1>(a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_slli_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_mask_slli_epi16::<1>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_slli_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_slli_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_maskz_slli_epi16::<1>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_slli_epi16::<1>(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_slli_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let r = _mm256_mask_slli_epi16::<1>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_slli_epi16::<1>(a, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_slli_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let r = _mm256_maskz_slli_epi16::<1>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_slli_epi16::<1>(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_slli_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let r = _mm_mask_slli_epi16::<1>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_slli_epi16::<1>(a, 0b11111111, a);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_slli_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let r = _mm_maskz_slli_epi16::<1>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_slli_epi16::<1>(0b11111111, a);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

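    // `sllv_epi16` shifts each lane left by the count in the corresponding lane of `count`;
    // here every per-lane count is 2, which shifts the set bit of 1 << 15 out, giving 0.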
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sllv_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_sllv_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_sllv_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_mask_sllv_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sllv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_sllv_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_maskz_sllv_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sllv_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_sllv_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_sllv_epi16(a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_sllv_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_mask_sllv_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sllv_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_sllv_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_maskz_sllv_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sllv_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_sllv_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_sllv_epi16(a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_sllv_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_sllv_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sllv_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_sllv_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_sllv_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sllv_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

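    // For `srl` the shift amount is the full low 64 bits of the `__m128i` count
    // argument, not a per-lane value: `_mm_set1_epi16(2)` encodes
    // 0x0002_0002_0002_0002, which is greater than 15, so every lane is zeroed.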
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_srl_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm512_srl_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_srl_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm512_mask_srl_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srl_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_srl_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm512_maskz_srl_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srl_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_srl_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm256_mask_srl_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srl_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_srl_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm256_maskz_srl_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srl_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_srl_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_srl_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srl_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_srl_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_srl_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srl_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

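    // `srli` shifts logically by a const-generic immediate: (1 << 1) >> 2 == 0 in
    // every lane.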
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_srli_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let r = _mm512_srli_epi16::<2>(a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_srli_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let r = _mm512_mask_srli_epi16::<2>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srli_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_srli_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let r = _mm512_maskz_srli_epi16::<2>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srli_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_srli_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let r = _mm256_mask_srli_epi16::<2>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srli_epi16::<2>(a, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_srli_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let r = _mm256_maskz_srli_epi16::<2>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srli_epi16::<2>(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_srli_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let r = _mm_mask_srli_epi16::<2>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srli_epi16::<2>(a, 0b11111111, a);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_srli_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let r = _mm_maskz_srli_epi16::<2>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srli_epi16::<2>(0b11111111, a);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

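    // `srlv` is the per-lane-count variant of the logical right shift: each lane
    // computes (1 << 1) >> 2 == 0 here.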
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_srlv_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_srlv_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_srlv_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_mask_srlv_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srlv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_srlv_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_maskz_srlv_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srlv_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_srlv_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_srlv_epi16(a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_srlv_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_mask_srlv_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srlv_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_srlv_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_maskz_srlv_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srlv_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_srlv_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_srlv_epi16(a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_srlv_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_srlv_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srlv_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_srlv_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_srlv_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srlv_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

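    // Like `srl`, `sra` reads its count from the low 64 bits of the count vector;
    // 0x0001_0001_0001_0001 exceeds 15, so every lane is filled with copies of its
    // sign bit. The inputs here are positive, giving 0.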
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sra_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm512_sra_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_sra_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm512_mask_sra_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sra_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_sra_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm512_maskz_sra_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sra_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_sra_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm256_mask_sra_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sra_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_sra_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm256_maskz_sra_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sra_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_sra_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm_mask_sra_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sra_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_sra_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm_maskz_sra_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sra_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

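    // `srai` shifts arithmetically by the immediate: 8 >> 2 == 2 in every lane.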
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_srai_epi16() {
        let a = _mm512_set1_epi16(8);
        let r = _mm512_srai_epi16::<2>(a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_srai_epi16() {
        let a = _mm512_set1_epi16(8);
        let r = _mm512_mask_srai_epi16::<2>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srai_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_srai_epi16() {
        let a = _mm512_set1_epi16(8);
        let r = _mm512_maskz_srai_epi16::<2>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srai_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_srai_epi16() {
        let a = _mm256_set1_epi16(8);
        let r = _mm256_mask_srai_epi16::<2>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srai_epi16::<2>(a, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_srai_epi16() {
        let a = _mm256_set1_epi16(8);
        let r = _mm256_maskz_srai_epi16::<2>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srai_epi16::<2>(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_srai_epi16() {
        let a = _mm_set1_epi16(8);
        let r = _mm_mask_srai_epi16::<2>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srai_epi16::<2>(a, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_srai_epi16() {
        let a = _mm_set1_epi16(8);
        let r = _mm_maskz_srai_epi16::<2>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srai_epi16::<2>(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

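    // `srav` shifts each lane right arithmetically by its per-lane count:
    // 8 >> 2 == 2.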
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_srav_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_srav_epi16(a, count);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_srav_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_mask_srav_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srav_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_srav_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_maskz_srav_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srav_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_srav_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_srav_epi16(a, count);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_srav_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_mask_srav_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srav_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_srav_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_maskz_srav_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srav_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_srav_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(2);
        let r = _mm_srav_epi16(a, count);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_srav_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_srav_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srav_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_srav_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_srav_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srav_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

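    // `permutex2var` treats `a` and `b` as one combined table: the low bits of each
    // index select an element and the next bit (bit 5 for the 512-bit form, bit 4
    // for 256-bit, bit 3 for 128-bit) selects `b` instead of `a`. `set_epi16` lists
    // lanes from most to least significant, so a[1] is 30 in the 512-bit tests. In
    // the `mask2` variants, unselected lanes are copied from `idx` rather than `a`.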
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_permutex2var_epi16(a, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutex2var_epi16(a, 0b11111111_11111111_11111111_11111111, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutex2var_epi16(0b11111111_11111111_11111111_11111111, a, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask2_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m512i(r, idx);
        let r = _mm512_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111_11111111_11111111, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_permutex2var_epi16(a, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutex2var_epi16(a, 0b11111111_11111111, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutex2var_epi16(0b11111111_11111111, a, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask2_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m256i(r, idx);
        let r = _mm256_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_permutex2var_epi16(a, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutex2var_epi16(a, 0b11111111, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutex2var_epi16(0b11111111, a, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask2_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m128i(r, idx);
        let r = _mm_mask2_permutex2var_epi16(a, idx, 0b11111111, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

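    // `permutexvar` fills every lane with a[idx]; with idx == 1 in all lanes, the
    // result broadcasts element 1 (30 for the 512-bit input below).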
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_permutexvar_epi16() {
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_permutexvar_epi16(idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_permutexvar_epi16() {
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutexvar_epi16(a, 0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_permutexvar_epi16() {
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutexvar_epi16(0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_permutexvar_epi16() {
        let idx = _mm256_set1_epi16(1);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_permutexvar_epi16(idx, a);
        let e = _mm256_set1_epi16(14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_permutexvar_epi16() {
        let idx = _mm256_set1_epi16(1);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutexvar_epi16(a, 0b11111111_11111111, idx, a);
        let e = _mm256_set1_epi16(14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_permutexvar_epi16() {
        let idx = _mm256_set1_epi16(1);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutexvar_epi16(0b11111111_11111111, idx, a);
        let e = _mm256_set1_epi16(14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_permutexvar_epi16() {
        let idx = _mm_set1_epi16(1);
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_permutexvar_epi16(idx, a);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_permutexvar_epi16() {
        let idx = _mm_set1_epi16(1);
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutexvar_epi16(a, 0b11111111, idx, a);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_permutexvar_epi16() {
        let idx = _mm_set1_epi16(1);
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutexvar_epi16(0b11111111, idx, a);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

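    // `mask_blend` selects the lane from `b` where the mask bit is set and from `a`
    // where it is clear.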
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_blend_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_blend_epi16(0b11111111_00000000_11111111_00000000, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_blend_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_blend_epi16(0b11111111_00000000, a, b);
        let e = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_blend_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_blend_epi16(0b11110000, a, b);
        let e = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_blend_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_blend_epi8(
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_blend_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_blend_epi8(0b11111111_00000000_11111111_00000000, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_blend_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_mask_blend_epi8(0b11111111_00000000, a, b);
        let e = _mm_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

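    // `broadcastw` replicates the lowest 16-bit element of `a`; `_mm_set_epi16`
    // lists lanes high to low, so that element is 24.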
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_broadcastw_epi16(a);
        let e = _mm512_set1_epi16(24);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_broadcastw_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_mask_broadcastw_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_broadcastw_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(24);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_maskz_broadcastw_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_broadcastw_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(24);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_broadcastw_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_mask_broadcastw_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_broadcastw_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(24);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_maskz_broadcastw_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_broadcastw_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(24);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_broadcastw_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm_mask_broadcastw_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_broadcastw_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(24);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm_maskz_broadcastw_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_broadcastw_epi16(0b11111111, a);
        let e = _mm_set1_epi16(24);
        assert_eq_m128i(r, e);
    }

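    // `broadcastb` replicates the lowest byte of `a` (32 below) across the vector.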
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_broadcastb_epi8(a);
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_broadcastb_epi8() {
        let src = _mm512_set1_epi8(1);
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_mask_broadcastb_epi8(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_broadcastb_epi8(
            src,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_maskz_broadcastb_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_broadcastb_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_broadcastb_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_mask_broadcastb_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_broadcastb_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_maskz_broadcastb_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_broadcastb_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_broadcastb_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm_mask_broadcastb_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_broadcastb_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(32);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm_maskz_broadcastb_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_broadcastb_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(32);
        assert_eq_m128i(r, e);
    }

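    // The AVX-512 `unpackhi` forms interleave the upper halves of `a` and `b`
    // independently within each 128-bit lane, which is why the expected output
    // skips (e.g. from 36, 4 to 41, 9) at each 128-bit boundary.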
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_unpackhi_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpackhi_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpackhi_epi16(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_unpackhi_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        let r = _mm256_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpackhi_epi16(a, 0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_unpackhi_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        let r = _mm256_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpackhi_epi16(0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_unpackhi_epi16() {
        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
        let r = _mm_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpackhi_epi16(a, 0b11111111, a, b);
        let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_unpackhi_epi16() {
        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
        let r = _mm_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpackhi_epi16(0b11111111, a, b);
        let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
        assert_eq_m128i(r, e);
    }

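    // Byte variant of the same per-128-bit-lane high-half interleave.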
18862    #[simd_test(enable = "avx512bw")]
18863    unsafe fn test_mm512_unpackhi_epi8() {
18864        #[rustfmt::skip]
18865        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
18866                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
18867                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18868                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
18869        #[rustfmt::skip]
18870        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
18871                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
18872                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
18873                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
18874        let r = _mm512_unpackhi_epi8(a, b);
18875        #[rustfmt::skip]
18876        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
18877                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
18878                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
18879                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
18880        assert_eq_m512i(r, e);
18881    }
18882
18883    #[simd_test(enable = "avx512bw")]
18884    unsafe fn test_mm512_mask_unpackhi_epi8() {
18885        #[rustfmt::skip]
18886        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
18887                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
18888                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18889                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
18890        #[rustfmt::skip]
18891        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpackhi_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpackhi_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpackhi_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpackhi_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_unpackhi_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpackhi_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_unpackhi_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpackhi_epi8(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
        assert_eq_m128i(r, e);
    }

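    // _mm512_unpacklo_epi16 (vpunpcklwd) interleaves the low four 16-bit elements
    // of each 128-bit lane of a and b; the masked variants then blend with src or
    // zero per the writemask.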
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_unpacklo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_unpacklo_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_unpacklo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_mask_unpacklo_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpacklo_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_unpacklo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_maskz_unpacklo_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpacklo_epi16(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_unpacklo_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        let r = _mm256_mask_unpacklo_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpacklo_epi16(a, 0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_unpacklo_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        let r = _mm256_maskz_unpacklo_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpacklo_epi16(0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_unpacklo_epi16() {
        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
        let r = _mm_mask_unpacklo_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpacklo_epi16(a, 0b11111111, a, b);
        let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_unpacklo_epi16() {
        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
        let r = _mm_maskz_unpacklo_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpacklo_epi16(0b11111111, a, b);
        let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
        assert_eq_m128i(r, e);
    }

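    // _mm512_unpacklo_epi8 (vpunpcklbw) interleaves the low eight bytes of each
    // 128-bit lane of a and b.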
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_unpacklo_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpacklo_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpacklo_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpacklo_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpacklo_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_unpacklo_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpacklo_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_unpacklo_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpacklo_epi8(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
        );
        assert_eq_m128i(r, e);
    }

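    // The mask_mov/maskz_mov tests exercise pure writemask behavior: an all-zero
    // mask leaves src (or zero) intact, an all-ones mask passes a through.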
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mov_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm512_set1_epi16(2);
        let r = _mm512_mask_mov_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_mov_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mov_epi16() {
        let a = _mm512_set1_epi16(2);
        let r = _mm512_maskz_mov_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mov_epi16(0b11111111_11111111_11111111_11111111, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_mov_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm256_set1_epi16(2);
        let r = _mm256_mask_mov_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_mov_epi16(src, 0b11111111_11111111, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_mov_epi16() {
        let a = _mm256_set1_epi16(2);
        let r = _mm256_maskz_mov_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mov_epi16(0b11111111_11111111, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_mov_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi16(2);
        let r = _mm_mask_mov_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_mov_epi16(src, 0b11111111, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_mov_epi16() {
        let a = _mm_set1_epi16(2);
        let r = _mm_maskz_mov_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mov_epi16(0b11111111, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mov_epi8() {
        let src = _mm512_set1_epi8(1);
        let a = _mm512_set1_epi8(2);
        let r = _mm512_mask_mov_epi8(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_mov_epi8(
            src,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mov_epi8() {
        let a = _mm512_set1_epi8(2);
        let r = _mm512_maskz_mov_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mov_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_mov_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm256_mask_mov_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_mov_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_mov_epi8() {
        let a = _mm256_set1_epi8(2);
        let r = _mm256_maskz_mov_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mov_epi8(0b11111111_11111111_11111111_11111111, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_mov_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_mov_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_mov_epi8(src, 0b11111111_11111111, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_mov_epi8() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_mov_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mov_epi8(0b11111111_11111111, a);
        assert_eq_m128i(r, a);
    }

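    // The masked set1 tests broadcast a scalar under a writemask: unselected
    // elements keep src (mask variant) or become zero (maskz variant).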
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_set1_epi16() {
        let src = _mm512_set1_epi16(2);
        let a: i16 = 11;
        let r = _mm512_mask_set1_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_set1_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_set1_epi16() {
        let a: i16 = 11;
        let r = _mm512_maskz_set1_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_set1_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_set1_epi16() {
        let src = _mm256_set1_epi16(2);
        let a: i16 = 11;
        let r = _mm256_mask_set1_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_set1_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_set1_epi16() {
        let a: i16 = 11;
        let r = _mm256_maskz_set1_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_set1_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_set1_epi16() {
        let src = _mm_set1_epi16(2);
        let a: i16 = 11;
        let r = _mm_mask_set1_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_set1_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(11);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_set1_epi16() {
        let a: i16 = 11;
        let r = _mm_maskz_set1_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_set1_epi16(0b11111111, a);
        let e = _mm_set1_epi16(11);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_set1_epi8() {
        let src = _mm512_set1_epi8(2);
        let a: i8 = 11;
        let r = _mm512_mask_set1_epi8(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_set1_epi8(
            src,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_set1_epi8() {
        let a: i8 = 11;
        let r = _mm512_maskz_set1_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_set1_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_set1_epi8() {
        let src = _mm256_set1_epi8(2);
        let a: i8 = 11;
        let r = _mm256_mask_set1_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_set1_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_set1_epi8() {
        let a: i8 = 11;
        let r = _mm256_maskz_set1_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_set1_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_set1_epi8() {
        let src = _mm_set1_epi8(2);
        let a: i8 = 11;
        let r = _mm_mask_set1_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_set1_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(11);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_set1_epi8() {
        let a: i8 = 11;
        let r = _mm_maskz_set1_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_set1_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(11);
        assert_eq_m128i(r, e);
    }

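    // shufflelo_epi16 permutes the low four 16-bit elements of each 128-bit lane
    // using four 2-bit selectors from IMM8 (0b00_01_01_11 maps destination words
    // 0..3 to source words 3, 1, 1, 0); the upper four words of each lane pass
    // through unchanged.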
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        let r = _mm512_shufflelo_epi16::<0b00_01_01_11>(a);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(
            a,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r =
            _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_shufflelo_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_shufflelo_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_shufflelo_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_shufflelo_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
        assert_eq_m128i(r, e);
    }

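    // shufflehi_epi16 applies the same IMM8 selection to the high four 16-bit
    // elements of each 128-bit lane, leaving the low four words unchanged.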
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        let r = _mm512_shufflehi_epi16::<0b00_01_01_11>(a);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(
            a,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r =
            _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_shufflehi_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
        let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_shufflehi_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
        let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_shufflehi_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111, a);
        let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_shufflehi_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111, a);
        let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

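    // shuffle_epi8 (vpshufb) is a per-128-bit-lane byte shuffle: the low four bits
    // of each control byte in b pick a byte from the same lane of a, and a set
    // sign bit zeroes the output byte. With b = splat(1), every byte of each lane
    // becomes that lane's byte 1.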
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_shuffle_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shuffle_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_shuffle_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shuffle_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shuffle_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_shuffle_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shuffle_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_shuffle_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shuffle_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_shuffle_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shuffle_epi8(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        );
        assert_eq_m128i(r, e);
    }

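    // test_epi*_mask (vptestm*) ANDs the two operands elementwise and sets the
    // corresponding mask bit when the result is nonzero.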
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_test_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_test_epi16_mask(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_test_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_mask_test_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_test_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_test_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_test_epi16_mask(a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_test_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_mask_test_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_test_epi16_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_test_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_test_epi16_mask(a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_test_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_mask_test_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_test_epi16_mask(0b11111111, a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_test_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_test_epi8_mask(a, b);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_test_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_mask_test_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_test_epi8_mask(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_test_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_test_epi8_mask(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_test_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_mask_test_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_test_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_test_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_test_epi8_mask(a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_test_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_mask_test_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_test_epi8_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

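    // testn_epi*_mask (vptestnm*) is the complement: a mask bit is set only when
    // the elementwise AND is zero, so these all-overlapping inputs yield 0.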
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_testn_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_testn_epi16_mask(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_testn_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_mask_testn_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_testn_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_testn_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_testn_epi16_mask(a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_testn_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_mask_testn_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_testn_epi16_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_testn_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_testn_epi16_mask(a, b);
        let e: __mmask8 = 0b00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_testn_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_mask_testn_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_testn_epi16_mask(0b11111111, a, b);
        let e: __mmask8 = 0b00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_testn_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_testn_epi8_mask(a, b);
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_testn_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_mask_testn_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_testn_epi8_mask(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_testn_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_testn_epi8_mask(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_testn_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_mask_testn_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_testn_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_testn_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_testn_epi8_mask(a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_testn_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_mask_testn_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_testn_epi8_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

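    // _store_mask64/_load_mask64 (kmovq) and the 32-bit variants round-trip a
    // mask register through memory unchanged.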
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_store_mask64() {
        let a: __mmask64 =
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
        let mut r = 0;
        _store_mask64(&mut r, a);
        assert_eq!(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_store_mask32() {
        let a: __mmask32 = 0b11111111_00000000_11111111_00000000;
        let mut r = 0;
        _store_mask32(&mut r, a);
        assert_eq!(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_load_mask64() {
        let p: __mmask64 =
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
        let r = _load_mask64(&p);
        let e: __mmask64 =
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_load_mask32() {
        let p: __mmask32 = 0b11111111_00000000_11111111_00000000;
        let r = _load_mask32(&p);
        let e: __mmask32 = 0b11111111_00000000_11111111_00000000;
        assert_eq!(r, e);
    }

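    // sad_epu8 (vpsadbw) sums absolute differences of unsigned bytes; each group
    // of eight bytes produces one 64-bit sum, here |2 - 4| * 8 = 16.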
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sad_epu8() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_sad_epu8(a, b);
        let e = _mm512_set1_epi64(16);
        assert_eq_m512i(r, e);
    }

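    // dbsad_epu8 (vdbpsadbw) computes SADs of four-byte groups after selecting
    // 32-bit blocks of b with IMM8; with uniform inputs each 16-bit result is
    // |2 - 4| * 4 = 8 regardless of the selection.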
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_dbsad_epu8() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_dbsad_epu8::<0>(a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_dbsad_epu8() {
        let src = _mm512_set1_epi16(1);
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dbsad_epu8::<0>(src, 0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_dbsad_epu8() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dbsad_epu8::<0>(0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_dbsad_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_dbsad_epu8::<0>(a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_dbsad_epu8() {
        let src = _mm256_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dbsad_epu8::<0>(src, 0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_dbsad_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dbsad_epu8::<0>(0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_dbsad_epu8() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        let r = _mm_dbsad_epu8::<0>(a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_dbsad_epu8() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        let r = _mm_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dbsad_epu8::<0>(src, 0b11111111, a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_dbsad_epu8() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        let r = _mm_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dbsad_epu8::<0>(0b11111111, a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }

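    // movepi*_mask collects the sign bit of every element. Setting bit 15
    // (or bit 7 for bytes) makes each element negative, so every mask bit
    // comes back set.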
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_movepi16_mask() {
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_movepi16_mask(a);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_movepi16_mask() {
        let a = _mm256_set1_epi16(1 << 15);
        let r = _mm256_movepi16_mask(a);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_movepi16_mask() {
        let a = _mm_set1_epi16(1 << 15);
        let r = _mm_movepi16_mask(a);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_movepi8_mask() {
        let a = _mm512_set1_epi8(1 << 7);
        let r = _mm512_movepi8_mask(a);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_movepi8_mask() {
        let a = _mm256_set1_epi8(1 << 7);
        let r = _mm256_movepi8_mask(a);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_movepi8_mask() {
        let a = _mm_set1_epi8(1 << 7);
        let r = _mm_movepi8_mask(a);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

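    // movm broadcasts each mask bit across the corresponding element, so a
    // set bit yields an all-ones element; the shift chains below just spell
    // out that all-ones bit pattern.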
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_movm_epi16() {
        let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
        let r = _mm512_movm_epi16(a);
        let e = _mm512_set1_epi16(
            1 << 15
                | 1 << 14
                | 1 << 13
                | 1 << 12
                | 1 << 11
                | 1 << 10
                | 1 << 9
                | 1 << 8
                | 1 << 7
                | 1 << 6
                | 1 << 5
                | 1 << 4
                | 1 << 3
                | 1 << 2
                | 1 << 1
                | 1 << 0,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_movm_epi16() {
        let a: __mmask16 = 0b11111111_11111111;
        let r = _mm256_movm_epi16(a);
        let e = _mm256_set1_epi16(
            1 << 15
                | 1 << 14
                | 1 << 13
                | 1 << 12
                | 1 << 11
                | 1 << 10
                | 1 << 9
                | 1 << 8
                | 1 << 7
                | 1 << 6
                | 1 << 5
                | 1 << 4
                | 1 << 3
                | 1 << 2
                | 1 << 1
                | 1 << 0,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_movm_epi16() {
        let a: __mmask8 = 0b11111111;
        let r = _mm_movm_epi16(a);
        let e = _mm_set1_epi16(
            1 << 15
                | 1 << 14
                | 1 << 13
                | 1 << 12
                | 1 << 11
                | 1 << 10
                | 1 << 9
                | 1 << 8
                | 1 << 7
                | 1 << 6
                | 1 << 5
                | 1 << 4
                | 1 << 3
                | 1 << 2
                | 1 << 1
                | 1 << 0,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_movm_epi8() {
        let a: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        let r = _mm512_movm_epi8(a);
        let e =
            _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_movm_epi8() {
        let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
        let r = _mm256_movm_epi8(a);
        let e =
            _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_movm_epi8() {
        let a: __mmask16 = 0b11111111_11111111;
        let r = _mm_movm_epi8(a);
        let e =
            _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_cvtmask32_u32() {
        let a: __mmask32 = 0b11001100_00110011_01100110_10011001;
        let r = _cvtmask32_u32(a);
        let e: u32 = 0b11001100_00110011_01100110_10011001;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_cvtu32_mask32() {
        let a: u32 = 0b11001100_00110011_01100110_10011001;
        let r = _cvtu32_mask32(a);
        let e: __mmask32 = 0b11001100_00110011_01100110_10011001;
        assert_eq!(r, e);
    }

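    // kadd treats the masks as plain integers: 11 + 22 = 33.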
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kadd_mask32() {
        let a: __mmask32 = 11;
        let b: __mmask32 = 22;
        let r = _kadd_mask32(a, b);
        let e: __mmask32 = 33;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kadd_mask64() {
        let a: __mmask64 = 11;
        let b: __mmask64 = 22;
        let r = _kadd_mask64(a, b);
        let e: __mmask64 = 33;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kand_mask32() {
        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kand_mask32(a, b);
        let e: __mmask32 = 0b11001100_00110011_11001100_00110011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kand_mask64() {
        let a: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kand_mask64(a, b);
        let e: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_knot_mask32() {
        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _knot_mask32(a);
        let e: __mmask32 = 0b00110011_11001100_00110011_11001100;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_knot_mask64() {
        let a: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _knot_mask64(a);
        let e: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kandn_mask32() {
        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kandn_mask32(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kandn_mask64() {
        let a: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kandn_mask64(a, b);
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kor_mask32() {
        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kor_mask32(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kor_mask64() {
        let a: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kor_mask64(a, b);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kxor_mask32() {
        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kxor_mask32(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kxor_mask64() {
        let a: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kxor_mask64(a, b);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kxnor_mask32() {
        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kxnor_mask32(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kxnor_mask64() {
        let a: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kxnor_mask64(a, b);
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

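    // kortest ORs the two masks: the return value is 1 only when the OR is
    // all zeros, and the out-parameter is 1 only when it is all ones. In the
    // 64-bit case the literals leave the upper half zero, so all_ones is 0.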
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortest_mask32_u8() {
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let b: __mmask32 = 0b1011011010110110_1011011010110110;
        let mut all_ones: u8 = 0;
        let r = _kortest_mask32_u8(a, b, &mut all_ones);
        assert_eq!(r, 0);
        assert_eq!(all_ones, 1);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortest_mask64_u8() {
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let b: __mmask64 = 0b1011011010110110_1011011010110110;
        let mut all_ones: u8 = 0;
        let r = _kortest_mask64_u8(a, b, &mut all_ones);
        assert_eq!(r, 0);
        assert_eq!(all_ones, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortestc_mask32_u8() {
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let b: __mmask32 = 0b1011011010110110_1011011010110110;
        let r = _kortestc_mask32_u8(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortestc_mask64_u8() {
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let b: __mmask64 = 0b1011011010110110_1011011010110110;
        let r = _kortestc_mask64_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortestz_mask32_u8() {
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let b: __mmask32 = 0b1011011010110110_1011011010110110;
        let r = _kortestz_mask32_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kortestz_mask64_u8() {
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let b: __mmask64 = 0b1011011010110110_1011011010110110;
        let r = _kortestz_mask64_u8(a, b);
        assert_eq!(r, 0);
    }

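    // kshift moves the whole mask register by the immediate; vacated bit
    // positions fill with zeros and bits shifted past the mask width drop.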
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kshiftli_mask32() {
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let r = _kshiftli_mask32::<3>(a);
        let e: __mmask32 = 0b0100101101001011_0100101101001000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kshiftli_mask64() {
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let r = _kshiftli_mask64::<3>(a);
        let e: __mmask64 = 0b0110100101101001011_0100101101001000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kshiftri_mask32() {
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let r = _kshiftri_mask32::<3>(a);
        let e: __mmask32 = 0b0000110100101101_0010110100101101;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kshiftri_mask64() {
        let a: __mmask64 = 0b0110100101101001011_0100101101001000;
        let r = _kshiftri_mask64::<3>(a);
        let e: __mmask64 = 0b0110100101101001_0110100101101001;
        assert_eq!(r, e);
    }

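    // ktest returns 1 when a & b == 0, while the out-parameter reports the
    // and-not combination. In these tests b is the bitwise complement of a,
    // so the AND is zero (return 1) but the and-not is nonzero (and_not 0).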
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktest_mask32_u8() {
        let a: __mmask32 = 0b0110100100111100_0110100100111100;
        let b: __mmask32 = 0b1001011011000011_1001011011000011;
        let mut and_not: u8 = 0;
        let r = _ktest_mask32_u8(a, b, &mut and_not);
        assert_eq!(r, 1);
        assert_eq!(and_not, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktestc_mask32_u8() {
        let a: __mmask32 = 0b0110100100111100_0110100100111100;
        let b: __mmask32 = 0b1001011011000011_1001011011000011;
        let r = _ktestc_mask32_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktestz_mask32_u8() {
        let a: __mmask32 = 0b0110100100111100_0110100100111100;
        let b: __mmask32 = 0b1001011011000011_1001011011000011;
        let r = _ktestz_mask32_u8(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktest_mask64_u8() {
        let a: __mmask64 = 0b0110100100111100_0110100100111100;
        let b: __mmask64 = 0b1001011011000011_1001011011000011;
        let mut and_not: u8 = 0;
        let r = _ktest_mask64_u8(a, b, &mut and_not);
        assert_eq!(r, 1);
        assert_eq!(and_not, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktestc_mask64_u8() {
        let a: __mmask64 = 0b0110100100111100_0110100100111100;
        let b: __mmask64 = 0b1001011011000011_1001011011000011;
        let r = _ktestc_mask64_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktestz_mask64_u8() {
        let a: __mmask64 = 0b0110100100111100_0110100100111100;
        let b: __mmask64 = 0b1001011011000011_1001011011000011;
        let r = _ktestz_mask64_u8(a, b);
        assert_eq!(r, 1);
    }

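    // kunpack concatenates the low halves of its operands: the low half of
    // the result comes from b and the high half from a.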
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_kunpackw() {
        let a: u32 = 0x00110011;
        let b: u32 = 0x00001011;
        let r = _mm512_kunpackw(a, b);
        let e: u32 = 0x00111011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_kunpackd() {
        let a: u64 = 0x11001100_00110011;
        let b: u64 = 0x00101110_00001011;
        let r = _mm512_kunpackd(a, b);
        let e: u64 = 0x00110011_00001011;
        assert_eq!(r, e);
    }

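    // vpmovwb truncates each 16-bit element to its low byte, narrowing the
    // vector to half its width.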
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtepi16_epi8() {
        let a = _mm512_set1_epi16(2);
        let r = _mm512_cvtepi16_epi8(a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(2);
        let r = _mm512_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtepi16_epi8() {
        let a = _mm512_set1_epi16(2);
        let r = _mm512_maskz_cvtepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cvtepi16_epi8() {
        let a = _mm256_set1_epi16(2);
        let r = _mm256_cvtepi16_epi8(a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(2);
        let r = _mm256_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepi16_epi8() {
        let a = _mm256_set1_epi16(2);
        let r = _mm256_maskz_cvtepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cvtepi16_epi8() {
        let a = _mm_set1_epi16(2);
        let r = _mm_cvtepi16_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(2);
        let r = _mm_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi16_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtepi16_epi8() {
        let a = _mm_set1_epi16(2);
        let r = _mm_maskz_cvtepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi16_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

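    // The saturating variant clamps to the i8 range, so i16::MAX narrows to
    // i8::MAX instead of being truncated.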
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtsepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_cvtsepi16_epi8(a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtsepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtsepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cvtsepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_cvtsepi16_epi8(a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtsepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtsepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtsepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtsepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cvtsepi16_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_cvtsepi16_epi8(a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtsepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtsepi16_epi8(src, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtsepi16_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtsepi16_epi8(0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtsepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtsepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

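    // Unsigned saturation reinterprets i16::MIN (0x8000) as 32768 and clamps
    // it to u8::MAX, which reads back as -1 through the signed i8 lanes.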
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtusepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_cvtusepi16_epi8(a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtusepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtusepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtusepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtusepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cvtusepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_cvtusepi16_epi8(a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtusepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtusepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtusepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtusepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cvtusepi16_epi8() {
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_cvtusepi16_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtusepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtusepi16_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtusepi16_epi8() {
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtusepi16_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

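    // cvtepi8_epi16 sign-extends and cvtepu8_epi16 zero-extends each byte to
    // 16 bits; for the small positive inputs used here both agree.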
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtepi8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_cvtepi8_epi16(a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepi8_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm512_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepi8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtepi8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_maskz_cvtepi8_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepi8_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi8_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm256_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepi8_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepi8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm256_maskz_cvtepi8_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepi8_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepi8_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi8_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtepi8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_cvtepi8_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi8_epi16(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtepu8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_cvtepu8_epi16(a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepu8_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm512_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepu8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtepu8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_maskz_cvtepu8_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepu8_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepu8_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm256_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepu8_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepu8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm256_maskz_cvtepu8_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepu8_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepu8_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepu8_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtepu8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_cvtepu8_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepu8_epi16(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

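    // bslli/bsrli shift bytes within each 128-bit lane independently; bytes
    // never cross lane boundaries.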
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_bslli_epi128() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let r = _mm512_bslli_epi128::<9>(a);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_bsrli_epi128() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
            49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        );
        let r = _mm512_bsrli_epi128::<3>(a);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
            0, 0, 0, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
            0, 0, 0, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            0, 0, 0, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
        );
        assert_eq_m512i(r, e);
    }

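    // alignr concatenates each 128-bit lane pair of a (high) and b (low) and
    // extracts a contiguous 16-byte window starting IMM8 bytes above the
    // least significant byte of each pair.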
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_alignr_epi8::<14>(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_alignr_epi8::<14>(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_alignr_epi8::<14>(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_alignr_epi8::<14>(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_alignr_epi8::<14>(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_alignr_epi8() {
        let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_alignr_epi8::<14>(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_alignr_epi8() {
        let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_alignr_epi8::<14>(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
        assert_eq_m128i(r, e);
    }

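    // The *_storeu variants write the narrowed (and, where applicable,
    // saturated) bytes straight to memory under the write mask instead of
    // returning a vector.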
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtsepi16_storeu_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let mut r = _mm256_undefined_si256();
        _mm512_mask_cvtsepi16_storeu_epi8(
            &mut r as *mut _ as *mut i8,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtsepi16_storeu_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let mut r = _mm_undefined_si128();
        _mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtsepi16_storeu_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0, 0, 0, 0, 0,
            i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepi16_storeu_epi8() {
        let a = _mm512_set1_epi16(8);
        let mut r = _mm256_undefined_si256();
        _mm512_mask_cvtepi16_storeu_epi8(
            &mut r as *mut _ as *mut i8,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm256_set1_epi8(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi16_storeu_epi8() {
        let a = _mm256_set1_epi16(8);
        let mut r = _mm_undefined_si128();
        _mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepi16_storeu_epi8() {
        let a = _mm_set1_epi16(8);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtusepi16_storeu_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let mut r = _mm256_undefined_si256();
        _mm512_mask_cvtusepi16_storeu_epi8(
            &mut r as *mut _ as *mut i8,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm256_set1_epi8(u8::MAX as i8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtusepi16_storeu_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let mut r = _mm_undefined_si128();
        _mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtusepi16_storeu_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
        );
        assert_eq_m128i(r, e);
    }
}