// core/stdarch/crates/core_arch/src/x86/avx512vbmi2.rs

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_expandloadu_epi16(
    src: __m512i,
    k: __mmask32,
    mem_addr: *const i16,
) -> __m512i {
    transmute(expandloadw_512(mem_addr, src.as_i16x32(), k))
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_expandloadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
    _mm512_mask_expandloadu_epi16(_mm512_setzero_si512(), k, mem_addr)
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_expandloadu_epi16(
    src: __m256i,
    k: __mmask16,
    mem_addr: *const i16,
) -> __m256i {
    transmute(expandloadw_256(mem_addr, src.as_i16x16(), k))
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_expandloadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
    _mm256_mask_expandloadu_epi16(_mm256_setzero_si256(), k, mem_addr)
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_expandloadu_epi16(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i16,
) -> __m128i {
    transmute(expandloadw_128(mem_addr, src.as_i16x8(), k))
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_expandloadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
    _mm_mask_expandloadu_epi16(_mm_setzero_si128(), k, mem_addr)
}
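
// A minimal usage sketch for the expand-load family (illustrative addition,
// not part of the upstream source; assumes the running CPU supports
// avx512vbmi2 and, for the 128/256-bit forms, avx512vl):
//
//     let data: [i16; 4] = [10, 20, 30, 40];
//     // Mask 0b0101: lanes 0 and 2 are active.
//     let v = unsafe { _mm_maskz_expandloadu_epi16(0b0101, data.as_ptr()) };
//     // Consecutive memory values fill only the active lanes:
//     // v = [10, 0, 20, 0, 0, 0, 0, 0].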

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_expandloadu_epi8(
    src: __m512i,
    k: __mmask64,
    mem_addr: *const i8,
) -> __m512i {
    transmute(expandloadb_512(mem_addr, src.as_i8x64(), k))
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_expandloadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
    _mm512_mask_expandloadu_epi8(_mm512_setzero_si512(), k, mem_addr)
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_expandloadu_epi8(
    src: __m256i,
    k: __mmask32,
    mem_addr: *const i8,
) -> __m256i {
    transmute(expandloadb_256(mem_addr, src.as_i8x32(), k))
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_expandloadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
    _mm256_mask_expandloadu_epi8(_mm256_setzero_si256(), k, mem_addr)
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_expandloadu_epi8(
    src: __m128i,
    k: __mmask16,
    mem_addr: *const i8,
) -> __m128i {
    transmute(expandloadb_128(mem_addr, src.as_i8x16(), k))
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_expandloadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
    _mm_mask_expandloadu_epi8(_mm_setzero_si128(), k, mem_addr)
}
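
// The 8-bit expand-loads behave the same way, one byte per active lane
// (illustrative sketch, not from the upstream source):
//
//     let bytes: [i8; 4] = [1, 2, 3, 4];
//     let v = unsafe { _mm_maskz_expandloadu_epi8(0b0011, bytes.as_ptr()) };
//     // v = [1, 2, 0, 0, ...]: lanes 0 and 1 take bytes[0] and bytes[1].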

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm512_mask_compressstoreu_epi16(base_addr: *mut u8, k: __mmask32, a: __m512i) {
    vcompressstorew(base_addr as *mut _, a.as_i16x32(), k)
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm256_mask_compressstoreu_epi16(base_addr: *mut u8, k: __mmask16, a: __m256i) {
    vcompressstorew256(base_addr as *mut _, a.as_i16x16(), k)
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm_mask_compressstoreu_epi16(base_addr: *mut u8, k: __mmask8, a: __m128i) {
    vcompressstorew128(base_addr as *mut _, a.as_i16x8(), k)
}
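
// Compress-store is the inverse of expand-load: active lanes are written
// back-to-back to memory (illustrative sketch, not from the upstream source;
// note that base_addr is a byte pointer here, so an i16 buffer needs a cast):
//
//     let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1); // lane 0 = 1 .. lane 7 = 8
//     let mut out = [0i16; 8];
//     // Mask 0b1010_1010: lanes 1, 3, 5, 7 are active.
//     unsafe { _mm_mask_compressstoreu_epi16(out.as_mut_ptr().cast(), 0b1010_1010, a) };
//     // out = [2, 4, 6, 8, 0, 0, 0, 0]; the trailing elements are untouched.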

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm512_mask_compressstoreu_epi8(base_addr: *mut u8, k: __mmask64, a: __m512i) {
    vcompressstoreb(base_addr as *mut _, a.as_i8x64(), k)
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm256_mask_compressstoreu_epi8(base_addr: *mut u8, k: __mmask32, a: __m256i) {
    vcompressstoreb256(base_addr as *mut _, a.as_i8x32(), k)
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm_mask_compressstoreu_epi8(base_addr: *mut u8, k: __mmask16, a: __m128i) {
    vcompressstoreb128(base_addr as *mut _, a.as_i8x16(), k)
}
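
// The byte variant packs one byte per active lane (illustrative sketch,
// not from the upstream source):
//
//     let src: [i8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
//     let a = unsafe { _mm_loadu_si128(src.as_ptr().cast()) };
//     let mut out = [0i8; 16];
//     unsafe { _mm_mask_compressstoreu_epi8(out.as_mut_ptr().cast(), 0b1111, a) };
//     // out[..4] = [0, 1, 2, 3]; only four bytes are written.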

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi16&expand=1192)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm512_mask_compress_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressw(a.as_i16x32(), src.as_i16x32(), k)) }
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi16&expand=1193)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm512_maskz_compress_epi16(k: __mmask32, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressw(a.as_i16x32(), i16x32::ZERO, k)) }
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi16&expand=1190)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm256_mask_compress_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressw256(a.as_i16x16(), src.as_i16x16(), k)) }
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi16&expand=1191)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm256_maskz_compress_epi16(k: __mmask16, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressw256(a.as_i16x16(), i16x16::ZERO, k)) }
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi16&expand=1188)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm_mask_compress_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressw128(a.as_i16x8(), src.as_i16x8(), k)) }
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi16&expand=1189)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm_maskz_compress_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressw128(a.as_i16x8(), i16x8::ZERO, k)) }
}
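
// The in-register compress forms do the same packing without touching
// memory (illustrative sketch, not from the upstream source):
//
//     let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
//     let r = unsafe { _mm_maskz_compress_epi16(0b0000_0110, a) }; // lanes 1, 2
//     // r = [2, 3, 0, 0, 0, 0, 0, 0]: active values slide toward lane 0,
//     // and the zeromask clears the rest.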

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi8&expand=1210)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm512_mask_compress_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressb(a.as_i8x64(), src.as_i8x64(), k)) }
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi8&expand=1211)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm512_maskz_compress_epi8(k: __mmask64, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressb(a.as_i8x64(), i8x64::ZERO, k)) }
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi8&expand=1208)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm256_mask_compress_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressb256(a.as_i8x32(), src.as_i8x32(), k)) }
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi8&expand=1209)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm256_maskz_compress_epi8(k: __mmask32, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressb256(a.as_i8x32(), i8x32::ZERO, k)) }
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi8&expand=1206)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm_mask_compress_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressb128(a.as_i8x16(), src.as_i8x16(), k)) }
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi8&expand=1207)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm_maskz_compress_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressb128(a.as_i8x16(), i8x16::ZERO, k)) }
}
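
// Byte-granular compress works the same way across 16 (or 32/64) lanes
// (illustrative sketch, not from the upstream source):
//
//     let src: [i8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
//     let a = unsafe { _mm_loadu_si128(src.as_ptr().cast()) };
//     let r = unsafe { _mm_maskz_compress_epi8(0b1000_0000_0000_0001, a) };
//     // r = [0, 15, 0, 0, ...]: lanes 0 and 15 were active, so their values
//     // are packed into the two lowest lanes.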

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi16&expand=2310)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm512_mask_expand_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandw(a.as_i16x32(), src.as_i16x32(), k)) }
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi16&expand=2311)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm512_maskz_expand_epi16(k: __mmask32, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandw(a.as_i16x32(), i16x32::ZERO, k)) }
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi16&expand=2308)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm256_mask_expand_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandw256(a.as_i16x16(), src.as_i16x16(), k)) }
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi16&expand=2309)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm256_maskz_expand_epi16(k: __mmask16, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandw256(a.as_i16x16(), i16x16::ZERO, k)) }
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi16&expand=2306)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm_mask_expand_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandw128(a.as_i16x8(), src.as_i16x8(), k)) }
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi16&expand=2307)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm_maskz_expand_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandw128(a.as_i16x8(), i16x8::ZERO, k)) }
}
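
// In-register expand is the inverse of compress: the low elements of a are
// distributed to the active lanes in order (illustrative sketch, not from
// the upstream source):
//
//     let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
//     let r = unsafe { _mm_maskz_expand_epi16(0b0000_0101, a) };
//     // r = [1, 0, 2, 0, 0, 0, 0, 0]: a[0] goes to lane 0, a[1] to lane 2.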

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi8&expand=2328)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm512_mask_expand_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandb(a.as_i8x64(), src.as_i8x64(), k)) }
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi8&expand=2329)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm512_maskz_expand_epi8(k: __mmask64, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandb(a.as_i8x64(), i8x64::ZERO, k)) }
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi8&expand=2326)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm256_mask_expand_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandb256(a.as_i8x32(), src.as_i8x32(), k)) }
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi8&expand=2327)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm256_maskz_expand_epi8(k: __mmask32, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandb256(a.as_i8x32(), i8x32::ZERO, k)) }
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi8&expand=2324)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm_mask_expand_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandb128(a.as_i8x16(), src.as_i8x16(), k)) }
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi8&expand=2325)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm_maskz_expand_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandb128(a.as_i8x16(), i8x16::ZERO, k)) }
}
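
// The byte form of expand (illustrative sketch, not from the upstream source):
//
//     let src: [i8; 16] = [10, 20, 30, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
//     let a = unsafe { _mm_loadu_si128(src.as_ptr().cast()) };
//     let r = unsafe { _mm_maskz_expand_epi8(0b0011_0000, a) };
//     // r = [0, 0, 0, 0, 10, 20, 0, ...]: a[0] and a[1] land in lanes 4 and 5.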

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi64&expand=5087)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm512_shldv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe { transmute(vpshldvq(a.as_i64x8(), b.as_i64x8(), c.as_i64x8())) }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi64&expand=5085)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm512_mask_shldv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shldv_epi64(a, b, c).as_i64x8();
        transmute(simd_select_bitmask(k, shf, a.as_i64x8()))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi64&expand=5086)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm512_maskz_shldv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shldv_epi64(a, b, c).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi64&expand=5084)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm256_shldv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe { transmute(vpshldvq256(a.as_i64x4(), b.as_i64x4(), c.as_i64x4())) }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi64&expand=5082)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm256_mask_shldv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shldv_epi64(a, b, c).as_i64x4();
        transmute(simd_select_bitmask(k, shf, a.as_i64x4()))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi64&expand=5083)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm256_maskz_shldv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shldv_epi64(a, b, c).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi64&expand=5081)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm_shldv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { transmute(vpshldvq128(a.as_i64x2(), b.as_i64x2(), c.as_i64x2())) }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi64&expand=5079)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm_mask_shldv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shldv_epi64(a, b, c).as_i64x2();
        transmute(simd_select_bitmask(k, shf, a.as_i64x2()))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi64&expand=5080)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm_maskz_shldv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shldv_epi64(a, b, c).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}
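
// shldv computes a per-lane double-width funnel shift left: each lane is the
// upper half of (a:b) << c (illustrative sketch, not from the upstream source):
//
//     let a = _mm_set1_epi64x(1);
//     let b = _mm_set1_epi64x(-1); // all bits set
//     let c = _mm_set1_epi64x(8);
//     let r = unsafe { _mm_shldv_epi64(a, b, c) };
//     // Each lane: (a << 8) | (b as u64 >> 56) = 0x1FF.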

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi32&expand=5078)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm512_shldv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe { transmute(vpshldvd(a.as_i32x16(), b.as_i32x16(), c.as_i32x16())) }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi32&expand=5076)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm512_mask_shldv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shldv_epi32(a, b, c).as_i32x16();
        transmute(simd_select_bitmask(k, shf, a.as_i32x16()))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi32&expand=5077)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm512_maskz_shldv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shldv_epi32(a, b, c).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi32&expand=5075)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm256_shldv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe { transmute(vpshldvd256(a.as_i32x8(), b.as_i32x8(), c.as_i32x8())) }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi32&expand=5073)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm256_mask_shldv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shldv_epi32(a, b, c).as_i32x8();
        transmute(simd_select_bitmask(k, shf, a.as_i32x8()))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi32&expand=5074)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm256_maskz_shldv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shldv_epi32(a, b, c).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi32&expand=5072)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm_shldv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { transmute(vpshldvd128(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi32&expand=5070)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm_mask_shldv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shldv_epi32(a, b, c).as_i32x4();
        transmute(simd_select_bitmask(k, shf, a.as_i32x4()))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi32&expand=5071)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm_maskz_shldv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shldv_epi32(a, b, c).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}
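
// The 32-bit form works identically on 64-bit intermediates (illustrative
// sketch, not from the upstream source):
//
//     let a = _mm_set1_epi32(1);
//     let b = _mm_set1_epi32(-1);
//     let c = _mm_set1_epi32(4);
//     let r = unsafe { _mm_shldv_epi32(a, b, c) };
//     // Each lane: (1 << 4) | (0xFFFF_FFFFu32 >> 28) = 0x1F.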

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi16&expand=5069)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm512_shldv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe { transmute(vpshldvw(a.as_i16x32(), b.as_i16x32(), c.as_i16x32())) }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi16&expand=5067)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm512_mask_shldv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shldv_epi16(a, b, c).as_i16x32();
        transmute(simd_select_bitmask(k, shf, a.as_i16x32()))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi16&expand=5068)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm512_maskz_shldv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shldv_epi16(a, b, c).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi16&expand=5066)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm256_shldv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe { transmute(vpshldvw256(a.as_i16x16(), b.as_i16x16(), c.as_i16x16())) }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi16&expand=5064)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm256_mask_shldv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shldv_epi16(a, b, c).as_i16x16();
        transmute(simd_select_bitmask(k, shf, a.as_i16x16()))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi16&expand=5065)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm256_maskz_shldv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shldv_epi16(a, b, c).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi16&expand=5063)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm_shldv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { transmute(vpshldvw128(a.as_i16x8(), b.as_i16x8(), c.as_i16x8())) }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi16&expand=5061)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm_mask_shldv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shldv_epi16(a, b, c).as_i16x8();
        transmute(simd_select_bitmask(k, shf, a.as_i16x8()))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi16&expand=5062)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm_maskz_shldv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shldv_epi16(a, b, c).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}
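
// And the 16-bit form on 32-bit intermediates (illustrative sketch, not from
// the upstream source):
//
//     let a = _mm_set1_epi16(1);
//     let b = _mm_set1_epi16(-1);
//     let c = _mm_set1_epi16(4);
//     let r = unsafe { _mm_shldv_epi16(a, b, c) };
//     // Each lane: (1 << 4) | (0xFFFFu16 >> 12) = 0x1F.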

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi64&expand=5141)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub fn _mm512_shrdv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe { transmute(vpshrdvq(b.as_i64x8(), a.as_i64x8(), c.as_i64x8())) }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi64&expand=5139)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub fn _mm512_mask_shrdv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shrdv_epi64(a, b, c).as_i64x8();
        transmute(simd_select_bitmask(k, shf, a.as_i64x8()))
    }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi64&expand=5140)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub fn _mm512_maskz_shrdv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shrdv_epi64(a, b, c).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi64&expand=5138)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub fn _mm256_shrdv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe { transmute(vpshrdvq256(b.as_i64x4(), a.as_i64x4(), c.as_i64x4())) }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi64&expand=5136)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub fn _mm256_mask_shrdv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shrdv_epi64(a, b, c).as_i64x4();
        transmute(simd_select_bitmask(k, shf, a.as_i64x4()))
    }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi64&expand=5137)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub fn _mm256_maskz_shrdv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shrdv_epi64(a, b, c).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi64&expand=5135)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub fn _mm_shrdv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { transmute(vpshrdvq128(b.as_i64x2(), a.as_i64x2(), c.as_i64x2())) }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi64&expand=5133)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub fn _mm_mask_shrdv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shrdv_epi64(a, b, c).as_i64x2();
        transmute(simd_select_bitmask(k, shf, a.as_i64x2()))
    }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi64&expand=5134)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub fn _mm_maskz_shrdv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shrdv_epi64(a, b, c).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}
962
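// Hedged usage sketch (application code, not this crate): these intrinsics are
// unstable, so a caller needs a nightly toolchain with
// `#![feature(stdarch_x86_avx512)]` and `use std::arch::x86_64::*;`, plus a
// runtime check before calling them from a non-target-feature context:
//
//     if is_x86_feature_detected!("avx512vbmi2") && is_x86_feature_detected!("avx512vl") {
//         // SAFETY: the required CPU features were verified just above.
//         let r = unsafe { _mm_shrdv_epi64(a, b, c) };
//     }
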
/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi32&expand=5132)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvd))]
pub fn _mm512_shrdv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe { transmute(vpshrdvd(b.as_i32x16(), a.as_i32x16(), c.as_i32x16())) }
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi32&expand=5130)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvd))]
pub fn _mm512_mask_shrdv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shrdv_epi32(a, b, c).as_i32x16();
        transmute(simd_select_bitmask(k, shf, a.as_i32x16()))
    }
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi32&expand=5131)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvd))]
pub fn _mm512_maskz_shrdv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shrdv_epi32(a, b, c).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi32&expand=5129)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvd))]
pub fn _mm256_shrdv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe { transmute(vpshrdvd256(b.as_i32x8(), a.as_i32x8(), c.as_i32x8())) }
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi32&expand=5127)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvd))]
pub fn _mm256_mask_shrdv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shrdv_epi32(a, b, c).as_i32x8();
        transmute(simd_select_bitmask(k, shf, a.as_i32x8()))
    }
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi32&expand=5128)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvd))]
pub fn _mm256_maskz_shrdv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shrdv_epi32(a, b, c).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi32&expand=5126)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvd))]
pub fn _mm_shrdv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { transmute(vpshrdvd128(b.as_i32x4(), a.as_i32x4(), c.as_i32x4())) }
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi32&expand=5124)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvd))]
pub fn _mm_mask_shrdv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shrdv_epi32(a, b, c).as_i32x4();
        transmute(simd_select_bitmask(k, shf, a.as_i32x4()))
    }
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi32&expand=5125)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvd))]
pub fn _mm_maskz_shrdv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shrdv_epi32(a, b, c).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}

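// Illustrative sketch, not part of the upstream source: how the mask/maskz
// variants above pick each lane. Bit `lane` of `k` selects the shifted value;
// otherwise the lane falls back to the old value (writemask) or to zero
// (zeromask). The helper name `mask_select_lane_model` is hypothetical.
#[allow(dead_code)]
fn mask_select_lane_model(k: u16, lane: u32, shifted: i32, fallback: i32) -> i32 {
    if (k >> lane) & 1 == 1 { shifted } else { fallback }
}
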
/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi16&expand=5123)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvw))]
pub fn _mm512_shrdv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe { transmute(vpshrdvw(b.as_i16x32(), a.as_i16x32(), c.as_i16x32())) }
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi16&expand=5121)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvw))]
pub fn _mm512_mask_shrdv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shrdv_epi16(a, b, c).as_i16x32();
        transmute(simd_select_bitmask(k, shf, a.as_i16x32()))
    }
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi16&expand=5122)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvw))]
pub fn _mm512_maskz_shrdv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shrdv_epi16(a, b, c).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi16&expand=5120)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvw))]
pub fn _mm256_shrdv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe { transmute(vpshrdvw256(b.as_i16x16(), a.as_i16x16(), c.as_i16x16())) }
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi16&expand=5118)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvw))]
pub fn _mm256_mask_shrdv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shrdv_epi16(a, b, c).as_i16x16();
        transmute(simd_select_bitmask(k, shf, a.as_i16x16()))
    }
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi16&expand=5119)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvw))]
pub fn _mm256_maskz_shrdv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shrdv_epi16(a, b, c).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi16&expand=5117)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvw))]
pub fn _mm_shrdv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { transmute(vpshrdvw128(b.as_i16x8(), a.as_i16x8(), c.as_i16x8())) }
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi16&expand=5115)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvw))]
pub fn _mm_mask_shrdv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shrdv_epi16(a, b, c).as_i16x8();
        transmute(simd_select_bitmask(k, shf, a.as_i16x8()))
    }
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi16&expand=5116)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvw))]
pub fn _mm_maskz_shrdv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shrdv_epi16(a, b, c).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}

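// Illustrative sketch, not part of the upstream source: a funnel shift whose
// two operands are the same vector is a rotate, so passing `x` as both `a`
// and `b` to the `shrdv` intrinsics rotates every lane right. Modelled here
// on a single 16-bit lane with a hypothetical helper:
#[allow(dead_code)]
fn rotate_right_16_via_funnel(x: u16, c: u16) -> u16 {
    // concat(x, x) >> (c mod 16), low 16 bits == x.rotate_right(c mod 16)
    let concat = ((x as u32) << 16) | (x as u32);
    (concat >> (c & 15)) as u16
}
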
/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi64&expand=5060)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_shldi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_shldv_epi64(a, b, _mm512_set1_epi64(IMM8 as i64))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi64&expand=5058)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_shldi_epi64<const IMM8: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm512_shldi_epi64::<IMM8>(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi64&expand=5059)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_shldi_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm512_shldi_epi64::<IMM8>(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi64&expand=5057)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_shldi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_shldv_epi64(a, b, _mm256_set1_epi64x(IMM8 as i64))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi64&expand=5055)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_shldi_epi64<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm256_shldi_epi64::<IMM8>(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi64&expand=5056)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_maskz_shldi_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm256_shldi_epi64::<IMM8>(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi64&expand=5054)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_shldi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_shldv_epi64(a, b, _mm_set1_epi64x(IMM8 as i64))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi64&expand=5052)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_shldi_epi64<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm_shldi_epi64::<IMM8>(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi64&expand=5053)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_shldi_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm_shldi_epi64::<IMM8>(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}

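// Illustrative sketch, not part of the upstream source: a plain-Rust model of
// one 64-bit lane of the `vpshldq` immediate form used above. Note the
// concatenation order a:b (a high, b low), the count reduced modulo 64, and
// that the *upper* 64 bits of the shifted value are kept. The helper name is
// hypothetical.
#[allow(dead_code)]
fn shldi64_lane_model(a: u64, b: u64, imm8: u32) -> u64 {
    let concat = ((a as u128) << 64) | (b as u128);
    ((concat << (imm8 & 63)) >> 64) as u64
}
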
/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi32&expand=5051)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_shldi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_shldv_epi32(a, b, _mm512_set1_epi32(IMM8))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi32&expand=5049)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_shldi_epi32<const IMM8: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm512_shldi_epi32::<IMM8>(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi32&expand=5050)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_shldi_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm512_shldi_epi32::<IMM8>(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi32&expand=5048)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_shldi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_shldv_epi32(a, b, _mm256_set1_epi32(IMM8))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi32&expand=5046)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_shldi_epi32<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm256_shldi_epi32::<IMM8>(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi32&expand=5047)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_maskz_shldi_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm256_shldi_epi32::<IMM8>(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi32&expand=5045)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_shldi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_shldv_epi32(a, b, _mm_set1_epi32(IMM8))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi32&expand=5043)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_shldi_epi32<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm_shldi_epi32::<IMM8>(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi32&expand=5044)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_shldi_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm_shldi_epi32::<IMM8>(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi16&expand=5042)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_shldi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_shldv_epi16(a, b, _mm512_set1_epi16(IMM8 as i16))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi16&expand=5040)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_shldi_epi16<const IMM8: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm512_shldi_epi16::<IMM8>(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi16&expand=5041)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_shldi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm512_shldi_epi16::<IMM8>(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi16&expand=5039)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_shldi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_shldv_epi16(a, b, _mm256_set1_epi16(IMM8 as i16))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi16&expand=5037)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_shldi_epi16<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm256_shldi_epi16::<IMM8>(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi16&expand=5038)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_maskz_shldi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm256_shldi_epi16::<IMM8>(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi16&expand=5036)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_shldi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_shldv_epi16(a, b, _mm_set1_epi16(IMM8 as i16))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi16&expand=5034)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_shldi_epi16<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm_shldi_epi16::<IMM8>(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi16&expand=5035)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_shldi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm_shldi_epi16::<IMM8>(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}

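// Illustrative sketch, not part of the upstream source: per Intel's
// pseudocode the immediate count is reduced modulo the lane width, so for
// 16-bit lanes a count of 21 behaves like 21 & 15 == 5. A scalar check of
// that reduction (helper name hypothetical):
#[allow(dead_code)]
fn shldi16_count_wraps(a: u16, b: u16) -> bool {
    let model = |n: u32| -> u16 {
        let concat = ((a as u32) << 16) | (b as u32);
        ((concat << (n & 15)) >> 16) as u16
    };
    model(21) == model(5)
}
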
/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi64&expand=5114)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
#[rustc_legacy_const_generics(2)]
pub fn _mm512_shrdi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_shrdv_epi64(a, b, _mm512_set1_epi64(IMM8 as i64))
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi64&expand=5112)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_shrdi_epi64<const IMM8: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm512_shrdi_epi64::<IMM8>(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
    }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi64&expand=5113)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 255))] //should be vpshrdq
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_shrdi_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm512_shrdi_epi64::<IMM8>(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi64&expand=5111)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
#[rustc_legacy_const_generics(2)]
pub fn _mm256_shrdi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_shrdv_epi64(a, b, _mm256_set1_epi64x(IMM8 as i64))
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi64&expand=5109)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_shrdi_epi64<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm256_shrdi_epi64::<IMM8>(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
    }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi64&expand=5110)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
#[rustc_legacy_const_generics(3)]
pub fn _mm256_maskz_shrdi_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm256_shrdi_epi64::<IMM8>(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi64&expand=5108)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
#[rustc_legacy_const_generics(2)]
pub fn _mm_shrdi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_shrdv_epi64(a, b, _mm_set1_epi64x(IMM8 as i64))
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi64&expand=5106)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_shrdi_epi64<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm_shrdi_epi64::<IMM8>(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
    }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi64&expand=5107)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_shrdi_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm_shrdi_epi64::<IMM8>(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}

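// Illustrative sketch, not part of the upstream source: for counts in 1..=63
// the right and left immediate forms mirror each other, i.e. on each 64-bit
// lane `shrdi(a, b, n)` equals `shldi(b, a, 64 - n)`. A scalar check with a
// hypothetical helper:
#[allow(dead_code)]
fn shrdi_shldi_duality(a: u64, b: u64, n: u32) -> bool {
    assert!((1..=63).contains(&n));
    let concat = ((b as u128) << 64) | (a as u128); // b:a, b in the high half
    let shrd = (concat >> n) as u64; // low 64 bits of (b:a) >> n
    let shld = ((concat << (64 - n)) >> 64) as u64; // high 64 bits of (b:a) << (64 - n)
    shrd == shld
}
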
/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi32&expand=5105)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
#[rustc_legacy_const_generics(2)]
pub fn _mm512_shrdi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_shrdv_epi32(a, b, _mm512_set1_epi32(IMM8))
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi32&expand=5103)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_shrdi_epi32<const IMM8: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm512_shrdi_epi32::<IMM8>(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
    }
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi32&expand=5104)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_shrdi_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm512_shrdi_epi32::<IMM8>(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi32&expand=5102)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
#[rustc_legacy_const_generics(2)]
pub fn _mm256_shrdi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_shrdv_epi32(a, b, _mm256_set1_epi32(IMM8))
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi32&expand=5100)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_shrdi_epi32<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm256_shrdi_epi32::<IMM8>(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
    }
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi32&expand=5101)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
#[rustc_legacy_const_generics(3)]
pub fn _mm256_maskz_shrdi_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm256_shrdi_epi32::<IMM8>(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi32&expand=5099)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
#[rustc_legacy_const_generics(2)]
pub fn _mm_shrdi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_shrdv_epi32(a, b, _mm_set1_epi32(IMM8))
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi32&expand=5097)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_shrdi_epi32<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm_shrdi_epi32::<IMM8>(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
    }
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi32&expand=5098)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_shrdi_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm_shrdi_epi32::<IMM8>(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi16&expand=5096)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
#[rustc_legacy_const_generics(2)]
pub fn _mm512_shrdi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_shrdv_epi16(a, b, _mm512_set1_epi16(IMM8 as i16))
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi16&expand=5094)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_shrdi_epi16<const IMM8: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm512_shrdi_epi16::<IMM8>(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi16&expand=5095)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_shrdi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm512_shrdi_epi16::<IMM8>(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi16&expand=5093)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
#[rustc_legacy_const_generics(2)]
pub fn _mm256_shrdi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_shrdv_epi16(a, b, _mm256_set1_epi16(IMM8 as i16))
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi16&expand=5091)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_shrdi_epi16<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm256_shrdi_epi16::<IMM8>(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi16&expand=5092)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
#[rustc_legacy_const_generics(3)]
pub fn _mm256_maskz_shrdi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm256_shrdi_epi16::<IMM8>(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi16&expand=5090)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
#[rustc_legacy_const_generics(2)]
pub fn _mm_shrdi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_shrdv_epi16(a, b, _mm_set1_epi16(IMM8 as i16))
}

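// The 16-bit forms above are the same operation with a 32-bit intermediate:
// each lane yields the low 16 bits of ((b:a) >> (count % 16)), the 16-bit
// analogue of the shrd32 sketch earlier (illustrative note only).
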
/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi16&expand=5088)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_shrdi_epi16<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm_shrdi_epi16::<IMM8>(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi16&expand=5089)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_shrdi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = _mm_shrdi_epi16::<IMM8>(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}

#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.avx512.mask.compress.store.w.512"]
    fn vcompressstorew(mem: *mut i8, data: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.compress.store.w.256"]
    fn vcompressstorew256(mem: *mut i8, data: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.compress.store.w.128"]
    fn vcompressstorew128(mem: *mut i8, data: i16x8, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.compress.store.b.512"]
    fn vcompressstoreb(mem: *mut i8, data: i8x64, mask: u64);
    #[link_name = "llvm.x86.avx512.mask.compress.store.b.256"]
    fn vcompressstoreb256(mem: *mut i8, data: i8x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.compress.store.b.128"]
    fn vcompressstoreb128(mem: *mut i8, data: i8x16, mask: u16);

    #[link_name = "llvm.x86.avx512.mask.compress.w.512"]
    fn vpcompressw(a: i16x32, src: i16x32, mask: u32) -> i16x32;
    #[link_name = "llvm.x86.avx512.mask.compress.w.256"]
    fn vpcompressw256(a: i16x16, src: i16x16, mask: u16) -> i16x16;
    #[link_name = "llvm.x86.avx512.mask.compress.w.128"]
    fn vpcompressw128(a: i16x8, src: i16x8, mask: u8) -> i16x8;

    #[link_name = "llvm.x86.avx512.mask.compress.b.512"]
    fn vpcompressb(a: i8x64, src: i8x64, mask: u64) -> i8x64;
    #[link_name = "llvm.x86.avx512.mask.compress.b.256"]
    fn vpcompressb256(a: i8x32, src: i8x32, mask: u32) -> i8x32;
    #[link_name = "llvm.x86.avx512.mask.compress.b.128"]
    fn vpcompressb128(a: i8x16, src: i8x16, mask: u16) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.expand.w.512"]
    fn vpexpandw(a: i16x32, src: i16x32, mask: u32) -> i16x32;
    #[link_name = "llvm.x86.avx512.mask.expand.w.256"]
    fn vpexpandw256(a: i16x16, src: i16x16, mask: u16) -> i16x16;
    #[link_name = "llvm.x86.avx512.mask.expand.w.128"]
    fn vpexpandw128(a: i16x8, src: i16x8, mask: u8) -> i16x8;

    #[link_name = "llvm.x86.avx512.mask.expand.b.512"]
    fn vpexpandb(a: i8x64, src: i8x64, mask: u64) -> i8x64;
    #[link_name = "llvm.x86.avx512.mask.expand.b.256"]
    fn vpexpandb256(a: i8x32, src: i8x32, mask: u32) -> i8x32;
    #[link_name = "llvm.x86.avx512.mask.expand.b.128"]
    fn vpexpandb128(a: i8x16, src: i8x16, mask: u16) -> i8x16;

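    // llvm.fshl / llvm.fshr are LLVM's generic funnel shifts: for W-bit
    // lanes, fshl(a, b, c) yields the high W bits of (a:b) << (c % W) and
    // fshr(a, b, c) yields the low W bits of (a:b) >> (c % W), where a:b is
    // the 2W-bit concatenation with a in the high half. This matches the
    // lane operation of the vpshld*/vpshrd* family (explanatory note).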
    #[link_name = "llvm.fshl.v8i64"]
    fn vpshldvq(a: i64x8, b: i64x8, c: i64x8) -> i64x8;
    #[link_name = "llvm.fshl.v4i64"]
    fn vpshldvq256(a: i64x4, b: i64x4, c: i64x4) -> i64x4;
    #[link_name = "llvm.fshl.v2i64"]
    fn vpshldvq128(a: i64x2, b: i64x2, c: i64x2) -> i64x2;
    #[link_name = "llvm.fshl.v16i32"]
    fn vpshldvd(a: i32x16, b: i32x16, c: i32x16) -> i32x16;
    #[link_name = "llvm.fshl.v8i32"]
    fn vpshldvd256(a: i32x8, b: i32x8, c: i32x8) -> i32x8;
    #[link_name = "llvm.fshl.v4i32"]
    fn vpshldvd128(a: i32x4, b: i32x4, c: i32x4) -> i32x4;
    #[link_name = "llvm.fshl.v32i16"]
    fn vpshldvw(a: i16x32, b: i16x32, c: i16x32) -> i16x32;
    #[link_name = "llvm.fshl.v16i16"]
    fn vpshldvw256(a: i16x16, b: i16x16, c: i16x16) -> i16x16;
    #[link_name = "llvm.fshl.v8i16"]
    fn vpshldvw128(a: i16x8, b: i16x8, c: i16x8) -> i16x8;

    #[link_name = "llvm.fshr.v8i64"]
    fn vpshrdvq(a: i64x8, b: i64x8, c: i64x8) -> i64x8;
    #[link_name = "llvm.fshr.v4i64"]
    fn vpshrdvq256(a: i64x4, b: i64x4, c: i64x4) -> i64x4;
    #[link_name = "llvm.fshr.v2i64"]
    fn vpshrdvq128(a: i64x2, b: i64x2, c: i64x2) -> i64x2;
    #[link_name = "llvm.fshr.v16i32"]
    fn vpshrdvd(a: i32x16, b: i32x16, c: i32x16) -> i32x16;
    #[link_name = "llvm.fshr.v8i32"]
    fn vpshrdvd256(a: i32x8, b: i32x8, c: i32x8) -> i32x8;
    #[link_name = "llvm.fshr.v4i32"]
    fn vpshrdvd128(a: i32x4, b: i32x4, c: i32x4) -> i32x4;
    #[link_name = "llvm.fshr.v32i16"]
    fn vpshrdvw(a: i16x32, b: i16x32, c: i16x32) -> i16x32;
    #[link_name = "llvm.fshr.v16i16"]
    fn vpshrdvw256(a: i16x16, b: i16x16, c: i16x16) -> i16x16;
    #[link_name = "llvm.fshr.v8i16"]
    fn vpshrdvw128(a: i16x8, b: i16x8, c: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.mask.expand.load.b.128"]
    fn expandloadb_128(mem_addr: *const i8, a: i8x16, mask: u16) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.expand.load.w.128"]
    fn expandloadw_128(mem_addr: *const i16, a: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.expand.load.b.256"]
    fn expandloadb_256(mem_addr: *const i8, a: i8x32, mask: u32) -> i8x32;
    #[link_name = "llvm.x86.avx512.mask.expand.load.w.256"]
    fn expandloadw_256(mem_addr: *const i16, a: i16x16, mask: u16) -> i16x16;
    #[link_name = "llvm.x86.avx512.mask.expand.load.b.512"]
    fn expandloadb_512(mem_addr: *const i8, a: i8x64, mask: u64) -> i8x64;
    #[link_name = "llvm.x86.avx512.mask.expand.load.w.512"]
    fn expandloadw_512(mem_addr: *const i16, a: i16x32, mask: u32) -> i16x32;
}

#[cfg(test)]
mod tests {

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;
    use crate::hint::black_box;

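    // Note on the compress/expand expectations below: masks of the form
    // 0b0101...01 select every other lane. Compress packs the selected
    // values toward the low lanes (the remaining lanes take src, or zero
    // for the maskz forms); expand is the inverse, scattering consecutive
    // low elements of `a` into the selected lanes (illustrative note).
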
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_compress_epi16() {
        let src = _mm512_set1_epi16(200);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_mask_compress_epi16(src, 0b01010101_01010101_01010101_01010101, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200,
            1,   3,   5,   7,   9,   11,  13,  15,  17,  19,  21,  23,  25,  27,  29,  31,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_compress_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_maskz_compress_epi16(0b01010101_01010101_01010101_01010101, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
            1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_compress_epi16() {
        let src = _mm256_set1_epi16(200);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_compress_epi16(src, 0b01010101_01010101, a);
        let e = _mm256_set_epi16(
            200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_compress_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_compress_epi16(0b01010101_01010101, a);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_compress_epi16() {
        let src = _mm_set1_epi16(200);
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_compress_epi16(src, 0b01010101, a);
        let e = _mm_set_epi16(200, 200, 200, 200, 1, 3, 5, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_compress_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_compress_epi16(0b01010101, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 1, 3, 5, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_compress_epi8() {
        let src = _mm512_set1_epi8(100);
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let r = _mm512_mask_compress_epi8(
            src,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
            100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
            1,   3,   5,   7,   9,   11,  13,  15,  17,  19,  21,  23,  25,  27,  29,  31,
            33,  35,  37,  39,  41,  43,  45,  47,  49,  51,  53,  55,  57,  59,  61,  63,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_compress_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let r = _mm512_maskz_compress_epi8(
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
            0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
            1,  3,  5,  7,  9,  11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
            33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_compress_epi8() {
        let src = _mm256_set1_epi8(100);
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm256_mask_compress_epi8(src, 0b01010101_01010101_01010101_01010101, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
            1,   3,   5,   7,   9,   11,  13,  15,  17,  19,  21,  23,  25,  27,  29,  31,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_compress_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm256_maskz_compress_epi8(0b01010101_01010101_01010101_01010101, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
            1,  3,  5,  7,  9,  11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_compress_epi8() {
        let src = _mm_set1_epi8(100);
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_mask_compress_epi8(src, 0b01010101_01010101, a);
        let e = _mm_set_epi8(
            100, 100, 100, 100, 100, 100, 100, 100, 1, 3, 5, 7, 9, 11, 13, 15,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_compress_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_maskz_compress_epi8(0b01010101_01010101, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_expand_epi16() {
        let src = _mm512_set1_epi16(200);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_mask_expand_epi16(src, 0b01010101_01010101_01010101_01010101, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            200, 16, 200, 17, 200, 18, 200, 19, 200, 20, 200, 21, 200, 22, 200, 23,
            200, 24, 200, 25, 200, 26, 200, 27, 200, 28, 200, 29, 200, 30, 200, 31,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_expand_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_maskz_expand_epi16(0b01010101_01010101_01010101_01010101, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23,
                                 0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_expand_epi16() {
        let src = _mm256_set1_epi16(200);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_expand_epi16(src, 0b01010101_01010101, a);
        let e = _mm256_set_epi16(
            200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_expand_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_expand_epi16(0b01010101_01010101, a);
        let e = _mm256_set_epi16(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_expand_epi16() {
        let src = _mm_set1_epi16(200);
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_expand_epi16(src, 0b01010101, a);
        let e = _mm_set_epi16(200, 4, 200, 5, 200, 6, 200, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_expand_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_expand_epi16(0b01010101, a);
        let e = _mm_set_epi16(0, 4, 0, 5, 0, 6, 0, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_expand_epi8() {
        let src = _mm512_set1_epi8(100);
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let r = _mm512_mask_expand_epi8(
            src,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            100, 32, 100, 33, 100, 34, 100, 35, 100, 36, 100, 37, 100, 38, 100, 39,
            100, 40, 100, 41, 100, 42, 100, 43, 100, 44, 100, 45, 100, 46, 100, 47,
            100, 48, 100, 49, 100, 50, 100, 51, 100, 52, 100, 53, 100, 54, 100, 55,
            100, 56, 100, 57, 100, 58, 100, 59, 100, 60, 100, 61, 100, 62, 100, 63,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_expand_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let r = _mm512_maskz_expand_epi8(
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 32, 0, 33, 0, 34, 0, 35, 0, 36, 0, 37, 0, 38, 0, 39,
            0, 40, 0, 41, 0, 42, 0, 43, 0, 44, 0, 45, 0, 46, 0, 47,
            0, 48, 0, 49, 0, 50, 0, 51, 0, 52, 0, 53, 0, 54, 0, 55,
            0, 56, 0, 57, 0, 58, 0, 59, 0, 60, 0, 61, 0, 62, 0, 63,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_expand_epi8() {
        let src = _mm256_set1_epi8(100);
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm256_mask_expand_epi8(src, 0b01010101_01010101_01010101_01010101, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            100, 16, 100, 17, 100, 18, 100, 19, 100, 20, 100, 21, 100, 22, 100, 23,
            100, 24, 100, 25, 100, 26, 100, 27, 100, 28, 100, 29, 100, 30, 100, 31,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_expand_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm256_maskz_expand_epi8(0b01010101_01010101_01010101_01010101, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23,
            0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_expand_epi8() {
        let src = _mm_set1_epi8(100);
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_mask_expand_epi8(src, 0b01010101_01010101, a);
        let e = _mm_set_epi8(
            100, 8, 100, 9, 100, 10, 100, 11, 100, 12, 100, 13, 100, 14, 100, 15,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_expand_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_maskz_expand_epi8(0b01010101_01010101, a);
        let e = _mm_set_epi8(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
        assert_eq_m128i(r, e);
    }

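    // Expected values for the shldv tests: with a = 1, b = 1 << (W - 1) and
    // a count of 2, each W-bit lane computes (a << 2) | (b >> (W - 2))
    // = 4 | 2 = 6 (illustrative arithmetic).
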
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shldv_epi64() {
        let a = _mm512_set1_epi64(1);
        let b = _mm512_set1_epi64(1 << 63);
        let c = _mm512_set1_epi64(2);
        let r = _mm512_shldv_epi64(a, b, c);
        let e = _mm512_set1_epi64(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shldv_epi64() {
        let a = _mm512_set1_epi64(1);
        let b = _mm512_set1_epi64(1 << 63);
        let c = _mm512_set1_epi64(2);
        let r = _mm512_mask_shldv_epi64(a, 0, b, c);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shldv_epi64(a, 0b11111111, b, c);
        let e = _mm512_set1_epi64(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shldv_epi64() {
        let a = _mm512_set1_epi64(1);
        let b = _mm512_set1_epi64(1 << 63);
        let c = _mm512_set1_epi64(2);
        let r = _mm512_maskz_shldv_epi64(0, a, b, c);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shldv_epi64(0b11111111, a, b, c);
        let e = _mm512_set1_epi64(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shldv_epi64() {
        let a = _mm256_set1_epi64x(1);
        let b = _mm256_set1_epi64x(1 << 63);
        let c = _mm256_set1_epi64x(2);
        let r = _mm256_shldv_epi64(a, b, c);
        let e = _mm256_set1_epi64x(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shldv_epi64() {
        let a = _mm256_set1_epi64x(1);
        let b = _mm256_set1_epi64x(1 << 63);
        let c = _mm256_set1_epi64x(2);
        let r = _mm256_mask_shldv_epi64(a, 0, b, c);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shldv_epi64(a, 0b00001111, b, c);
        let e = _mm256_set1_epi64x(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shldv_epi64() {
        let a = _mm256_set1_epi64x(1);
        let b = _mm256_set1_epi64x(1 << 63);
        let c = _mm256_set1_epi64x(2);
        let r = _mm256_maskz_shldv_epi64(0, a, b, c);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shldv_epi64(0b00001111, a, b, c);
        let e = _mm256_set1_epi64x(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shldv_epi64() {
        let a = _mm_set1_epi64x(1);
        let b = _mm_set1_epi64x(1 << 63);
        let c = _mm_set1_epi64x(2);
        let r = _mm_shldv_epi64(a, b, c);
        let e = _mm_set1_epi64x(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shldv_epi64() {
        let a = _mm_set1_epi64x(1);
        let b = _mm_set1_epi64x(1 << 63);
        let c = _mm_set1_epi64x(2);
        let r = _mm_mask_shldv_epi64(a, 0, b, c);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shldv_epi64(a, 0b00000011, b, c);
        let e = _mm_set1_epi64x(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shldv_epi64() {
        let a = _mm_set1_epi64x(1);
        let b = _mm_set1_epi64x(1 << 63);
        let c = _mm_set1_epi64x(2);
        let r = _mm_maskz_shldv_epi64(0, a, b, c);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shldv_epi64(0b00000011, a, b, c);
        let e = _mm_set1_epi64x(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shldv_epi32() {
        let a = _mm512_set1_epi32(1);
        let b = _mm512_set1_epi32(1 << 31);
        let c = _mm512_set1_epi32(2);
        let r = _mm512_shldv_epi32(a, b, c);
        let e = _mm512_set1_epi32(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shldv_epi32() {
        let a = _mm512_set1_epi32(1);
        let b = _mm512_set1_epi32(1 << 31);
        let c = _mm512_set1_epi32(2);
        let r = _mm512_mask_shldv_epi32(a, 0, b, c);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shldv_epi32(a, 0b11111111_11111111, b, c);
        let e = _mm512_set1_epi32(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shldv_epi32() {
        let a = _mm512_set1_epi32(1);
        let b = _mm512_set1_epi32(1 << 31);
        let c = _mm512_set1_epi32(2);
        let r = _mm512_maskz_shldv_epi32(0, a, b, c);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shldv_epi32(0b11111111_11111111, a, b, c);
        let e = _mm512_set1_epi32(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shldv_epi32() {
        let a = _mm256_set1_epi32(1);
        let b = _mm256_set1_epi32(1 << 31);
        let c = _mm256_set1_epi32(2);
        let r = _mm256_shldv_epi32(a, b, c);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shldv_epi32() {
        let a = _mm256_set1_epi32(1);
        let b = _mm256_set1_epi32(1 << 31);
        let c = _mm256_set1_epi32(2);
        let r = _mm256_mask_shldv_epi32(a, 0, b, c);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shldv_epi32(a, 0b11111111, b, c);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shldv_epi32() {
        let a = _mm256_set1_epi32(1);
        let b = _mm256_set1_epi32(1 << 31);
        let c = _mm256_set1_epi32(2);
        let r = _mm256_maskz_shldv_epi32(0, a, b, c);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shldv_epi32(0b11111111, a, b, c);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shldv_epi32() {
        let a = _mm_set1_epi32(1);
        let b = _mm_set1_epi32(1 << 31);
        let c = _mm_set1_epi32(2);
        let r = _mm_shldv_epi32(a, b, c);
        let e = _mm_set1_epi32(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shldv_epi32() {
        let a = _mm_set1_epi32(1);
        let b = _mm_set1_epi32(1 << 31);
        let c = _mm_set1_epi32(2);
        let r = _mm_mask_shldv_epi32(a, 0, b, c);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shldv_epi32(a, 0b00001111, b, c);
        let e = _mm_set1_epi32(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shldv_epi32() {
        let a = _mm_set1_epi32(1);
        let b = _mm_set1_epi32(1 << 31);
        let c = _mm_set1_epi32(2);
        let r = _mm_maskz_shldv_epi32(0, a, b, c);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shldv_epi32(0b00001111, a, b, c);
        let e = _mm_set1_epi32(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shldv_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1 << 15);
        let c = _mm512_set1_epi16(2);
        let r = _mm512_shldv_epi16(a, b, c);
        let e = _mm512_set1_epi16(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shldv_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1 << 15);
        let c = _mm512_set1_epi16(2);
        let r = _mm512_mask_shldv_epi16(a, 0, b, c);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shldv_epi16(a, 0b11111111_11111111_11111111_11111111, b, c);
        let e = _mm512_set1_epi16(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shldv_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1 << 15);
        let c = _mm512_set1_epi16(2);
        let r = _mm512_maskz_shldv_epi16(0, a, b, c);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shldv_epi16(0b11111111_11111111_11111111_11111111, a, b, c);
        let e = _mm512_set1_epi16(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shldv_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1 << 15);
        let c = _mm256_set1_epi16(2);
        let r = _mm256_shldv_epi16(a, b, c);
        let e = _mm256_set1_epi16(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shldv_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1 << 15);
        let c = _mm256_set1_epi16(2);
        let r = _mm256_mask_shldv_epi16(a, 0, b, c);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shldv_epi16(a, 0b11111111_11111111, b, c);
        let e = _mm256_set1_epi16(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shldv_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1 << 15);
        let c = _mm256_set1_epi16(2);
        let r = _mm256_maskz_shldv_epi16(0, a, b, c);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shldv_epi16(0b11111111_11111111, a, b, c);
        let e = _mm256_set1_epi16(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shldv_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1 << 15);
        let c = _mm_set1_epi16(2);
        let r = _mm_shldv_epi16(a, b, c);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shldv_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1 << 15);
        let c = _mm_set1_epi16(2);
        let r = _mm_mask_shldv_epi16(a, 0, b, c);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shldv_epi16(a, 0b11111111, b, c);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shldv_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1 << 15);
        let c = _mm_set1_epi16(2);
        let r = _mm_maskz_shldv_epi16(0, a, b, c);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shldv_epi16(0b11111111, a, b, c);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

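    // Expected values for the shrdv tests: with a = 2, b = 8 and a count of
    // 1, each W-bit lane is the low half of (b:a) >> 1, i.e.
    // (a >> 1) | (b << (W - 1)) truncated to W bits = 1 (illustrative
    // arithmetic).
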
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shrdv_epi64() {
        let a = _mm512_set1_epi64(2);
        let b = _mm512_set1_epi64(8);
        let c = _mm512_set1_epi64(1);
        let r = _mm512_shrdv_epi64(a, b, c);
        let e = _mm512_set1_epi64(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shrdv_epi64() {
        let a = _mm512_set1_epi64(2);
        let b = _mm512_set1_epi64(8);
        let c = _mm512_set1_epi64(1);
        let r = _mm512_mask_shrdv_epi64(a, 0, b, c);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shrdv_epi64(a, 0b11111111, b, c);
        let e = _mm512_set1_epi64(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shrdv_epi64() {
        let a = _mm512_set1_epi64(2);
        let b = _mm512_set1_epi64(8);
        let c = _mm512_set1_epi64(1);
        let r = _mm512_maskz_shrdv_epi64(0, a, b, c);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shrdv_epi64(0b11111111, a, b, c);
        let e = _mm512_set1_epi64(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shrdv_epi64() {
        let a = _mm256_set1_epi64x(2);
        let b = _mm256_set1_epi64x(8);
        let c = _mm256_set1_epi64x(1);
        let r = _mm256_shrdv_epi64(a, b, c);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shrdv_epi64() {
        let a = _mm256_set1_epi64x(2);
        let b = _mm256_set1_epi64x(8);
        let c = _mm256_set1_epi64x(1);
        let r = _mm256_mask_shrdv_epi64(a, 0, b, c);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shrdv_epi64(a, 0b00001111, b, c);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shrdv_epi64() {
        let a = _mm256_set1_epi64x(2);
        let b = _mm256_set1_epi64x(8);
        let c = _mm256_set1_epi64x(1);
        let r = _mm256_maskz_shrdv_epi64(0, a, b, c);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shrdv_epi64(0b00001111, a, b, c);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shrdv_epi64() {
        let a = _mm_set1_epi64x(2);
        let b = _mm_set1_epi64x(8);
        let c = _mm_set1_epi64x(1);
        let r = _mm_shrdv_epi64(a, b, c);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shrdv_epi64() {
        let a = _mm_set1_epi64x(2);
        let b = _mm_set1_epi64x(8);
        let c = _mm_set1_epi64x(1);
        let r = _mm_mask_shrdv_epi64(a, 0, b, c);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shrdv_epi64(a, 0b00000011, b, c);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shrdv_epi64() {
        let a = _mm_set1_epi64x(2);
        let b = _mm_set1_epi64x(8);
        let c = _mm_set1_epi64x(1);
        let r = _mm_maskz_shrdv_epi64(0, a, b, c);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shrdv_epi64(0b00000011, a, b, c);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shrdv_epi32() {
        let a = _mm512_set1_epi32(2);
        let b = _mm512_set1_epi32(8);
        let c = _mm512_set1_epi32(1);
        let r = _mm512_shrdv_epi32(a, b, c);
        let e = _mm512_set1_epi32(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shrdv_epi32() {
        let a = _mm512_set1_epi32(2);
        let b = _mm512_set1_epi32(8);
        let c = _mm512_set1_epi32(1);
        let r = _mm512_mask_shrdv_epi32(a, 0, b, c);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shrdv_epi32(a, 0b11111111_11111111, b, c);
        let e = _mm512_set1_epi32(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shrdv_epi32() {
        let a = _mm512_set1_epi32(2);
        let b = _mm512_set1_epi32(8);
        let c = _mm512_set1_epi32(1);
        let r = _mm512_maskz_shrdv_epi32(0, a, b, c);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shrdv_epi32(0b11111111_11111111, a, b, c);
        let e = _mm512_set1_epi32(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shrdv_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(8);
        let c = _mm256_set1_epi32(1);
        let r = _mm256_shrdv_epi32(a, b, c);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shrdv_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(8);
        let c = _mm256_set1_epi32(1);
        let r = _mm256_mask_shrdv_epi32(a, 0, b, c);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shrdv_epi32(a, 0b11111111, b, c);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shrdv_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(8);
        let c = _mm256_set1_epi32(1);
        let r = _mm256_maskz_shrdv_epi32(0, a, b, c);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shrdv_epi32(0b11111111, a, b, c);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shrdv_epi32() {
        let a = _mm_set1_epi32(2);
        let b = _mm_set1_epi32(8);
        let c = _mm_set1_epi32(1);
        let r = _mm_shrdv_epi32(a, b, c);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shrdv_epi32() {
        let a = _mm_set1_epi32(2);
        let b = _mm_set1_epi32(8);
        let c = _mm_set1_epi32(1);
        let r = _mm_mask_shrdv_epi32(a, 0, b, c);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shrdv_epi32(a, 0b00001111, b, c);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shrdv_epi32() {
        let a = _mm_set1_epi32(2);
        let b = _mm_set1_epi32(8);
        let c = _mm_set1_epi32(1);
        let r = _mm_maskz_shrdv_epi32(0, a, b, c);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shrdv_epi32(0b00001111, a, b, c);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shrdv_epi16() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(8);
        let c = _mm512_set1_epi16(1);
        let r = _mm512_shrdv_epi16(a, b, c);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shrdv_epi16() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(8);
        let c = _mm512_set1_epi16(1);
        let r = _mm512_mask_shrdv_epi16(a, 0, b, c);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shrdv_epi16(a, 0b11111111_11111111_11111111_11111111, b, c);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shrdv_epi16() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(8);
        let c = _mm512_set1_epi16(1);
        let r = _mm512_maskz_shrdv_epi16(0, a, b, c);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shrdv_epi16(0b11111111_11111111_11111111_11111111, a, b, c);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shrdv_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(8);
        let c = _mm256_set1_epi16(1);
        let r = _mm256_shrdv_epi16(a, b, c);
        let e = _mm256_set1_epi16(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shrdv_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(8);
        let c = _mm256_set1_epi16(1);
        let r = _mm256_mask_shrdv_epi16(a, 0, b, c);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shrdv_epi16(a, 0b11111111_11111111, b, c);
        let e = _mm256_set1_epi16(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shrdv_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(8);
        let c = _mm256_set1_epi16(1);
        let r = _mm256_maskz_shrdv_epi16(0, a, b, c);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shrdv_epi16(0b11111111_11111111, a, b, c);
        let e = _mm256_set1_epi16(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shrdv_epi16() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(8);
        let c = _mm_set1_epi16(1);
        let r = _mm_shrdv_epi16(a, b, c);
        let e = _mm_set1_epi16(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shrdv_epi16() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(8);
        let c = _mm_set1_epi16(1);
        let r = _mm_mask_shrdv_epi16(a, 0, b, c);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shrdv_epi16(a, 0b11111111, b, c);
        let e = _mm_set1_epi16(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shrdv_epi16() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(8);
        let c = _mm_set1_epi16(1);
        let r = _mm_maskz_shrdv_epi16(0, a, b, c);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shrdv_epi16(0b11111111, a, b, c);
        let e = _mm_set1_epi16(1);
        assert_eq_m128i(r, e);
    }

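    // The shldi/shrdi tests mirror the shldv/shrdv ones above: the
    // immediate-count forms perform the same lane arithmetic with the count
    // supplied as a const generic rather than a vector operand.
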
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shldi_epi64() {
        let a = _mm512_set1_epi64(1);
        let b = _mm512_set1_epi64(1 << 63);
        let r = _mm512_shldi_epi64::<2>(a, b);
        let e = _mm512_set1_epi64(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shldi_epi64() {
        let a = _mm512_set1_epi64(1);
        let b = _mm512_set1_epi64(1 << 63);
        let r = _mm512_mask_shldi_epi64::<2>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shldi_epi64::<2>(a, 0b11111111, a, b);
        let e = _mm512_set1_epi64(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shldi_epi64() {
        let a = _mm512_set1_epi64(1);
        let b = _mm512_set1_epi64(1 << 63);
        let r = _mm512_maskz_shldi_epi64::<2>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shldi_epi64::<2>(0b11111111, a, b);
        let e = _mm512_set1_epi64(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shldi_epi64() {
        let a = _mm256_set1_epi64x(1);
        let b = _mm256_set1_epi64x(1 << 63);
        let r = _mm256_shldi_epi64::<2>(a, b);
        let e = _mm256_set1_epi64x(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shldi_epi64() {
        let a = _mm256_set1_epi64x(1);
        let b = _mm256_set1_epi64x(1 << 63);
        let r = _mm256_mask_shldi_epi64::<2>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shldi_epi64::<2>(a, 0b00001111, a, b);
        let e = _mm256_set1_epi64x(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shldi_epi64() {
        let a = _mm256_set1_epi64x(1);
        let b = _mm256_set1_epi64x(1 << 63);
        let r = _mm256_maskz_shldi_epi64::<2>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shldi_epi64::<2>(0b00001111, a, b);
        let e = _mm256_set1_epi64x(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shldi_epi64() {
        let a = _mm_set1_epi64x(1);
        let b = _mm_set1_epi64x(1 << 63);
        let r = _mm_shldi_epi64::<2>(a, b);
        let e = _mm_set1_epi64x(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shldi_epi64() {
        let a = _mm_set1_epi64x(1);
        let b = _mm_set1_epi64x(1 << 63);
        let r = _mm_mask_shldi_epi64::<2>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shldi_epi64::<2>(a, 0b00000011, a, b);
        let e = _mm_set1_epi64x(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shldi_epi64() {
        let a = _mm_set1_epi64x(1);
        let b = _mm_set1_epi64x(1 << 63);
        let r = _mm_maskz_shldi_epi64::<2>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shldi_epi64::<2>(0b00000011, a, b);
        let e = _mm_set1_epi64x(6);
        assert_eq_m128i(r, e);
    }

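    // VPSHLDD: the same concatenate-and-shift-left at 32-bit granularity:
    // (1 << 2) | ((1 << 31) >> 30) = 4 | 2 = 6.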
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shldi_epi32() {
        let a = _mm512_set1_epi32(1);
        let b = _mm512_set1_epi32(1 << 31);
        let r = _mm512_shldi_epi32::<2>(a, b);
        let e = _mm512_set1_epi32(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shldi_epi32() {
        let a = _mm512_set1_epi32(1);
        let b = _mm512_set1_epi32(1 << 31);
        let r = _mm512_mask_shldi_epi32::<2>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shldi_epi32::<2>(a, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shldi_epi32() {
        let a = _mm512_set1_epi32(1);
        let b = _mm512_set1_epi32(1 << 31);
        let r = _mm512_maskz_shldi_epi32::<2>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shldi_epi32::<2>(0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shldi_epi32() {
        let a = _mm256_set1_epi32(1);
        let b = _mm256_set1_epi32(1 << 31);
        let r = _mm256_shldi_epi32::<2>(a, b);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shldi_epi32() {
        let a = _mm256_set1_epi32(1);
        let b = _mm256_set1_epi32(1 << 31);
        let r = _mm256_mask_shldi_epi32::<2>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shldi_epi32::<2>(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shldi_epi32() {
        let a = _mm256_set1_epi32(1);
        let b = _mm256_set1_epi32(1 << 31);
        let r = _mm256_maskz_shldi_epi32::<2>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shldi_epi32::<2>(0b11111111, a, b);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shldi_epi32() {
        let a = _mm_set1_epi32(1);
        let b = _mm_set1_epi32(1 << 31);
        let r = _mm_shldi_epi32::<2>(a, b);
        let e = _mm_set1_epi32(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shldi_epi32() {
        let a = _mm_set1_epi32(1);
        let b = _mm_set1_epi32(1 << 31);
        let r = _mm_mask_shldi_epi32::<2>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shldi_epi32::<2>(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shldi_epi32() {
        let a = _mm_set1_epi32(1);
        let b = _mm_set1_epi32(1 << 31);
        let r = _mm_maskz_shldi_epi32::<2>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shldi_epi32::<2>(0b00001111, a, b);
        let e = _mm_set1_epi32(6);
        assert_eq_m128i(r, e);
    }

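    // VPSHLDW: the 16-bit variant: (1 << 2) | ((1 << 15) >> 14) = 4 | 2 = 6.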
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shldi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1 << 15);
        let r = _mm512_shldi_epi16::<2>(a, b);
        let e = _mm512_set1_epi16(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shldi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1 << 15);
        let r = _mm512_mask_shldi_epi16::<2>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shldi_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shldi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1 << 15);
        let r = _mm512_maskz_shldi_epi16::<2>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shldi_epi16::<2>(0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shldi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1 << 15);
        let r = _mm256_shldi_epi16::<2>(a, b);
        let e = _mm256_set1_epi16(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shldi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1 << 15);
        let r = _mm256_mask_shldi_epi16::<2>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shldi_epi16::<2>(a, 0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shldi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1 << 15);
        let r = _mm256_maskz_shldi_epi16::<2>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shldi_epi16::<2>(0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shldi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1 << 15);
        let r = _mm_shldi_epi16::<2>(a, b);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shldi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1 << 15);
        let r = _mm_mask_shldi_epi16::<2>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shldi_epi16::<2>(a, 0b11111111, a, b);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shldi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1 << 15);
        let r = _mm_maskz_shldi_epi16::<2>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shldi_epi16::<2>(0b11111111, a, b);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

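    // VPSHRDQ (`_mm512_shrdi_epi64` and the narrower variants): each destination
    // lane is the lower half of the 128-bit concatenation b:a shifted right by
    // the immediate, i.e. (a >> IMM8) | (b << (64 - IMM8)). With a = 2, b = 8
    // and IMM8 = 1 the b contribution is shifted out of the lane, leaving
    // 2 >> 1 = 1.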
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shrdi_epi64() {
        let a = _mm512_set1_epi64(2);
        let b = _mm512_set1_epi64(8);
        let r = _mm512_shrdi_epi64::<1>(a, b);
        let e = _mm512_set1_epi64(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shrdi_epi64() {
        let a = _mm512_set1_epi64(2);
        let b = _mm512_set1_epi64(8);
        let r = _mm512_mask_shrdi_epi64::<1>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shrdi_epi64::<1>(a, 0b11111111, a, b);
        let e = _mm512_set1_epi64(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shrdi_epi64() {
        let a = _mm512_set1_epi64(2);
        let b = _mm512_set1_epi64(8);
        let r = _mm512_maskz_shrdi_epi64::<1>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shrdi_epi64::<1>(0b11111111, a, b);
        let e = _mm512_set1_epi64(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shrdi_epi64() {
        let a = _mm256_set1_epi64x(2);
        let b = _mm256_set1_epi64x(8);
        let r = _mm256_shrdi_epi64::<1>(a, b);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shrdi_epi64() {
        let a = _mm256_set1_epi64x(2);
        let b = _mm256_set1_epi64x(8);
        let r = _mm256_mask_shrdi_epi64::<1>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shrdi_epi64::<1>(a, 0b00001111, a, b);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shrdi_epi64() {
        let a = _mm256_set1_epi64x(2);
        let b = _mm256_set1_epi64x(8);
        let r = _mm256_maskz_shrdi_epi64::<1>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shrdi_epi64::<1>(0b00001111, a, b);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shrdi_epi64() {
        let a = _mm_set1_epi64x(2);
        let b = _mm_set1_epi64x(8);
        let r = _mm_shrdi_epi64::<1>(a, b);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shrdi_epi64() {
        let a = _mm_set1_epi64x(2);
        let b = _mm_set1_epi64x(8);
        let r = _mm_mask_shrdi_epi64::<1>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shrdi_epi64::<1>(a, 0b00000011, a, b);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shrdi_epi64() {
        let a = _mm_set1_epi64x(2);
        let b = _mm_set1_epi64x(8);
        let r = _mm_maskz_shrdi_epi64::<1>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shrdi_epi64::<1>(0b00000011, a, b);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

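    // VPSHRDD: the same concatenate-and-shift-right at 32-bit granularity:
    // (2 >> 1) | (8 << 31 truncated to 32 bits) = 1 | 0 = 1.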
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shrdi_epi32() {
        let a = _mm512_set1_epi32(2);
        let b = _mm512_set1_epi32(8);
        let r = _mm512_shrdi_epi32::<1>(a, b);
        let e = _mm512_set1_epi32(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shrdi_epi32() {
        let a = _mm512_set1_epi32(2);
        let b = _mm512_set1_epi32(8);
        let r = _mm512_mask_shrdi_epi32::<1>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shrdi_epi32::<1>(a, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shrdi_epi32() {
        let a = _mm512_set1_epi32(2);
        let b = _mm512_set1_epi32(8);
        let r = _mm512_maskz_shrdi_epi32::<1>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shrdi_epi32::<1>(0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shrdi_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(8);
        let r = _mm256_shrdi_epi32::<1>(a, b);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shrdi_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(8);
        let r = _mm256_mask_shrdi_epi32::<1>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shrdi_epi32::<1>(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shrdi_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(8);
        let r = _mm256_maskz_shrdi_epi32::<1>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shrdi_epi32::<1>(0b11111111, a, b);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shrdi_epi32() {
        let a = _mm_set1_epi32(2);
        let b = _mm_set1_epi32(8);
        let r = _mm_shrdi_epi32::<1>(a, b);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shrdi_epi32() {
        let a = _mm_set1_epi32(2);
        let b = _mm_set1_epi32(8);
        let r = _mm_mask_shrdi_epi32::<1>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shrdi_epi32::<1>(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shrdi_epi32() {
        let a = _mm_set1_epi32(2);
        let b = _mm_set1_epi32(8);
        let r = _mm_maskz_shrdi_epi32::<1>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shrdi_epi32::<1>(0b00001111, a, b);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

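    // VPSHRDW: the 16-bit variant: (2 >> 1) | (8 << 15 truncated to 16 bits) = 1.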
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shrdi_epi16() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(8);
        let r = _mm512_shrdi_epi16::<1>(a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shrdi_epi16() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(8);
        let r = _mm512_mask_shrdi_epi16::<1>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shrdi_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shrdi_epi16() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(8);
        let r = _mm512_maskz_shrdi_epi16::<1>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shrdi_epi16::<1>(0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shrdi_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(8);
        let r = _mm256_shrdi_epi16::<1>(a, b);
        let e = _mm256_set1_epi16(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shrdi_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(8);
        let r = _mm256_mask_shrdi_epi16::<1>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shrdi_epi16::<1>(a, 0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shrdi_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(8);
        let r = _mm256_maskz_shrdi_epi16::<1>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shrdi_epi16::<1>(0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shrdi_epi16() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(8);
        let r = _mm_shrdi_epi16::<1>(a, b);
        let e = _mm_set1_epi16(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shrdi_epi16() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(8);
        let r = _mm_mask_shrdi_epi16::<1>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shrdi_epi16::<1>(a, 0b11111111, a, b);
        let e = _mm_set1_epi16(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shrdi_epi16() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(8);
        let r = _mm_maskz_shrdi_epi16::<1>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shrdi_epi16::<1>(0b11111111, a, b);
        let e = _mm_set1_epi16(1);
        assert_eq_m128i(r, e);
    }

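    // Expand-load reads consecutive elements from memory and places them, in
    // order, into the lanes whose mask bit is set; inactive lanes keep src
    // (mask variant) or become zero (maskz variant). In the first test below
    // the low four mask bits are set, so lanes 0..=3 receive 1, 2, 3, 4 (the
    // rightmost entries of the `_mm512_set_epi16` expected value).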
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_expandloadu_epi16() {
        let src = _mm512_set1_epi16(42);
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111;
        let r = _mm512_mask_expandloadu_epi16(src, m, black_box(p));
        let e = _mm512_set_epi16(
            16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42,
            42, 42, 42, 42, 42, 4, 3, 2, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_expandloadu_epi16() {
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111;
        let r = _mm512_maskz_expandloadu_epi16(m, black_box(p));
        let e = _mm512_set_epi16(
            16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0,
            0, 4, 3, 2, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi16() {
        let src = _mm256_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_mask_expandloadu_epi16(src, m, black_box(p));
        let e = _mm256_set_epi16(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_maskz_expandloadu_epi16(m, black_box(p));
        let e = _mm256_set_epi16(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi16() {
        let src = _mm_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_epi16(src, m, black_box(p));
        let e = _mm_set_epi16(4, 3, 2, 42, 1, 42, 42, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_epi16(m, black_box(p));
        let e = _mm_set_epi16(4, 3, 2, 0, 1, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

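    // Byte-granularity expand-load: in the 512-bit tests the 64-bit mask has
    // every other bit of its low byte set (0b01010101), so the first four
    // loaded values 1..=4 land in lanes 0, 2, 4 and 6.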
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_expandloadu_epi8() {
        let src = _mm512_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101;
        let r = _mm512_mask_expandloadu_epi8(src, m, black_box(p));
        let e = _mm512_set_epi8(
            32, 31, 30, 42, 29, 42, 42, 42, 28, 27, 42, 42, 26, 42, 25, 42, 24, 23, 22, 21, 42, 42,
            42, 42, 42, 42, 42, 42, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 42, 42, 42, 42,
            42, 42, 42, 42, 8, 42, 7, 42, 6, 42, 5, 42, 42, 4, 42, 3, 42, 2, 42, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_expandloadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101;
        let r = _mm512_maskz_expandloadu_epi8(m, black_box(p));
        let e = _mm512_set_epi8(
            32, 31, 30, 0, 29, 0, 0, 0, 28, 27, 0, 0, 26, 0, 25, 0, 24, 23, 22, 21, 0, 0, 0, 0, 0,
            0, 0, 0, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0,
            7, 0, 6, 0, 5, 0, 0, 4, 0, 3, 0, 2, 0, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi8() {
        let src = _mm256_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111;
        let r = _mm256_mask_expandloadu_epi8(src, m, black_box(p));
        let e = _mm256_set_epi8(
            16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42,
            42, 42, 42, 42, 42, 4, 3, 2, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111;
        let r = _mm256_maskz_expandloadu_epi8(m, black_box(p));
        let e = _mm256_set_epi8(
            16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0,
            0, 4, 3, 2, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi8() {
        let src = _mm_set1_epi8(42);
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_mask_expandloadu_epi8(src, m, black_box(p));
        let e = _mm_set_epi8(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi8() {
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_maskz_expandloadu_epi8(m, black_box(p));
        let e = _mm_set_epi8(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
        assert_eq_m128i(r, e);
    }

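    // Compress-store is the inverse of expand-load: the active lanes are
    // gathered and written contiguously to memory, and the rest of the buffer
    // is left untouched (the trailing zeros below come from the pre-zeroed
    // array, not from the store). In the 512-bit test the low eight mask bits
    // are clear, so the stream starts with lanes 8..=15, i.e. values 9..=16.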
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_compressstoreu_epi16() {
        let a = _mm512_set_epi16(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11,
            10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
        );
        let mut r = [0_i16; 32];
        _mm512_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0, a);
        assert_eq!(&r, &[0_i16; 32]);
        _mm512_mask_compressstoreu_epi16(
            r.as_mut_ptr() as *mut _,
            0b11110000_11001010_11111111_00000000,
            a,
        );
        assert_eq!(
            &r,
            &[
                9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 23, 24, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0
            ]
        );
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_compressstoreu_epi16() {
        let a = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let mut r = [0_i16; 16];
        _mm256_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0, a);
        assert_eq!(&r, &[0_i16; 16]);
        _mm256_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0b11110000_11001010, a);
        assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_compressstoreu_epi16() {
        let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
        let mut r = [0_i16; 8];
        _mm_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0, a);
        assert_eq!(&r, &[0_i16; 8]);
        _mm_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0b11110000, a);
        assert_eq!(&r, &[5, 6, 7, 8, 0, 0, 0, 0]);
    }

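    // Byte-granularity compress-store: in the 512-bit test the low four mask
    // bits select lanes 0..=3, so the output buffer starts with 1, 2, 3, 4.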
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_compressstoreu_epi8() {
        let a = _mm512_set_epi8(
            64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43,
            42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21,
            20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
        );
        let mut r = [0_i8; 64];
        _mm512_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0, a);
        assert_eq!(&r, &[0_i8; 64]);
        _mm512_mask_compressstoreu_epi8(
            r.as_mut_ptr() as *mut _,
            0b11110000_11001010_11111111_00000000_10101010_01010101_11110000_00001111,
            a,
        );
        assert_eq!(
            &r,
            &[
                1, 2, 3, 4, 13, 14, 15, 16, 17, 19, 21, 23, 26, 28, 30, 32, 41, 42, 43, 44, 45, 46,
                47, 48, 50, 52, 55, 56, 61, 62, 63, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
            ]
        );
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_compressstoreu_epi8() {
        let a = _mm256_set_epi8(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11,
            10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
        );
        let mut r = [0_i8; 32];
        _mm256_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0, a);
        assert_eq!(&r, &[0_i8; 32]);
        _mm256_mask_compressstoreu_epi8(
            r.as_mut_ptr() as *mut _,
            0b11110000_11001010_11111111_00000000,
            a,
        );
        assert_eq!(
            &r,
            &[
                9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 23, 24, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0
            ]
        );
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_compressstoreu_epi8() {
        let a = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let mut r = [0_i8; 16];
        _mm_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0, a);
        assert_eq!(&r, &[0_i8; 16]);
        _mm_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0b11110000_11001010, a);
        assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
    }
}