core/stdarch/crates/core_arch/src/x86/avx512vbmi2.rs

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_expandloadu_epi16(
    src: __m512i,
    k: __mmask32,
    mem_addr: *const i16,
) -> __m512i {
    transmute(expandloadw_512(mem_addr, src.as_i16x32(), k))
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_expandloadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
    _mm512_mask_expandloadu_epi16(_mm512_setzero_si512(), k, mem_addr)
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_expandloadu_epi16(
    src: __m256i,
    k: __mmask16,
    mem_addr: *const i16,
) -> __m256i {
    transmute(expandloadw_256(mem_addr, src.as_i16x16(), k))
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_expandloadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
    _mm256_mask_expandloadu_epi16(_mm256_setzero_si256(), k, mem_addr)
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi16)
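///
/// # Examples
///
/// A minimal usage sketch, assuming the running CPU supports `avx512vbmi2`
/// and `avx512vl`; the mask and the values in `data` are illustrative only:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let src = _mm_set1_epi16(-1);
///     let data: [i16; 2] = [7, 9];
///     // Bits 0 and 2 of the mask are set, so the two loaded values land in
///     // lanes 0 and 2; every other lane keeps the value from `src`.
///     let r = _mm_mask_expandloadu_epi16(src, 0b0000_0101, data.as_ptr());
///     let mut out = [0i16; 8];
///     _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///     assert_eq!(out, [7, -1, 9, -1, -1, -1, -1, -1]);
/// }
/// ```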
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_expandloadu_epi16(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i16,
) -> __m128i {
    transmute(expandloadw_128(mem_addr, src.as_i16x8(), k))
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_expandloadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
    _mm_mask_expandloadu_epi16(_mm_setzero_si128(), k, mem_addr)
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_expandloadu_epi8(
    src: __m512i,
    k: __mmask64,
    mem_addr: *const i8,
) -> __m512i {
    transmute(expandloadb_512(mem_addr, src.as_i8x64(), k))
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_expandloadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
    _mm512_mask_expandloadu_epi8(_mm512_setzero_si512(), k, mem_addr)
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_expandloadu_epi8(
    src: __m256i,
    k: __mmask32,
    mem_addr: *const i8,
) -> __m256i {
    transmute(expandloadb_256(mem_addr, src.as_i8x32(), k))
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_expandloadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
    _mm256_mask_expandloadu_epi8(_mm256_setzero_si256(), k, mem_addr)
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_expandloadu_epi8(
    src: __m128i,
    k: __mmask16,
    mem_addr: *const i8,
) -> __m128i {
    transmute(expandloadb_128(mem_addr, src.as_i8x16(), k))
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi8)
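///
/// # Examples
///
/// A minimal usage sketch, assuming the running CPU supports `avx512vbmi2`
/// and `avx512vl`; the mask and the values in `data` are illustrative only:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let data: [i8; 3] = [10, 20, 30];
///     // Bits 0, 2 and 4 of the mask are set, so the three loaded values
///     // land in lanes 0, 2 and 4; every other lane is zeroed.
///     let r = _mm_maskz_expandloadu_epi8(0b0000_0000_0001_0101, data.as_ptr());
///     let mut out = [0i8; 16];
///     _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///     assert_eq!(&out[..6], &[10, 0, 20, 0, 30, 0]);
/// }
/// ```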
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_expandloadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
    _mm_mask_expandloadu_epi8(_mm_setzero_si128(), k, mem_addr)
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm512_mask_compressstoreu_epi16(base_addr: *mut u8, k: __mmask32, a: __m512i) {
    vcompressstorew(base_addr as *mut _, a.as_i16x32(), k)
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm256_mask_compressstoreu_epi16(base_addr: *mut u8, k: __mmask16, a: __m256i) {
    vcompressstorew256(base_addr as *mut _, a.as_i16x16(), k)
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi16)
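///
/// # Examples
///
/// A minimal usage sketch, assuming the running CPU supports `avx512vbmi2`
/// and `avx512vl`; the mask and the lane values are illustrative only:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
///     let mut out = [0i16; 8];
///     // Lanes 1, 3 and 5 are active, so 2, 4 and 6 are written back to back
///     // at the start of `out`; the trailing elements are left untouched.
///     _mm_mask_compressstoreu_epi16(out.as_mut_ptr() as *mut u8, 0b0010_1010, a);
///     assert_eq!(out, [2, 4, 6, 0, 0, 0, 0, 0]);
/// }
/// ```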
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm_mask_compressstoreu_epi16(base_addr: *mut u8, k: __mmask8, a: __m128i) {
    vcompressstorew128(base_addr as *mut _, a.as_i16x8(), k)
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm512_mask_compressstoreu_epi8(base_addr: *mut u8, k: __mmask64, a: __m512i) {
    vcompressstoreb(base_addr as *mut _, a.as_i8x64(), k)
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm256_mask_compressstoreu_epi8(base_addr: *mut u8, k: __mmask32, a: __m256i) {
    vcompressstoreb256(base_addr as *mut _, a.as_i8x32(), k)
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi8)
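///
/// # Examples
///
/// A minimal usage sketch, assuming the running CPU supports `avx512vbmi2`
/// and `avx512vl`; the mask and the lane values are illustrative only:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
///     let mut out = [0i8; 16];
///     // Bits 4 through 7 are set, so lanes 4..=7 (values 5, 6, 7 and 8) are
///     // stored contiguously at the start of `out`.
///     _mm_mask_compressstoreu_epi8(out.as_mut_ptr() as *mut u8, 0b1111_0000, a);
///     assert_eq!(&out[..4], &[5, 6, 7, 8]);
/// }
/// ```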
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm_mask_compressstoreu_epi8(base_addr: *mut u8, k: __mmask16, a: __m128i) {
    vcompressstoreb128(base_addr as *mut _, a.as_i8x16(), k)
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi16&expand=1192)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm512_mask_compress_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    transmute(vpcompressw(a.as_i16x32(), src.as_i16x32(), k))
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi16&expand=1193)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm512_maskz_compress_epi16(k: __mmask32, a: __m512i) -> __m512i {
    transmute(vpcompressw(a.as_i16x32(), i16x32::ZERO, k))
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi16&expand=1190)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm256_mask_compress_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    transmute(vpcompressw256(a.as_i16x16(), src.as_i16x16(), k))
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi16&expand=1191)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm256_maskz_compress_epi16(k: __mmask16, a: __m256i) -> __m256i {
    transmute(vpcompressw256(a.as_i16x16(), i16x16::ZERO, k))
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi16&expand=1188)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm_mask_compress_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpcompressw128(a.as_i16x8(), src.as_i16x8(), k))
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi16&expand=1189)
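///
/// # Examples
///
/// A minimal usage sketch, assuming the running CPU supports `avx512vbmi2`
/// and `avx512vl`; the mask and the lane values are illustrative only:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
///     // The values of the active lanes 1, 3, 5 and 7 are packed into the
///     // low lanes of the result; the remaining lanes are zeroed.
///     let r = _mm_maskz_compress_epi16(0b1010_1010, a);
///     let mut out = [0i16; 8];
///     _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///     assert_eq!(out, [2, 4, 6, 8, 0, 0, 0, 0]);
/// }
/// ```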
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm_maskz_compress_epi16(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpcompressw128(a.as_i16x8(), i16x8::ZERO, k))
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi8&expand=1210)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm512_mask_compress_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    transmute(vpcompressb(a.as_i8x64(), src.as_i8x64(), k))
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi8&expand=1211)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm512_maskz_compress_epi8(k: __mmask64, a: __m512i) -> __m512i {
    transmute(vpcompressb(a.as_i8x64(), i8x64::ZERO, k))
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi8&expand=1208)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm256_mask_compress_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    transmute(vpcompressb256(a.as_i8x32(), src.as_i8x32(), k))
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi8&expand=1209)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm256_maskz_compress_epi8(k: __mmask32, a: __m256i) -> __m256i {
    transmute(vpcompressb256(a.as_i8x32(), i8x32::ZERO, k))
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi8&expand=1206)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm_mask_compress_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    transmute(vpcompressb128(a.as_i8x16(), src.as_i8x16(), k))
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi8&expand=1207)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm_maskz_compress_epi8(k: __mmask16, a: __m128i) -> __m128i {
    transmute(vpcompressb128(a.as_i8x16(), i8x16::ZERO, k))
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi16&expand=2310)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub unsafe fn _mm512_mask_expand_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    transmute(vpexpandw(a.as_i16x32(), src.as_i16x32(), k))
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi16&expand=2311)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub unsafe fn _mm512_maskz_expand_epi16(k: __mmask32, a: __m512i) -> __m512i {
    transmute(vpexpandw(a.as_i16x32(), i16x32::ZERO, k))
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi16&expand=2308)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub unsafe fn _mm256_mask_expand_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    transmute(vpexpandw256(a.as_i16x16(), src.as_i16x16(), k))
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi16&expand=2309)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub unsafe fn _mm256_maskz_expand_epi16(k: __mmask16, a: __m256i) -> __m256i {
    transmute(vpexpandw256(a.as_i16x16(), i16x16::ZERO, k))
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi16&expand=2306)
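///
/// # Examples
///
/// A minimal usage sketch, assuming the running CPU supports `avx512vbmi2`
/// and `avx512vl`; the mask and the lane values are illustrative only:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let src = _mm_set1_epi16(-1);
///     let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
///     // The lowest elements of `a` (1 and 2) are spread into the active
///     // lanes 2 and 5; every other lane is copied from `src`.
///     let r = _mm_mask_expand_epi16(src, 0b0010_0100, a);
///     let mut out = [0i16; 8];
///     _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///     assert_eq!(out, [-1, -1, 1, -1, -1, 2, -1, -1]);
/// }
/// ```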
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub unsafe fn _mm_mask_expand_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpexpandw128(a.as_i16x8(), src.as_i16x8(), k))
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi16&expand=2307)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub unsafe fn _mm_maskz_expand_epi16(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpexpandw128(a.as_i16x8(), i16x8::ZERO, k))
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi8&expand=2328)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub unsafe fn _mm512_mask_expand_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    transmute(vpexpandb(a.as_i8x64(), src.as_i8x64(), k))
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi8&expand=2329)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub unsafe fn _mm512_maskz_expand_epi8(k: __mmask64, a: __m512i) -> __m512i {
    transmute(vpexpandb(a.as_i8x64(), i8x64::ZERO, k))
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi8&expand=2326)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub unsafe fn _mm256_mask_expand_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    transmute(vpexpandb256(a.as_i8x32(), src.as_i8x32(), k))
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi8&expand=2327)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub unsafe fn _mm256_maskz_expand_epi8(k: __mmask32, a: __m256i) -> __m256i {
    transmute(vpexpandb256(a.as_i8x32(), i8x32::ZERO, k))
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi8&expand=2324)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub unsafe fn _mm_mask_expand_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    transmute(vpexpandb128(a.as_i8x16(), src.as_i8x16(), k))
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi8&expand=2325)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub unsafe fn _mm_maskz_expand_epi8(k: __mmask16, a: __m128i) -> __m128i {
    transmute(vpexpandb128(a.as_i8x16(), i8x16::ZERO, k))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi64&expand=5087)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm512_shldv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    transmute(vpshldvq(a.as_i64x8(), b.as_i64x8(), c.as_i64x8()))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi64&expand=5085)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm512_mask_shldv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
    let shf = _mm512_shldv_epi64(a, b, c).as_i64x8();
    transmute(simd_select_bitmask(k, shf, a.as_i64x8()))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi64&expand=5086)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm512_maskz_shldv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    let shf = _mm512_shldv_epi64(a, b, c).as_i64x8();
    transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi64&expand=5084)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm256_shldv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    transmute(vpshldvq256(a.as_i64x4(), b.as_i64x4(), c.as_i64x4()))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi64&expand=5082)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm256_mask_shldv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    let shf = _mm256_shldv_epi64(a, b, c).as_i64x4();
    transmute(simd_select_bitmask(k, shf, a.as_i64x4()))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi64&expand=5083)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm256_maskz_shldv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    let shf = _mm256_shldv_epi64(a, b, c).as_i64x4();
    transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi64&expand=5081)
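///
/// # Examples
///
/// A minimal usage sketch, assuming the running CPU supports `avx512vbmi2`
/// and `avx512vl`; the operand values are illustrative only:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set1_epi64x(1);
///     let b = _mm_set1_epi64x(i64::MIN); // 0x8000_0000_0000_0000
///     let c = _mm_set1_epi64x(4);
///     // Per lane: the upper 64 bits of ((a:b) << 4), i.e.
///     // (a << 4) | ((b as u64) >> 60) = 0x10 | 0x8 = 24.
///     let r = _mm_shldv_epi64(a, b, c);
///     let mut out = [0i64; 2];
///     _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///     assert_eq!(out, [24, 24]);
/// }
/// ```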
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm_shldv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    transmute(vpshldvq128(a.as_i64x2(), b.as_i64x2(), c.as_i64x2()))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi64&expand=5079)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm_mask_shldv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    let shf = _mm_shldv_epi64(a, b, c).as_i64x2();
    transmute(simd_select_bitmask(k, shf, a.as_i64x2()))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi64&expand=5080)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub unsafe fn _mm_maskz_shldv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    let shf = _mm_shldv_epi64(a, b, c).as_i64x2();
    transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi32&expand=5078)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm512_shldv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    transmute(vpshldvd(a.as_i32x16(), b.as_i32x16(), c.as_i32x16()))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi32&expand=5076)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm512_mask_shldv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i {
    let shf = _mm512_shldv_epi32(a, b, c).as_i32x16();
    transmute(simd_select_bitmask(k, shf, a.as_i32x16()))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi32&expand=5077)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm512_maskz_shldv_epi32(
    k: __mmask16,
    a: __m512i,
    b: __m512i,
    c: __m512i,
) -> __m512i {
    let shf = _mm512_shldv_epi32(a, b, c).as_i32x16();
    transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi32&expand=5075)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm256_shldv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    transmute(vpshldvd256(a.as_i32x8(), b.as_i32x8(), c.as_i32x8()))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi32&expand=5073)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm256_mask_shldv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    let shf = _mm256_shldv_epi32(a, b, c).as_i32x8();
    transmute(simd_select_bitmask(k, shf, a.as_i32x8()))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi32&expand=5074)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm256_maskz_shldv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    let shf = _mm256_shldv_epi32(a, b, c).as_i32x8();
    transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi32&expand=5072)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm_shldv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    transmute(vpshldvd128(a.as_i32x4(), b.as_i32x4(), c.as_i32x4()))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi32&expand=5070)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm_mask_shldv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    let shf = _mm_shldv_epi32(a, b, c).as_i32x4();
    transmute(simd_select_bitmask(k, shf, a.as_i32x4()))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi32&expand=5071)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub unsafe fn _mm_maskz_shldv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    let shf = _mm_shldv_epi32(a, b, c).as_i32x4();
    transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi16&expand=5069)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm512_shldv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    transmute(vpshldvw(a.as_i16x32(), b.as_i16x32(), c.as_i16x32()))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi16&expand=5067)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm512_mask_shldv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i {
    let shf = _mm512_shldv_epi16(a, b, c).as_i16x32();
    transmute(simd_select_bitmask(k, shf, a.as_i16x32()))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi16&expand=5068)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm512_maskz_shldv_epi16(
    k: __mmask32,
    a: __m512i,
    b: __m512i,
    c: __m512i,
) -> __m512i {
    let shf = _mm512_shldv_epi16(a, b, c).as_i16x32();
    transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi16&expand=5066)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm256_shldv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    transmute(vpshldvw256(a.as_i16x16(), b.as_i16x16(), c.as_i16x16()))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi16&expand=5064)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm256_mask_shldv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i {
    let shf = _mm256_shldv_epi16(a, b, c).as_i16x16();
    transmute(simd_select_bitmask(k, shf, a.as_i16x16()))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi16&expand=5065)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm256_maskz_shldv_epi16(
    k: __mmask16,
    a: __m256i,
    b: __m256i,
    c: __m256i,
) -> __m256i {
    let shf = _mm256_shldv_epi16(a, b, c).as_i16x16();
    transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi16&expand=5063)
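///
/// # Examples
///
/// A minimal usage sketch, assuming the running CPU supports `avx512vbmi2`
/// and `avx512vl`; the operand values are illustrative only:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set1_epi16(1);
///     let b = _mm_set1_epi16(i16::MIN); // 0x8000
///     let c = _mm_set1_epi16(4);
///     // Per lane: the upper 16 bits of ((a:b) << 4), i.e.
///     // (a << 4) | (0x8000 >> 12) = 0x10 | 0x8 = 24.
///     let r = _mm_shldv_epi16(a, b, c);
///     let mut out = [0i16; 8];
///     _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///     assert_eq!(out, [24; 8]);
/// }
/// ```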
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm_shldv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    transmute(vpshldvw128(a.as_i16x8(), b.as_i16x8(), c.as_i16x8()))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi16&expand=5061)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm_mask_shldv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    let shf = _mm_shldv_epi16(a, b, c).as_i16x8();
    transmute(simd_select_bitmask(k, shf, a.as_i16x8()))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi16&expand=5062)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub unsafe fn _mm_maskz_shldv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    let shf = _mm_shldv_epi16(a, b, c).as_i16x8();
    transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi64&expand=5141)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub unsafe fn _mm512_shrdv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    transmute(vpshrdvq(b.as_i64x8(), a.as_i64x8(), c.as_i64x8()))
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi64&expand=5139)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub unsafe fn _mm512_mask_shrdv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
    let shf = _mm512_shrdv_epi64(a, b, c).as_i64x8();
    transmute(simd_select_bitmask(k, shf, a.as_i64x8()))
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi64&expand=5140)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub unsafe fn _mm512_maskz_shrdv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    let shf = _mm512_shrdv_epi64(a, b, c).as_i64x8();
    transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi64&expand=5138)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub unsafe fn _mm256_shrdv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    transmute(vpshrdvq256(b.as_i64x4(), a.as_i64x4(), c.as_i64x4()))
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi64&expand=5136)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub unsafe fn _mm256_mask_shrdv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    let shf = _mm256_shrdv_epi64(a, b, c).as_i64x4();
    transmute(simd_select_bitmask(k, shf, a.as_i64x4()))
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi64&expand=5137)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub unsafe fn _mm256_maskz_shrdv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    let shf = _mm256_shrdv_epi64(a, b, c).as_i64x4();
    transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi64&expand=5135)
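///
/// # Examples
///
/// A minimal usage sketch, assuming the running CPU supports `avx512vbmi2`
/// and `avx512vl`; the operand values are illustrative only:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set1_epi64x(0x10);
///     let b = _mm_set1_epi64x(1);
///     let c = _mm_set1_epi64x(4);
///     // Per lane: the lower 64 bits of ((b:a) >> 4), i.e.
///     // (a >> 4) | (b << 60) = 0x1 | 0x1000_0000_0000_0000.
///     let r = _mm_shrdv_epi64(a, b, c);
///     let mut out = [0i64; 2];
///     _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///     assert_eq!(out, [0x1000_0000_0000_0001; 2]);
/// }
/// ```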
898#[inline]
899#[target_feature(enable = "avx512vbmi2,avx512vl")]
900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
901#[cfg_attr(test, assert_instr(vpshrdvq))]
902pub unsafe fn _mm_shrdv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
903    transmute(vpshrdvq128(b.as_i64x2(), a.as_i64x2(), c.as_i64x2()))
904}
905
906/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
907///
908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi64&expand=5133)
909#[inline]
910#[target_feature(enable = "avx512vbmi2,avx512vl")]
911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
912#[cfg_attr(test, assert_instr(vpshrdvq))]
913pub unsafe fn _mm_mask_shrdv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
914    let shf = _mm_shrdv_epi64(a, b, c).as_i64x2();
915    transmute(simd_select_bitmask(k, shf, a.as_i64x2()))
916}
917
918/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
919///
920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi64&expand=5134)
921#[inline]
922#[target_feature(enable = "avx512vbmi2,avx512vl")]
923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
924#[cfg_attr(test, assert_instr(vpshrdvq))]
925pub unsafe fn _mm_maskz_shrdv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
926    let shf = _mm_shrdv_epi64(a, b, c).as_i64x2();
927    transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
928}
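
// Illustrative sketch, not part of the API surface: shows that the masked `shrdv` forms
// merge inactive lanes from `a` (the first operand), unlike the `shldi`/`shrdi` forms
// further below, which merge from a separate `src`. The helper name and constants are
// assumptions for demonstration; requires AVX512VBMI2 and AVX512VL at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
unsafe fn example_mask_shrdv_epi64() {
    let a = _mm_set1_epi64x(0xFF00);
    let b = _mm_set1_epi64x(0);
    let c = _mm_set1_epi64x(8);
    // Bit 0 of the mask is set, bit 1 is clear.
    let r = _mm_mask_shrdv_epi64(a, 0b01, b, c);
    let lanes: [i64; 2] = transmute(r);
    assert_eq!(lanes[0], 0xFF); // active lane: (0:0xFF00) >> 8, low 64 bits
    assert_eq!(lanes[1], 0xFF00); // inactive lane: copied from a
}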

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi32&expand=5132)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvd))]
pub unsafe fn _mm512_shrdv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    transmute(vpshrdvd(b.as_i32x16(), a.as_i32x16(), c.as_i32x16()))
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi32&expand=5130)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvd))]
pub unsafe fn _mm512_mask_shrdv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i {
    let shf = _mm512_shrdv_epi32(a, b, c).as_i32x16();
    transmute(simd_select_bitmask(k, shf, a.as_i32x16()))
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi32&expand=5131)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvd))]
pub unsafe fn _mm512_maskz_shrdv_epi32(
    k: __mmask16,
    a: __m512i,
    b: __m512i,
    c: __m512i,
) -> __m512i {
    let shf = _mm512_shrdv_epi32(a, b, c).as_i32x16();
    transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi32&expand=5129)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvd))]
pub unsafe fn _mm256_shrdv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    transmute(vpshrdvd256(b.as_i32x8(), a.as_i32x8(), c.as_i32x8()))
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi32&expand=5127)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvd))]
pub unsafe fn _mm256_mask_shrdv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    let shf = _mm256_shrdv_epi32(a, b, c).as_i32x8();
    transmute(simd_select_bitmask(k, shf, a.as_i32x8()))
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi32&expand=5128)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvd))]
pub unsafe fn _mm256_maskz_shrdv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    let shf = _mm256_shrdv_epi32(a, b, c).as_i32x8();
    transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi32&expand=5126)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvd))]
pub unsafe fn _mm_shrdv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    transmute(vpshrdvd128(b.as_i32x4(), a.as_i32x4(), c.as_i32x4()))
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi32&expand=5124)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvd))]
pub unsafe fn _mm_mask_shrdv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    let shf = _mm_shrdv_epi32(a, b, c).as_i32x4();
    transmute(simd_select_bitmask(k, shf, a.as_i32x4()))
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi32&expand=5125)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvd))]
pub unsafe fn _mm_maskz_shrdv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    let shf = _mm_shrdv_epi32(a, b, c).as_i32x4();
    transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
}
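
// Illustrative sketch (assumed helper, not from the original source): per-lane semantics
// of the 32-bit variable funnel shift right. Constants are arbitrary; requires AVX512VBMI2.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512vbmi2")]
unsafe fn example_shrdv_epi32() {
    let a = _mm512_set1_epi32(0x80); // supplies the low half of each 64-bit pair
    let b = _mm512_set1_epi32(0x1); // supplies the high half of each 64-bit pair
    let c = _mm512_set1_epi32(4);
    let r = _mm512_shrdv_epi32(a, b, c);
    let lanes: [i32; 16] = transmute(r);
    // (0x1:0x0000_0080) >> 4, keeping the low 32 bits: 0x1000_0008.
    assert_eq!(lanes[0], 0x1000_0008);
}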

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi16&expand=5123)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvw))]
pub unsafe fn _mm512_shrdv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    transmute(vpshrdvw(b.as_i16x32(), a.as_i16x32(), c.as_i16x32()))
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi16&expand=5121)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvw))]
pub unsafe fn _mm512_mask_shrdv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i {
    let shf = _mm512_shrdv_epi16(a, b, c).as_i16x32();
    transmute(simd_select_bitmask(k, shf, a.as_i16x32()))
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi16&expand=5122)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvw))]
pub unsafe fn _mm512_maskz_shrdv_epi16(
    k: __mmask32,
    a: __m512i,
    b: __m512i,
    c: __m512i,
) -> __m512i {
    let shf = _mm512_shrdv_epi16(a, b, c).as_i16x32();
    transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi16&expand=5120)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvw))]
pub unsafe fn _mm256_shrdv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    transmute(vpshrdvw256(b.as_i16x16(), a.as_i16x16(), c.as_i16x16()))
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi16&expand=5118)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvw))]
pub unsafe fn _mm256_mask_shrdv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i {
    let shf = _mm256_shrdv_epi16(a, b, c).as_i16x16();
    transmute(simd_select_bitmask(k, shf, a.as_i16x16()))
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi16&expand=5119)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvw))]
pub unsafe fn _mm256_maskz_shrdv_epi16(
    k: __mmask16,
    a: __m256i,
    b: __m256i,
    c: __m256i,
) -> __m256i {
    let shf = _mm256_shrdv_epi16(a, b, c).as_i16x16();
    transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi16&expand=5117)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvw))]
pub unsafe fn _mm_shrdv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    transmute(vpshrdvw128(b.as_i16x8(), a.as_i16x8(), c.as_i16x8()))
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi16&expand=5115)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvw))]
pub unsafe fn _mm_mask_shrdv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    let shf = _mm_shrdv_epi16(a, b, c).as_i16x8();
    transmute(simd_select_bitmask(k, shf, a.as_i16x8()))
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi16&expand=5116)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshrdvw))]
pub unsafe fn _mm_maskz_shrdv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    let shf = _mm_shrdv_epi16(a, b, c).as_i16x8();
    transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
}
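
// Illustrative sketch (assumed helper): 16-bit variable funnel shift right on a 128-bit
// vector. Constants are arbitrary; requires AVX512VBMI2 and AVX512VL at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
unsafe fn example_shrdv_epi16() {
    let a = _mm_set1_epi16(0x00F0);
    let b = _mm_set1_epi16(0x0001);
    let c = _mm_set1_epi16(4);
    let r = _mm_shrdv_epi16(a, b, c);
    let lanes: [i16; 8] = transmute(r);
    // (0x0001:0x00F0) >> 4, keeping the low 16 bits: 0x100F.
    assert_eq!(lanes[0], 0x100F);
}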

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi64&expand=5060)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_shldi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_shldv_epi64(a, b, _mm512_set1_epi64(IMM8 as i64))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi64&expand=5058)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_shldi_epi64<const IMM8: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm512_shldi_epi64::<IMM8>(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi64&expand=5059)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_maskz_shldi_epi64<const IMM8: i32>(
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm512_shldi_epi64::<IMM8>(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi64&expand=5057)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_shldi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_shldv_epi64(a, b, _mm256_set1_epi64x(IMM8 as i64))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi64&expand=5055)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm256_mask_shldi_epi64<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm256_shldi_epi64::<IMM8>(a, b).as_i64x4();
    transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi64&expand=5056)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_maskz_shldi_epi64<const IMM8: i32>(
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm256_shldi_epi64::<IMM8>(a, b).as_i64x4();
    transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi64&expand=5054)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_shldi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_shldv_epi64(a, b, _mm_set1_epi64x(IMM8 as i64))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi64&expand=5052)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask_shldi_epi64<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm_shldi_epi64::<IMM8>(a, b).as_i64x2();
    transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi64&expand=5053)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_maskz_shldi_epi64<const IMM8: i32>(
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm_shldi_epi64::<IMM8>(a, b).as_i64x2();
    transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
}
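
// Illustrative sketch (assumed helper): the immediate form is the variable form with the
// count broadcast to every lane, as the implementations above show. Constants are
// arbitrary; requires AVX512VBMI2 at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512vbmi2")]
unsafe fn example_shldi_epi64() {
    let a = _mm512_set1_epi64(0x1); // supplies the high half of each 128-bit pair
    let b = _mm512_set1_epi64(0x0100_0000_0000_0000); // supplies the low half
    let r = _mm512_shldi_epi64::<8>(a, b);
    let lanes: [i64; 8] = transmute(r);
    // (0x1:0x0100_0000_0000_0000) << 8, keeping the high 64 bits: 0x101.
    assert_eq!(lanes[0], 0x101);
}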

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi32&expand=5051)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_shldi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_shldv_epi32(a, b, _mm512_set1_epi32(IMM8))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi32&expand=5049)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_shldi_epi32<const IMM8: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm512_shldi_epi32::<IMM8>(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi32&expand=5050)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_maskz_shldi_epi32<const IMM8: i32>(
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm512_shldi_epi32::<IMM8>(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi32&expand=5048)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_shldi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_shldv_epi32(a, b, _mm256_set1_epi32(IMM8))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi32&expand=5046)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm256_mask_shldi_epi32<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm256_shldi_epi32::<IMM8>(a, b).as_i32x8();
    transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi32&expand=5047)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_maskz_shldi_epi32<const IMM8: i32>(
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm256_shldi_epi32::<IMM8>(a, b).as_i32x8();
    transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi32&expand=5045)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_shldi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_shldv_epi32(a, b, _mm_set1_epi32(IMM8))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi32&expand=5043)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask_shldi_epi32<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm_shldi_epi32::<IMM8>(a, b).as_i32x4();
    transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi32&expand=5044)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_maskz_shldi_epi32<const IMM8: i32>(
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm_shldi_epi32::<IMM8>(a, b).as_i32x4();
    transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
}
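
// Illustrative sketch (assumed helper): 32-bit funnel shift left by an immediate, with
// zero-masking. Constants are arbitrary; requires AVX512VBMI2 and AVX512VL at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
unsafe fn example_maskz_shldi_epi32() {
    let a = _mm256_set1_epi32(0x1);
    let b = _mm256_set1_epi32(0x8000_0000u32 as i32);
    // Only the lowest lane is active.
    let r = _mm256_maskz_shldi_epi32::<4>(0b0000_0001, a, b);
    let lanes: [i32; 8] = transmute(r);
    assert_eq!(lanes[0], 0x18); // (a << 4) | (b >> 28)
    assert_eq!(lanes[1], 0); // zeroed by the mask
}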

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi16&expand=5042)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_shldi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_shldv_epi16(a, b, _mm512_set1_epi16(IMM8 as i16))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi16&expand=5040)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_shldi_epi16<const IMM8: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm512_shldi_epi16::<IMM8>(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi16&expand=5041)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_maskz_shldi_epi16<const IMM8: i32>(
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm512_shldi_epi16::<IMM8>(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi16&expand=5039)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_shldi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_shldv_epi16(a, b, _mm256_set1_epi16(IMM8 as i16))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi16&expand=5037)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm256_mask_shldi_epi16<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm256_shldi_epi16::<IMM8>(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi16&expand=5038)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_maskz_shldi_epi16<const IMM8: i32>(
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm256_shldi_epi16::<IMM8>(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi16&expand=5036)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_shldi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_shldv_epi16(a, b, _mm_set1_epi16(IMM8 as i16))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi16&expand=5034)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask_shldi_epi16<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm_shldi_epi16::<IMM8>(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi16&expand=5035)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_maskz_shldi_epi16<const IMM8: i32>(
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm_shldi_epi16::<IMM8>(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
}
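
// Illustrative sketch (assumed helper): 16-bit funnel shift left by an immediate.
// Constants are arbitrary; requires AVX512VBMI2 and AVX512VL at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
unsafe fn example_shldi_epi16() {
    let a = _mm_set1_epi16(0x00AB);
    let b = _mm_set1_epi16(0xCD00u16 as i16);
    let r = _mm_shldi_epi16::<8>(a, b);
    let lanes: [u16; 8] = transmute(r);
    // (0x00AB:0xCD00) << 8, keeping the high 16 bits: 0xABCD.
    assert_eq!(lanes[0], 0xABCD);
}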

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi64&expand=5114)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_shrdi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_shrdv_epi64(a, b, _mm512_set1_epi64(IMM8 as i64))
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi64&expand=5112)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_shrdi_epi64<const IMM8: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm512_shrdi_epi64::<IMM8>(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi64&expand=5113)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 255))] //should be vpshrdq
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_maskz_shrdi_epi64<const IMM8: i32>(
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm512_shrdi_epi64::<IMM8>(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi64&expand=5111)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_shrdi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_shrdv_epi64(a, b, _mm256_set1_epi64x(IMM8 as i64))
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi64&expand=5109)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm256_mask_shrdi_epi64<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm256_shrdi_epi64::<IMM8>(a, b).as_i64x4();
    transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi64&expand=5110)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_maskz_shrdi_epi64<const IMM8: i32>(
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm256_shrdi_epi64::<IMM8>(a, b).as_i64x4();
    transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi64&expand=5108)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_shrdi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_shrdv_epi64(a, b, _mm_set1_epi64x(IMM8 as i64))
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi64&expand=5106)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask_shrdi_epi64<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm_shrdi_epi64::<IMM8>(a, b).as_i64x2();
    transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi64&expand=5107)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_maskz_shrdi_epi64<const IMM8: i32>(
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm_shrdi_epi64::<IMM8>(a, b).as_i64x2();
    transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
}
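
// Illustrative sketch (assumed helper): 64-bit funnel shift right by an immediate; the low
// bits come from `a` and the bits shifted in come from `b`. Constants are arbitrary;
// requires AVX512VBMI2 and AVX512VL at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
unsafe fn example_shrdi_epi64() {
    let a = _mm_set1_epi64x(0x1234_0000);
    let b = _mm_set1_epi64x(0xABCD);
    let r = _mm_shrdi_epi64::<16>(a, b);
    let lanes: [u64; 2] = transmute(r);
    // (0xABCD:0x1234_0000) >> 16, keeping the low 64 bits.
    assert_eq!(lanes[0], 0xABCD_0000_0000_1234);
}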

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi32&expand=5105)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_shrdi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_shrdv_epi32(a, b, _mm512_set1_epi32(IMM8))
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi32&expand=5103)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_shrdi_epi32<const IMM8: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm512_shrdi_epi32::<IMM8>(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi32&expand=5104)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_maskz_shrdi_epi32<const IMM8: i32>(
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm512_shrdi_epi32::<IMM8>(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi32&expand=5102)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_shrdi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_shrdv_epi32(a, b, _mm256_set1_epi32(IMM8))
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi32&expand=5100)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm256_mask_shrdi_epi32<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm256_shrdi_epi32::<IMM8>(a, b).as_i32x8();
    transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi32&expand=5101)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_maskz_shrdi_epi32<const IMM8: i32>(
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm256_shrdi_epi32::<IMM8>(a, b).as_i32x8();
    transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi32&expand=5099)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_shrdi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_shrdv_epi32(a, b, _mm_set1_epi32(IMM8))
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi32&expand=5097)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask_shrdi_epi32<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm_shrdi_epi32::<IMM8>(a, b).as_i32x4();
    transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
}

/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi32&expand=5098)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_maskz_shrdi_epi32<const IMM8: i32>(
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm_shrdi_epi32::<IMM8>(a, b).as_i32x4();
    transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
}
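
// Illustrative sketch (assumed helper): masked 32-bit funnel shift right by an immediate;
// inactive lanes are taken from `src`. Constants are arbitrary; requires AVX512VBMI2 and
// AVX512VL at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
unsafe fn example_mask_shrdi_epi32() {
    let src = _mm256_set1_epi32(-1);
    let a = _mm256_set1_epi32(0xAB00);
    let b = _mm256_set1_epi32(0xCD);
    let r = _mm256_mask_shrdi_epi32::<8>(src, 0b0000_1111, a, b);
    let lanes: [u32; 8] = transmute(r);
    assert_eq!(lanes[0], 0xCD00_00AB); // active lane: (0xCD:0xAB00) >> 8
    assert_eq!(lanes[4], 0xFFFF_FFFF); // inactive lane: copied from src
}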

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi16&expand=5096)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_shrdi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_shrdv_epi16(a, b, _mm512_set1_epi16(IMM8 as i16))
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi16&expand=5094)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_shrdi_epi16<const IMM8: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm512_shrdi_epi16::<IMM8>(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
}

/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi16&expand=5095)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_maskz_shrdi_epi16<const IMM8: i32>(
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm512_shrdi_epi16::<IMM8>(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
}
1954
1955/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
1956///
1957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi16&expand=5093)
1958#[inline]
1959#[target_feature(enable = "avx512vbmi2,avx512vl")]
1960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1961#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
1962#[rustc_legacy_const_generics(2)]
1963pub unsafe fn _mm256_shrdi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1964    static_assert_uimm_bits!(IMM8, 8);
1965    _mm256_shrdv_epi16(a, b, _mm256_set1_epi16(IMM8 as i16))
1966}
1967
1968/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1969///
1970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi16&expand=5091)
1971#[inline]
1972#[target_feature(enable = "avx512vbmi2,avx512vl")]
1973#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1974#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
1975#[rustc_legacy_const_generics(4)]
1976pub unsafe fn _mm256_mask_shrdi_epi16<const IMM8: i32>(
1977    src: __m256i,
1978    k: __mmask16,
1979    a: __m256i,
1980    b: __m256i,
1981) -> __m256i {
1982    static_assert_uimm_bits!(IMM8, 8);
1983    let shf = _mm256_shrdi_epi16::<IMM8>(a, b).as_i16x16();
1984    transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
1985}
1986
1987/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1988///
1989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi16&expand=5092)
1990#[inline]
1991#[target_feature(enable = "avx512vbmi2,avx512vl")]
1992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1993#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
1994#[rustc_legacy_const_generics(3)]
1995pub unsafe fn _mm256_maskz_shrdi_epi16<const IMM8: i32>(
1996    k: __mmask16,
1997    a: __m256i,
1998    b: __m256i,
1999) -> __m256i {
2000    static_assert_uimm_bits!(IMM8, 8);
2001    let shf = _mm256_shrdi_epi16::<IMM8>(a, b).as_i16x16();
2002    transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
2003}
2004
2005/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
2006///
2007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi16&expand=5090)
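///
/// # Examples
///
/// An illustrative sketch (not part of Intel's documentation), under the same feature
/// assumptions as the 32-bit variants.
///
/// ```ignore
/// let a = _mm_set1_epi16(4);
/// let b = _mm_set1_epi16(1);
/// // Each lane computes the low 16 bits of ((b:a) >> 2), i.e. (a >> 2) | (b << 14),
/// // so every lane is 0x4001.
/// let r = _mm_shrdi_epi16::<2>(a, b);
/// ```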
2008#[inline]
2009#[target_feature(enable = "avx512vbmi2,avx512vl")]
2010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2011#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2012#[rustc_legacy_const_generics(2)]
2013pub unsafe fn _mm_shrdi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
2014    static_assert_uimm_bits!(IMM8, 8);
2015    _mm_shrdv_epi16(a, b, _mm_set1_epi16(IMM8 as i16))
2016}
2017
2018/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2019///
2020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi16&expand=5088)
2021#[inline]
2022#[target_feature(enable = "avx512vbmi2,avx512vl")]
2023#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2024#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2025#[rustc_legacy_const_generics(4)]
2026pub unsafe fn _mm_mask_shrdi_epi16<const IMM8: i32>(
2027    src: __m128i,
2028    k: __mmask8,
2029    a: __m128i,
2030    b: __m128i,
2031) -> __m128i {
2032    static_assert_uimm_bits!(IMM8, 8);
2033    let shf = _mm_shrdi_epi16::<IMM8>(a, b).as_i16x8();
2034    transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
2035}
2036
2037/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2038///
2039/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi16&expand=5089)
2040#[inline]
2041#[target_feature(enable = "avx512vbmi2,avx512vl")]
2042#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2043#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2044#[rustc_legacy_const_generics(3)]
2045pub unsafe fn _mm_maskz_shrdi_epi16<const IMM8: i32>(
2046    k: __mmask8,
2047    a: __m128i,
2048    b: __m128i,
2049) -> __m128i {
2050    static_assert_uimm_bits!(IMM8, 8);
2051    let shf = _mm_shrdi_epi16::<IMM8>(a, b).as_i16x8();
2052    transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
2053}
2054
2055#[allow(improper_ctypes)]
2056extern "C" {
2057    #[link_name = "llvm.x86.avx512.mask.compress.store.w.512"]
2058    fn vcompressstorew(mem: *mut i8, data: i16x32, mask: u32);
2059    #[link_name = "llvm.x86.avx512.mask.compress.store.w.256"]
2060    fn vcompressstorew256(mem: *mut i8, data: i16x16, mask: u16);
2061    #[link_name = "llvm.x86.avx512.mask.compress.store.w.128"]
2062    fn vcompressstorew128(mem: *mut i8, data: i16x8, mask: u8);
2063
2064    #[link_name = "llvm.x86.avx512.mask.compress.store.b.512"]
2065    fn vcompressstoreb(mem: *mut i8, data: i8x64, mask: u64);
2066    #[link_name = "llvm.x86.avx512.mask.compress.store.b.256"]
2067    fn vcompressstoreb256(mem: *mut i8, data: i8x32, mask: u32);
2068    #[link_name = "llvm.x86.avx512.mask.compress.store.b.128"]
2069    fn vcompressstoreb128(mem: *mut i8, data: i8x16, mask: u16);
2070
2071    #[link_name = "llvm.x86.avx512.mask.compress.w.512"]
2072    fn vpcompressw(a: i16x32, src: i16x32, mask: u32) -> i16x32;
2073    #[link_name = "llvm.x86.avx512.mask.compress.w.256"]
2074    fn vpcompressw256(a: i16x16, src: i16x16, mask: u16) -> i16x16;
2075    #[link_name = "llvm.x86.avx512.mask.compress.w.128"]
2076    fn vpcompressw128(a: i16x8, src: i16x8, mask: u8) -> i16x8;
2077
2078    #[link_name = "llvm.x86.avx512.mask.compress.b.512"]
2079    fn vpcompressb(a: i8x64, src: i8x64, mask: u64) -> i8x64;
2080    #[link_name = "llvm.x86.avx512.mask.compress.b.256"]
2081    fn vpcompressb256(a: i8x32, src: i8x32, mask: u32) -> i8x32;
2082    #[link_name = "llvm.x86.avx512.mask.compress.b.128"]
2083    fn vpcompressb128(a: i8x16, src: i8x16, mask: u16) -> i8x16;
2084
2085    #[link_name = "llvm.x86.avx512.mask.expand.w.512"]
2086    fn vpexpandw(a: i16x32, src: i16x32, mask: u32) -> i16x32;
2087    #[link_name = "llvm.x86.avx512.mask.expand.w.256"]
2088    fn vpexpandw256(a: i16x16, src: i16x16, mask: u16) -> i16x16;
2089    #[link_name = "llvm.x86.avx512.mask.expand.w.128"]
2090    fn vpexpandw128(a: i16x8, src: i16x8, mask: u8) -> i16x8;
2091
2092    #[link_name = "llvm.x86.avx512.mask.expand.b.512"]
2093    fn vpexpandb(a: i8x64, src: i8x64, mask: u64) -> i8x64;
2094    #[link_name = "llvm.x86.avx512.mask.expand.b.256"]
2095    fn vpexpandb256(a: i8x32, src: i8x32, mask: u32) -> i8x32;
2096    #[link_name = "llvm.x86.avx512.mask.expand.b.128"]
2097    fn vpexpandb128(a: i8x16, src: i8x16, mask: u16) -> i8x16;
2098
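    // The variable-count shift intrinsics below are not AVX-512-specific builtins:
    // they lower to LLVM's generic funnel shifts. fshl(a, b, c) returns the high
    // half of (a:b) << (c mod width) and fshr(a, b, c) the low half of
    // (a:b) >> (c mod width), which matches the VPSHLDV*/VPSHRDV* semantics.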
2099    #[link_name = "llvm.fshl.v8i64"]
2100    fn vpshldvq(a: i64x8, b: i64x8, c: i64x8) -> i64x8;
2101    #[link_name = "llvm.fshl.v4i64"]
2102    fn vpshldvq256(a: i64x4, b: i64x4, c: i64x4) -> i64x4;
2103    #[link_name = "llvm.fshl.v2i64"]
2104    fn vpshldvq128(a: i64x2, b: i64x2, c: i64x2) -> i64x2;
2105    #[link_name = "llvm.fshl.v16i32"]
2106    fn vpshldvd(a: i32x16, b: i32x16, c: i32x16) -> i32x16;
2107    #[link_name = "llvm.fshl.v8i32"]
2108    fn vpshldvd256(a: i32x8, b: i32x8, c: i32x8) -> i32x8;
2109    #[link_name = "llvm.fshl.v4i32"]
2110    fn vpshldvd128(a: i32x4, b: i32x4, c: i32x4) -> i32x4;
2111    #[link_name = "llvm.fshl.v32i16"]
2112    fn vpshldvw(a: i16x32, b: i16x32, c: i16x32) -> i16x32;
2113    #[link_name = "llvm.fshl.v16i16"]
2114    fn vpshldvw256(a: i16x16, b: i16x16, c: i16x16) -> i16x16;
2115    #[link_name = "llvm.fshl.v8i16"]
2116    fn vpshldvw128(a: i16x8, b: i16x8, c: i16x8) -> i16x8;
2117
2118    #[link_name = "llvm.fshr.v8i64"]
2119    fn vpshrdvq(a: i64x8, b: i64x8, c: i64x8) -> i64x8;
2120    #[link_name = "llvm.fshr.v4i64"]
2121    fn vpshrdvq256(a: i64x4, b: i64x4, c: i64x4) -> i64x4;
2122    #[link_name = "llvm.fshr.v2i64"]
2123    fn vpshrdvq128(a: i64x2, b: i64x2, c: i64x2) -> i64x2;
2124    #[link_name = "llvm.fshr.v16i32"]
2125    fn vpshrdvd(a: i32x16, b: i32x16, c: i32x16) -> i32x16;
2126    #[link_name = "llvm.fshr.v8i32"]
2127    fn vpshrdvd256(a: i32x8, b: i32x8, c: i32x8) -> i32x8;
2128    #[link_name = "llvm.fshr.v4i32"]
2129    fn vpshrdvd128(a: i32x4, b: i32x4, c: i32x4) -> i32x4;
2130    #[link_name = "llvm.fshr.v32i16"]
2131    fn vpshrdvw(a: i16x32, b: i16x32, c: i16x32) -> i16x32;
2132    #[link_name = "llvm.fshr.v16i16"]
2133    fn vpshrdvw256(a: i16x16, b: i16x16, c: i16x16) -> i16x16;
2134    #[link_name = "llvm.fshr.v8i16"]
2135    fn vpshrdvw128(a: i16x8, b: i16x8, c: i16x8) -> i16x8;
2136
2137    #[link_name = "llvm.x86.avx512.mask.expand.load.b.128"]
2138    fn expandloadb_128(mem_addr: *const i8, a: i8x16, mask: u16) -> i8x16;
2139    #[link_name = "llvm.x86.avx512.mask.expand.load.w.128"]
2140    fn expandloadw_128(mem_addr: *const i16, a: i16x8, mask: u8) -> i16x8;
2141    #[link_name = "llvm.x86.avx512.mask.expand.load.b.256"]
2142    fn expandloadb_256(mem_addr: *const i8, a: i8x32, mask: u32) -> i8x32;
2143    #[link_name = "llvm.x86.avx512.mask.expand.load.w.256"]
2144    fn expandloadw_256(mem_addr: *const i16, a: i16x16, mask: u16) -> i16x16;
2145    #[link_name = "llvm.x86.avx512.mask.expand.load.b.512"]
2146    fn expandloadb_512(mem_addr: *const i8, a: i8x64, mask: u64) -> i8x64;
2147    #[link_name = "llvm.x86.avx512.mask.expand.load.w.512"]
2148    fn expandloadw_512(mem_addr: *const i16, a: i16x32, mask: u32) -> i16x32;
2149}
2150
2151#[cfg(test)]
2152mod tests {
2153
2154    use stdarch_test::simd_test;
2155
2156    use crate::core_arch::x86::*;
2157    use crate::hint::black_box;
2158
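    // Note for the compress/expand tests: `_mm*_set_epi*` lists elements from the
    // highest lane down to lane 0, while mask bit i always refers to lane i.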
2159    #[simd_test(enable = "avx512vbmi2")]
2160    unsafe fn test_mm512_mask_compress_epi16() {
2161        let src = _mm512_set1_epi16(200);
2162        #[rustfmt::skip]
2163        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2164                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2165        let r = _mm512_mask_compress_epi16(src, 0b01010101_01010101_01010101_01010101, a);
2166        #[rustfmt::skip]
2167        let e = _mm512_set_epi16(
2168            200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200,
2169            1,   3,   5,   7,   9,   11,  13,  15,  17,  19,  21,  23,  25,  27,  29,  31,
2170        );
2171        assert_eq_m512i(r, e);
2172    }
2173
2174    #[simd_test(enable = "avx512vbmi2")]
2175    unsafe fn test_mm512_maskz_compress_epi16() {
2176        #[rustfmt::skip]
2177        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2178                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2179        let r = _mm512_maskz_compress_epi16(0b01010101_01010101_01010101_01010101, a);
2180        #[rustfmt::skip]
2181        let e = _mm512_set_epi16(
2182            0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
2183            1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2184        );
2185        assert_eq_m512i(r, e);
2186    }
2187
2188    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2189    unsafe fn test_mm256_mask_compress_epi16() {
2190        let src = _mm256_set1_epi16(200);
2191        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2192        let r = _mm256_mask_compress_epi16(src, 0b01010101_01010101, a);
2193        let e = _mm256_set_epi16(
2194            200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
2195        );
2196        assert_eq_m256i(r, e);
2197    }
2198
2199    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2200    unsafe fn test_mm256_maskz_compress_epi16() {
2201        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2202        let r = _mm256_maskz_compress_epi16(0b01010101_01010101, a);
2203        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
2204        assert_eq_m256i(r, e);
2205    }
2206
2207    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2208    unsafe fn test_mm_mask_compress_epi16() {
2209        let src = _mm_set1_epi16(200);
2210        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2211        let r = _mm_mask_compress_epi16(src, 0b01010101, a);
2212        let e = _mm_set_epi16(200, 200, 200, 200, 1, 3, 5, 7);
2213        assert_eq_m128i(r, e);
2214    }
2215
2216    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2217    unsafe fn test_mm_maskz_compress_epi16() {
2218        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2219        let r = _mm_maskz_compress_epi16(0b01010101, a);
2220        let e = _mm_set_epi16(0, 0, 0, 0, 1, 3, 5, 7);
2221        assert_eq_m128i(r, e);
2222    }
2223
2224    #[simd_test(enable = "avx512vbmi2")]
2225    unsafe fn test_mm512_mask_compress_epi8() {
2226        let src = _mm512_set1_epi8(100);
2227        #[rustfmt::skip]
2228        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2229                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2230                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2231                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2232        let r = _mm512_mask_compress_epi8(
2233            src,
2234            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2235            a,
2236        );
2237        #[rustfmt::skip]
2238        let e = _mm512_set_epi8(
2239            100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
2240            100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
2241            1,   3,   5,   7,   9,   11,  13,  15,  17,  19,  21,  23,  25,  27,  29,  31,
2242            33,  35,  37,  39,  41,  43,  45,  47,  49,  51,  53,  55,  57,  59,  61,  63,
2243        );
2244        assert_eq_m512i(r, e);
2245    }
2246
2247    #[simd_test(enable = "avx512vbmi2")]
2248    unsafe fn test_mm512_maskz_compress_epi8() {
2249        #[rustfmt::skip]
2250        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2251                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2252                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2253                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2254        let r = _mm512_maskz_compress_epi8(
2255            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2256            a,
2257        );
2258        #[rustfmt::skip]
2259        let e = _mm512_set_epi8(
2260            0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
2261            0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
2262            1,  3,  5,  7,  9,  11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2263            33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63,
2264        );
2265        assert_eq_m512i(r, e);
2266    }
2267
2268    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2269    unsafe fn test_mm256_mask_compress_epi8() {
2270        let src = _mm256_set1_epi8(100);
2271        #[rustfmt::skip]
2272        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2273                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2274        let r = _mm256_mask_compress_epi8(src, 0b01010101_01010101_01010101_01010101, a);
2275        #[rustfmt::skip]
2276        let e = _mm256_set_epi8(
2277            100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
2278            1,   3,   5,   7,   9,   11,  13,  15,  17,  19,  21,  23,  25,  27,  29,  31,
2279        );
2280        assert_eq_m256i(r, e);
2281    }
2282
2283    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2284    unsafe fn test_mm256_maskz_compress_epi8() {
2285        #[rustfmt::skip]
2286        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2287                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2288        let r = _mm256_maskz_compress_epi8(0b01010101_01010101_01010101_01010101, a);
2289        #[rustfmt::skip]
2290        let e = _mm256_set_epi8(
2291            0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
2292            1,  3,  5,  7,  9,  11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2293        );
2294        assert_eq_m256i(r, e);
2295    }
2296
2297    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2298    unsafe fn test_mm_mask_compress_epi8() {
2299        let src = _mm_set1_epi8(100);
2300        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2301        let r = _mm_mask_compress_epi8(src, 0b01010101_01010101, a);
2302        let e = _mm_set_epi8(
2303            100, 100, 100, 100, 100, 100, 100, 100, 1, 3, 5, 7, 9, 11, 13, 15,
2304        );
2305        assert_eq_m128i(r, e);
2306    }
2307
2308    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2309    unsafe fn test_mm_maskz_compress_epi8() {
2310        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2311        let r = _mm_maskz_compress_epi8(0b01010101_01010101, a);
2312        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
2313        assert_eq_m128i(r, e);
2314    }
2315
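    // Expand is the inverse of compress: the low-order elements of `a` are written,
    // in order, to the lanes whose mask bit is set; inactive lanes keep `src` (or are
    // zeroed for the maskz variants).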
2316    #[simd_test(enable = "avx512vbmi2")]
2317    unsafe fn test_mm512_mask_expand_epi16() {
2318        let src = _mm512_set1_epi16(200);
2319        #[rustfmt::skip]
2320        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2321                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2322        let r = _mm512_mask_expand_epi16(src, 0b01010101_01010101_01010101_01010101, a);
2323        #[rustfmt::skip]
2324        let e = _mm512_set_epi16(
2325            200, 16, 200, 17, 200, 18, 200, 19, 200, 20, 200, 21, 200, 22, 200, 23,
2326            200, 24, 200, 25, 200, 26, 200, 27, 200, 28, 200, 29, 200, 30, 200, 31,
2327        );
2328        assert_eq_m512i(r, e);
2329    }
2330
2331    #[simd_test(enable = "avx512vbmi2")]
2332    unsafe fn test_mm512_maskz_expand_epi16() {
2333        #[rustfmt::skip]
2334        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2335                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2336        let r = _mm512_maskz_expand_epi16(0b01010101_01010101_01010101_01010101, a);
2337        #[rustfmt::skip]
2338        let e = _mm512_set_epi16(0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23,
2339                                 0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31);
2340        assert_eq_m512i(r, e);
2341    }
2342
2343    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2344    unsafe fn test_mm256_mask_expand_epi16() {
2345        let src = _mm256_set1_epi16(200);
2346        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2347        let r = _mm256_mask_expand_epi16(src, 0b01010101_01010101, a);
2348        let e = _mm256_set_epi16(
2349            200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
2350        );
2351        assert_eq_m256i(r, e);
2352    }
2353
2354    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2355    unsafe fn test_mm256_maskz_expand_epi16() {
2356        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2357        let r = _mm256_maskz_expand_epi16(0b01010101_01010101, a);
2358        let e = _mm256_set_epi16(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
2359        assert_eq_m256i(r, e);
2360    }
2361
2362    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2363    unsafe fn test_mm_mask_expand_epi16() {
2364        let src = _mm_set1_epi16(200);
2365        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2366        let r = _mm_mask_expand_epi16(src, 0b01010101, a);
2367        let e = _mm_set_epi16(200, 4, 200, 5, 200, 6, 200, 7);
2368        assert_eq_m128i(r, e);
2369    }
2370
2371    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2372    unsafe fn test_mm_maskz_expand_epi16() {
2373        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2374        let r = _mm_maskz_expand_epi16(0b01010101, a);
2375        let e = _mm_set_epi16(0, 4, 0, 5, 0, 6, 0, 7);
2376        assert_eq_m128i(r, e);
2377    }
2378
2379    #[simd_test(enable = "avx512vbmi2")]
2380    unsafe fn test_mm512_mask_expand_epi8() {
2381        let src = _mm512_set1_epi8(100);
2382        #[rustfmt::skip]
2383        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2384                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2385                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2386                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2387        let r = _mm512_mask_expand_epi8(
2388            src,
2389            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2390            a,
2391        );
2392        #[rustfmt::skip]
2393        let e = _mm512_set_epi8(
2394            100, 32, 100, 33, 100, 34, 100, 35, 100, 36, 100, 37, 100, 38, 100, 39,
2395            100, 40, 100, 41, 100, 42, 100, 43, 100, 44, 100, 45, 100, 46, 100, 47,
2396            100, 48, 100, 49, 100, 50, 100, 51, 100, 52, 100, 53, 100, 54, 100, 55,
2397            100, 56, 100, 57, 100, 58, 100, 59, 100, 60, 100, 61, 100, 62, 100, 63,
2398        );
2399        assert_eq_m512i(r, e);
2400    }
2401
2402    #[simd_test(enable = "avx512vbmi2")]
2403    unsafe fn test_mm512_maskz_expand_epi8() {
2404        #[rustfmt::skip]
2405        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2406                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2407                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2408                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2409        let r = _mm512_maskz_expand_epi8(
2410            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2411            a,
2412        );
2413        #[rustfmt::skip]
2414        let e = _mm512_set_epi8(
2415            0, 32, 0, 33, 0, 34, 0, 35, 0, 36, 0, 37, 0, 38, 0, 39,
2416            0, 40, 0, 41, 0, 42, 0, 43, 0, 44, 0, 45, 0, 46, 0, 47,
2417            0, 48, 0, 49, 0, 50, 0, 51, 0, 52, 0, 53, 0, 54, 0, 55,
2418            0, 56, 0, 57, 0, 58, 0, 59, 0, 60, 0, 61, 0, 62, 0, 63,
2419        );
2420        assert_eq_m512i(r, e);
2421    }
2422
2423    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2424    unsafe fn test_mm256_mask_expand_epi8() {
2425        let src = _mm256_set1_epi8(100);
2426        #[rustfmt::skip]
2427        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2428                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2429        let r = _mm256_mask_expand_epi8(src, 0b01010101_01010101_01010101_01010101, a);
2430        #[rustfmt::skip]
2431        let e = _mm256_set_epi8(
2432            100, 16, 100, 17, 100, 18, 100, 19, 100, 20, 100, 21, 100, 22, 100, 23,
2433            100, 24, 100, 25, 100, 26, 100, 27, 100, 28, 100, 29, 100, 30, 100, 31,
2434        );
2435        assert_eq_m256i(r, e);
2436    }
2437
2438    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2439    unsafe fn test_mm256_maskz_expand_epi8() {
2440        #[rustfmt::skip]
2441        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2442                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2443        let r = _mm256_maskz_expand_epi8(0b01010101_01010101_01010101_01010101, a);
2444        #[rustfmt::skip]
2445        let e = _mm256_set_epi8(
2446            0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23,
2447            0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31,
2448        );
2449        assert_eq_m256i(r, e);
2450    }
2451
2452    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2453    unsafe fn test_mm_mask_expand_epi8() {
2454        let src = _mm_set1_epi8(100);
2455        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2456        let r = _mm_mask_expand_epi8(src, 0b01010101_01010101, a);
2457        let e = _mm_set_epi8(
2458            100, 8, 100, 9, 100, 10, 100, 11, 100, 12, 100, 13, 100, 14, 100, 15,
2459        );
2460        assert_eq_m128i(r, e);
2461    }
2462
2463    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2464    unsafe fn test_mm_maskz_expand_epi8() {
2465        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2466        let r = _mm_maskz_expand_epi8(0b01010101_01010101, a);
2467        let e = _mm_set_epi8(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
2468        assert_eq_m128i(r, e);
2469    }
2470
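    // Left funnel-shift tests: with a = 1, b = MSB-only and a count of 2, each lane
    // becomes (a << 2) | (b >> (width - 2)) = 4 | 2 = 6.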
2471    #[simd_test(enable = "avx512vbmi2")]
2472    unsafe fn test_mm512_shldv_epi64() {
2473        let a = _mm512_set1_epi64(1);
2474        let b = _mm512_set1_epi64(1 << 63);
2475        let c = _mm512_set1_epi64(2);
2476        let r = _mm512_shldv_epi64(a, b, c);
2477        let e = _mm512_set1_epi64(6);
2478        assert_eq_m512i(r, e);
2479    }
2480
2481    #[simd_test(enable = "avx512vbmi2")]
2482    unsafe fn test_mm512_mask_shldv_epi64() {
2483        let a = _mm512_set1_epi64(1);
2484        let b = _mm512_set1_epi64(1 << 63);
2485        let c = _mm512_set1_epi64(2);
2486        let r = _mm512_mask_shldv_epi64(a, 0, b, c);
2487        assert_eq_m512i(r, a);
2488        let r = _mm512_mask_shldv_epi64(a, 0b11111111, b, c);
2489        let e = _mm512_set1_epi64(6);
2490        assert_eq_m512i(r, e);
2491    }
2492
2493    #[simd_test(enable = "avx512vbmi2")]
2494    unsafe fn test_mm512_maskz_shldv_epi64() {
2495        let a = _mm512_set1_epi64(1);
2496        let b = _mm512_set1_epi64(1 << 63);
2497        let c = _mm512_set1_epi64(2);
2498        let r = _mm512_maskz_shldv_epi64(0, a, b, c);
2499        assert_eq_m512i(r, _mm512_setzero_si512());
2500        let r = _mm512_maskz_shldv_epi64(0b11111111, a, b, c);
2501        let e = _mm512_set1_epi64(6);
2502        assert_eq_m512i(r, e);
2503    }
2504
2505    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2506    unsafe fn test_mm256_shldv_epi64() {
2507        let a = _mm256_set1_epi64x(1);
2508        let b = _mm256_set1_epi64x(1 << 63);
2509        let c = _mm256_set1_epi64x(2);
2510        let r = _mm256_shldv_epi64(a, b, c);
2511        let e = _mm256_set1_epi64x(6);
2512        assert_eq_m256i(r, e);
2513    }
2514
2515    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2516    unsafe fn test_mm256_mask_shldv_epi64() {
2517        let a = _mm256_set1_epi64x(1);
2518        let b = _mm256_set1_epi64x(1 << 63);
2519        let c = _mm256_set1_epi64x(2);
2520        let r = _mm256_mask_shldv_epi64(a, 0, b, c);
2521        assert_eq_m256i(r, a);
2522        let r = _mm256_mask_shldv_epi64(a, 0b00001111, b, c);
2523        let e = _mm256_set1_epi64x(6);
2524        assert_eq_m256i(r, e);
2525    }
2526
2527    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2528    unsafe fn test_mm256_maskz_shldv_epi64() {
2529        let a = _mm256_set1_epi64x(1);
2530        let b = _mm256_set1_epi64x(1 << 63);
2531        let c = _mm256_set1_epi64x(2);
2532        let r = _mm256_maskz_shldv_epi64(0, a, b, c);
2533        assert_eq_m256i(r, _mm256_setzero_si256());
2534        let r = _mm256_maskz_shldv_epi64(0b00001111, a, b, c);
2535        let e = _mm256_set1_epi64x(6);
2536        assert_eq_m256i(r, e);
2537    }
2538
2539    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2540    unsafe fn test_mm_shldv_epi64() {
2541        let a = _mm_set1_epi64x(1);
2542        let b = _mm_set1_epi64x(1 << 63);
2543        let c = _mm_set1_epi64x(2);
2544        let r = _mm_shldv_epi64(a, b, c);
2545        let e = _mm_set1_epi64x(6);
2546        assert_eq_m128i(r, e);
2547    }
2548
2549    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2550    unsafe fn test_mm_mask_shldv_epi64() {
2551        let a = _mm_set1_epi64x(1);
2552        let b = _mm_set1_epi64x(1 << 63);
2553        let c = _mm_set1_epi64x(2);
2554        let r = _mm_mask_shldv_epi64(a, 0, b, c);
2555        assert_eq_m128i(r, a);
2556        let r = _mm_mask_shldv_epi64(a, 0b00000011, b, c);
2557        let e = _mm_set1_epi64x(6);
2558        assert_eq_m128i(r, e);
2559    }
2560
2561    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2562    unsafe fn test_mm_maskz_shldv_epi64() {
2563        let a = _mm_set1_epi64x(1);
2564        let b = _mm_set1_epi64x(1 << 63);
2565        let c = _mm_set1_epi64x(2);
2566        let r = _mm_maskz_shldv_epi64(0, a, b, c);
2567        assert_eq_m128i(r, _mm_setzero_si128());
2568        let r = _mm_maskz_shldv_epi64(0b00000011, a, b, c);
2569        let e = _mm_set1_epi64x(6);
2570        assert_eq_m128i(r, e);
2571    }
2572
2573    #[simd_test(enable = "avx512vbmi2")]
2574    unsafe fn test_mm512_shldv_epi32() {
2575        let a = _mm512_set1_epi32(1);
2576        let b = _mm512_set1_epi32(1 << 31);
2577        let c = _mm512_set1_epi32(2);
2578        let r = _mm512_shldv_epi32(a, b, c);
2579        let e = _mm512_set1_epi32(6);
2580        assert_eq_m512i(r, e);
2581    }
2582
2583    #[simd_test(enable = "avx512vbmi2")]
2584    unsafe fn test_mm512_mask_shldv_epi32() {
2585        let a = _mm512_set1_epi32(1);
2586        let b = _mm512_set1_epi32(1 << 31);
2587        let c = _mm512_set1_epi32(2);
2588        let r = _mm512_mask_shldv_epi32(a, 0, b, c);
2589        assert_eq_m512i(r, a);
2590        let r = _mm512_mask_shldv_epi32(a, 0b11111111_11111111, b, c);
2591        let e = _mm512_set1_epi32(6);
2592        assert_eq_m512i(r, e);
2593    }
2594
2595    #[simd_test(enable = "avx512vbmi2")]
2596    unsafe fn test_mm512_maskz_shldv_epi32() {
2597        let a = _mm512_set1_epi32(1);
2598        let b = _mm512_set1_epi32(1 << 31);
2599        let c = _mm512_set1_epi32(2);
2600        let r = _mm512_maskz_shldv_epi32(0, a, b, c);
2601        assert_eq_m512i(r, _mm512_setzero_si512());
2602        let r = _mm512_maskz_shldv_epi32(0b11111111_11111111, a, b, c);
2603        let e = _mm512_set1_epi32(6);
2604        assert_eq_m512i(r, e);
2605    }
2606
2607    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2608    unsafe fn test_mm256_shldv_epi32() {
2609        let a = _mm256_set1_epi32(1);
2610        let b = _mm256_set1_epi32(1 << 31);
2611        let c = _mm256_set1_epi32(2);
2612        let r = _mm256_shldv_epi32(a, b, c);
2613        let e = _mm256_set1_epi32(6);
2614        assert_eq_m256i(r, e);
2615    }
2616
2617    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2618    unsafe fn test_mm256_mask_shldv_epi32() {
2619        let a = _mm256_set1_epi32(1);
2620        let b = _mm256_set1_epi32(1 << 31);
2621        let c = _mm256_set1_epi32(2);
2622        let r = _mm256_mask_shldv_epi32(a, 0, b, c);
2623        assert_eq_m256i(r, a);
2624        let r = _mm256_mask_shldv_epi32(a, 0b11111111, b, c);
2625        let e = _mm256_set1_epi32(6);
2626        assert_eq_m256i(r, e);
2627    }
2628
2629    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2630    unsafe fn test_mm256_maskz_shldv_epi32() {
2631        let a = _mm256_set1_epi32(1);
2632        let b = _mm256_set1_epi32(1 << 31);
2633        let c = _mm256_set1_epi32(2);
2634        let r = _mm256_maskz_shldv_epi32(0, a, b, c);
2635        assert_eq_m256i(r, _mm256_setzero_si256());
2636        let r = _mm256_maskz_shldv_epi32(0b11111111, a, b, c);
2637        let e = _mm256_set1_epi32(6);
2638        assert_eq_m256i(r, e);
2639    }
2640
2641    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2642    unsafe fn test_mm_shldv_epi32() {
2643        let a = _mm_set1_epi32(1);
2644        let b = _mm_set1_epi32(1 << 31);
2645        let c = _mm_set1_epi32(2);
2646        let r = _mm_shldv_epi32(a, b, c);
2647        let e = _mm_set1_epi32(6);
2648        assert_eq_m128i(r, e);
2649    }
2650
2651    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2652    unsafe fn test_mm_mask_shldv_epi32() {
2653        let a = _mm_set1_epi32(1);
2654        let b = _mm_set1_epi32(1 << 31);
2655        let c = _mm_set1_epi32(2);
2656        let r = _mm_mask_shldv_epi32(a, 0, b, c);
2657        assert_eq_m128i(r, a);
2658        let r = _mm_mask_shldv_epi32(a, 0b00001111, b, c);
2659        let e = _mm_set1_epi32(6);
2660        assert_eq_m128i(r, e);
2661    }
2662
2663    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2664    unsafe fn test_mm_maskz_shldv_epi32() {
2665        let a = _mm_set1_epi32(1);
2666        let b = _mm_set1_epi32(1 << 31);
2667        let c = _mm_set1_epi32(2);
2668        let r = _mm_maskz_shldv_epi32(0, a, b, c);
2669        assert_eq_m128i(r, _mm_setzero_si128());
2670        let r = _mm_maskz_shldv_epi32(0b00001111, a, b, c);
2671        let e = _mm_set1_epi32(6);
2672        assert_eq_m128i(r, e);
2673    }
2674
2675    #[simd_test(enable = "avx512vbmi2")]
2676    unsafe fn test_mm512_shldv_epi16() {
2677        let a = _mm512_set1_epi16(1);
2678        let b = _mm512_set1_epi16(1 << 15);
2679        let c = _mm512_set1_epi16(2);
2680        let r = _mm512_shldv_epi16(a, b, c);
2681        let e = _mm512_set1_epi16(6);
2682        assert_eq_m512i(r, e);
2683    }
2684
2685    #[simd_test(enable = "avx512vbmi2")]
2686    unsafe fn test_mm512_mask_shldv_epi16() {
2687        let a = _mm512_set1_epi16(1);
2688        let b = _mm512_set1_epi16(1 << 15);
2689        let c = _mm512_set1_epi16(2);
2690        let r = _mm512_mask_shldv_epi16(a, 0, b, c);
2691        assert_eq_m512i(r, a);
2692        let r = _mm512_mask_shldv_epi16(a, 0b11111111_11111111_11111111_11111111, b, c);
2693        let e = _mm512_set1_epi16(6);
2694        assert_eq_m512i(r, e);
2695    }
2696
2697    #[simd_test(enable = "avx512vbmi2")]
2698    unsafe fn test_mm512_maskz_shldv_epi16() {
2699        let a = _mm512_set1_epi16(1);
2700        let b = _mm512_set1_epi16(1 << 15);
2701        let c = _mm512_set1_epi16(2);
2702        let r = _mm512_maskz_shldv_epi16(0, a, b, c);
2703        assert_eq_m512i(r, _mm512_setzero_si512());
2704        let r = _mm512_maskz_shldv_epi16(0b11111111_11111111_11111111_11111111, a, b, c);
2705        let e = _mm512_set1_epi16(6);
2706        assert_eq_m512i(r, e);
2707    }
2708
2709    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2710    unsafe fn test_mm256_shldv_epi16() {
2711        let a = _mm256_set1_epi16(1);
2712        let b = _mm256_set1_epi16(1 << 15);
2713        let c = _mm256_set1_epi16(2);
2714        let r = _mm256_shldv_epi16(a, b, c);
2715        let e = _mm256_set1_epi16(6);
2716        assert_eq_m256i(r, e);
2717    }
2718
2719    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2720    unsafe fn test_mm256_mask_shldv_epi16() {
2721        let a = _mm256_set1_epi16(1);
2722        let b = _mm256_set1_epi16(1 << 15);
2723        let c = _mm256_set1_epi16(2);
2724        let r = _mm256_mask_shldv_epi16(a, 0, b, c);
2725        assert_eq_m256i(r, a);
2726        let r = _mm256_mask_shldv_epi16(a, 0b11111111_11111111, b, c);
2727        let e = _mm256_set1_epi16(6);
2728        assert_eq_m256i(r, e);
2729    }
2730
2731    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2732    unsafe fn test_mm256_maskz_shldv_epi16() {
2733        let a = _mm256_set1_epi16(1);
2734        let b = _mm256_set1_epi16(1 << 15);
2735        let c = _mm256_set1_epi16(2);
2736        let r = _mm256_maskz_shldv_epi16(0, a, b, c);
2737        assert_eq_m256i(r, _mm256_setzero_si256());
2738        let r = _mm256_maskz_shldv_epi16(0b11111111_11111111, a, b, c);
2739        let e = _mm256_set1_epi16(6);
2740        assert_eq_m256i(r, e);
2741    }
2742
2743    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2744    unsafe fn test_mm_shldv_epi16() {
2745        let a = _mm_set1_epi16(1);
2746        let b = _mm_set1_epi16(1 << 15);
2747        let c = _mm_set1_epi16(2);
2748        let r = _mm_shldv_epi16(a, b, c);
2749        let e = _mm_set1_epi16(6);
2750        assert_eq_m128i(r, e);
2751    }
2752
2753    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2754    unsafe fn test_mm_mask_shldv_epi16() {
2755        let a = _mm_set1_epi16(1);
2756        let b = _mm_set1_epi16(1 << 15);
2757        let c = _mm_set1_epi16(2);
2758        let r = _mm_mask_shldv_epi16(a, 0, b, c);
2759        assert_eq_m128i(r, a);
2760        let r = _mm_mask_shldv_epi16(a, 0b11111111, b, c);
2761        let e = _mm_set1_epi16(6);
2762        assert_eq_m128i(r, e);
2763    }
2764
2765    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2766    unsafe fn test_mm_maskz_shldv_epi16() {
2767        let a = _mm_set1_epi16(1);
2768        let b = _mm_set1_epi16(1 << 15);
2769        let c = _mm_set1_epi16(2);
2770        let r = _mm_maskz_shldv_epi16(0, a, b, c);
2771        assert_eq_m128i(r, _mm_setzero_si128());
2772        let r = _mm_maskz_shldv_epi16(0b11111111, a, b, c);
2773        let e = _mm_set1_epi16(6);
2774        assert_eq_m128i(r, e);
2775    }
2776
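    // Right funnel-shift tests: with a = 2, b = 8 and a count of 1, each lane becomes
    // (a >> 1) | (b << (width - 1)) = 1, since the bit from b leaves the low lane.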
2777    #[simd_test(enable = "avx512vbmi2")]
2778    unsafe fn test_mm512_shrdv_epi64() {
2779        let a = _mm512_set1_epi64(2);
2780        let b = _mm512_set1_epi64(8);
2781        let c = _mm512_set1_epi64(1);
2782        let r = _mm512_shrdv_epi64(a, b, c);
2783        let e = _mm512_set1_epi64(1);
2784        assert_eq_m512i(r, e);
2785    }
2786
2787    #[simd_test(enable = "avx512vbmi2")]
2788    unsafe fn test_mm512_mask_shrdv_epi64() {
2789        let a = _mm512_set1_epi64(2);
2790        let b = _mm512_set1_epi64(8);
2791        let c = _mm512_set1_epi64(1);
2792        let r = _mm512_mask_shrdv_epi64(a, 0, b, c);
2793        assert_eq_m512i(r, a);
2794        let r = _mm512_mask_shrdv_epi64(a, 0b11111111, b, c);
2795        let e = _mm512_set1_epi64(1);
2796        assert_eq_m512i(r, e);
2797    }
2798
2799    #[simd_test(enable = "avx512vbmi2")]
2800    unsafe fn test_mm512_maskz_shrdv_epi64() {
2801        let a = _mm512_set1_epi64(2);
2802        let b = _mm512_set1_epi64(8);
2803        let c = _mm512_set1_epi64(1);
2804        let r = _mm512_maskz_shrdv_epi64(0, a, b, c);
2805        assert_eq_m512i(r, _mm512_setzero_si512());
2806        let r = _mm512_maskz_shrdv_epi64(0b11111111, a, b, c);
2807        let e = _mm512_set1_epi64(1);
2808        assert_eq_m512i(r, e);
2809    }
2810
2811    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2812    unsafe fn test_mm256_shrdv_epi64() {
2813        let a = _mm256_set1_epi64x(2);
2814        let b = _mm256_set1_epi64x(8);
2815        let c = _mm256_set1_epi64x(1);
2816        let r = _mm256_shrdv_epi64(a, b, c);
2817        let e = _mm256_set1_epi64x(1);
2818        assert_eq_m256i(r, e);
2819    }
2820
2821    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2822    unsafe fn test_mm256_mask_shrdv_epi64() {
2823        let a = _mm256_set1_epi64x(2);
2824        let b = _mm256_set1_epi64x(8);
2825        let c = _mm256_set1_epi64x(1);
2826        let r = _mm256_mask_shrdv_epi64(a, 0, b, c);
2827        assert_eq_m256i(r, a);
2828        let r = _mm256_mask_shrdv_epi64(a, 0b00001111, b, c);
2829        let e = _mm256_set1_epi64x(1);
2830        assert_eq_m256i(r, e);
2831    }
2832
2833    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2834    unsafe fn test_mm256_maskz_shrdv_epi64() {
2835        let a = _mm256_set1_epi64x(2);
2836        let b = _mm256_set1_epi64x(8);
2837        let c = _mm256_set1_epi64x(1);
2838        let r = _mm256_maskz_shrdv_epi64(0, a, b, c);
2839        assert_eq_m256i(r, _mm256_setzero_si256());
2840        let r = _mm256_maskz_shrdv_epi64(0b00001111, a, b, c);
2841        let e = _mm256_set1_epi64x(1);
2842        assert_eq_m256i(r, e);
2843    }
2844
2845    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2846    unsafe fn test_mm_shrdv_epi64() {
2847        let a = _mm_set1_epi64x(2);
2848        let b = _mm_set1_epi64x(8);
2849        let c = _mm_set1_epi64x(1);
2850        let r = _mm_shrdv_epi64(a, b, c);
2851        let e = _mm_set1_epi64x(1);
2852        assert_eq_m128i(r, e);
2853    }
2854
2855    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2856    unsafe fn test_mm_mask_shrdv_epi64() {
2857        let a = _mm_set1_epi64x(2);
2858        let b = _mm_set1_epi64x(8);
2859        let c = _mm_set1_epi64x(1);
2860        let r = _mm_mask_shrdv_epi64(a, 0, b, c);
2861        assert_eq_m128i(r, a);
2862        let r = _mm_mask_shrdv_epi64(a, 0b00000011, b, c);
2863        let e = _mm_set1_epi64x(1);
2864        assert_eq_m128i(r, e);
2865    }
2866
2867    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2868    unsafe fn test_mm_maskz_shrdv_epi64() {
2869        let a = _mm_set1_epi64x(2);
2870        let b = _mm_set1_epi64x(8);
2871        let c = _mm_set1_epi64x(1);
2872        let r = _mm_maskz_shrdv_epi64(0, a, b, c);
2873        assert_eq_m128i(r, _mm_setzero_si128());
2874        let r = _mm_maskz_shrdv_epi64(0b00000011, a, b, c);
2875        let e = _mm_set1_epi64x(1);
2876        assert_eq_m128i(r, e);
2877    }
2878
2879    #[simd_test(enable = "avx512vbmi2")]
2880    unsafe fn test_mm512_shrdv_epi32() {
2881        let a = _mm512_set1_epi32(2);
2882        let b = _mm512_set1_epi32(8);
2883        let c = _mm512_set1_epi32(1);
2884        let r = _mm512_shrdv_epi32(a, b, c);
2885        let e = _mm512_set1_epi32(1);
2886        assert_eq_m512i(r, e);
2887    }
2888
2889    #[simd_test(enable = "avx512vbmi2")]
2890    unsafe fn test_mm512_mask_shrdv_epi32() {
2891        let a = _mm512_set1_epi32(2);
2892        let b = _mm512_set1_epi32(8);
2893        let c = _mm512_set1_epi32(1);
2894        let r = _mm512_mask_shrdv_epi32(a, 0, b, c);
2895        assert_eq_m512i(r, a);
2896        let r = _mm512_mask_shrdv_epi32(a, 0b11111111_11111111, b, c);
2897        let e = _mm512_set1_epi32(1);
2898        assert_eq_m512i(r, e);
2899    }
2900
2901    #[simd_test(enable = "avx512vbmi2")]
2902    unsafe fn test_mm512_maskz_shrdv_epi32() {
2903        let a = _mm512_set1_epi32(2);
2904        let b = _mm512_set1_epi32(8);
2905        let c = _mm512_set1_epi32(1);
2906        let r = _mm512_maskz_shrdv_epi32(0, a, b, c);
2907        assert_eq_m512i(r, _mm512_setzero_si512());
2908        let r = _mm512_maskz_shrdv_epi32(0b11111111_11111111, a, b, c);
2909        let e = _mm512_set1_epi32(1);
2910        assert_eq_m512i(r, e);
2911    }
2912
2913    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2914    unsafe fn test_mm256_shrdv_epi32() {
2915        let a = _mm256_set1_epi32(2);
2916        let b = _mm256_set1_epi32(8);
2917        let c = _mm256_set1_epi32(1);
2918        let r = _mm256_shrdv_epi32(a, b, c);
2919        let e = _mm256_set1_epi32(1);
2920        assert_eq_m256i(r, e);
2921    }
2922
2923    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2924    unsafe fn test_mm256_mask_shrdv_epi32() {
2925        let a = _mm256_set1_epi32(2);
2926        let b = _mm256_set1_epi32(8);
2927        let c = _mm256_set1_epi32(1);
2928        let r = _mm256_mask_shrdv_epi32(a, 0, b, c);
2929        assert_eq_m256i(r, a);
2930        let r = _mm256_mask_shrdv_epi32(a, 0b11111111, b, c);
2931        let e = _mm256_set1_epi32(1);
2932        assert_eq_m256i(r, e);
2933    }
2934
2935    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2936    unsafe fn test_mm256_maskz_shrdv_epi32() {
2937        let a = _mm256_set1_epi32(2);
2938        let b = _mm256_set1_epi32(8);
2939        let c = _mm256_set1_epi32(1);
2940        let r = _mm256_maskz_shrdv_epi32(0, a, b, c);
2941        assert_eq_m256i(r, _mm256_setzero_si256());
2942        let r = _mm256_maskz_shrdv_epi32(0b11111111, a, b, c);
2943        let e = _mm256_set1_epi32(1);
2944        assert_eq_m256i(r, e);
2945    }
2946
2947    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2948    unsafe fn test_mm_shrdv_epi32() {
2949        let a = _mm_set1_epi32(2);
2950        let b = _mm_set1_epi32(8);
2951        let c = _mm_set1_epi32(1);
2952        let r = _mm_shrdv_epi32(a, b, c);
2953        let e = _mm_set1_epi32(1);
2954        assert_eq_m128i(r, e);
2955    }
2956
2957    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2958    unsafe fn test_mm_mask_shrdv_epi32() {
2959        let a = _mm_set1_epi32(2);
2960        let b = _mm_set1_epi32(8);
2961        let c = _mm_set1_epi32(1);
2962        let r = _mm_mask_shrdv_epi32(a, 0, b, c);
2963        assert_eq_m128i(r, a);
2964        let r = _mm_mask_shrdv_epi32(a, 0b00001111, b, c);
2965        let e = _mm_set1_epi32(1);
2966        assert_eq_m128i(r, e);
2967    }
2968
2969    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2970    unsafe fn test_mm_maskz_shrdv_epi32() {
2971        let a = _mm_set1_epi32(2);
2972        let b = _mm_set1_epi32(8);
2973        let c = _mm_set1_epi32(1);
2974        let r = _mm_maskz_shrdv_epi32(0, a, b, c);
2975        assert_eq_m128i(r, _mm_setzero_si128());
2976        let r = _mm_maskz_shrdv_epi32(0b00001111, a, b, c);
2977        let e = _mm_set1_epi32(1);
2978        assert_eq_m128i(r, e);
2979    }
2980
2981    #[simd_test(enable = "avx512vbmi2")]
2982    unsafe fn test_mm512_shrdv_epi16() {
2983        let a = _mm512_set1_epi16(2);
2984        let b = _mm512_set1_epi16(8);
2985        let c = _mm512_set1_epi16(1);
2986        let r = _mm512_shrdv_epi16(a, b, c);
2987        let e = _mm512_set1_epi16(1);
2988        assert_eq_m512i(r, e);
2989    }
2990
2991    #[simd_test(enable = "avx512vbmi2")]
2992    unsafe fn test_mm512_mask_shrdv_epi16() {
2993        let a = _mm512_set1_epi16(2);
2994        let b = _mm512_set1_epi16(8);
2995        let c = _mm512_set1_epi16(1);
2996        let r = _mm512_mask_shrdv_epi16(a, 0, b, c);
2997        assert_eq_m512i(r, a);
2998        let r = _mm512_mask_shrdv_epi16(a, 0b11111111_11111111_11111111_11111111, b, c);
2999        let e = _mm512_set1_epi16(1);
3000        assert_eq_m512i(r, e);
3001    }
3002
3003    #[simd_test(enable = "avx512vbmi2")]
3004    unsafe fn test_mm512_maskz_shrdv_epi16() {
3005        let a = _mm512_set1_epi16(2);
3006        let b = _mm512_set1_epi16(8);
3007        let c = _mm512_set1_epi16(1);
3008        let r = _mm512_maskz_shrdv_epi16(0, a, b, c);
3009        assert_eq_m512i(r, _mm512_setzero_si512());
3010        let r = _mm512_maskz_shrdv_epi16(0b11111111_11111111_11111111_11111111, a, b, c);
3011        let e = _mm512_set1_epi16(1);
3012        assert_eq_m512i(r, e);
3013    }
3014
3015    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3016    unsafe fn test_mm256_shrdv_epi16() {
3017        let a = _mm256_set1_epi16(2);
3018        let b = _mm256_set1_epi16(8);
3019        let c = _mm256_set1_epi16(1);
3020        let r = _mm256_shrdv_epi16(a, b, c);
3021        let e = _mm256_set1_epi16(1);
3022        assert_eq_m256i(r, e);
3023    }
3024
3025    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3026    unsafe fn test_mm256_mask_shrdv_epi16() {
3027        let a = _mm256_set1_epi16(2);
3028        let b = _mm256_set1_epi16(8);
3029        let c = _mm256_set1_epi16(1);
3030        let r = _mm256_mask_shrdv_epi16(a, 0, b, c);
3031        assert_eq_m256i(r, a);
3032        let r = _mm256_mask_shrdv_epi16(a, 0b11111111_11111111, b, c);
3033        let e = _mm256_set1_epi16(1);
3034        assert_eq_m256i(r, e);
3035    }
3036
3037    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3038    unsafe fn test_mm256_maskz_shrdv_epi16() {
3039        let a = _mm256_set1_epi16(2);
3040        let b = _mm256_set1_epi16(8);
3041        let c = _mm256_set1_epi16(1);
3042        let r = _mm256_maskz_shrdv_epi16(0, a, b, c);
3043        assert_eq_m256i(r, _mm256_setzero_si256());
3044        let r = _mm256_maskz_shrdv_epi16(0b11111111_11111111, a, b, c);
3045        let e = _mm256_set1_epi16(1);
3046        assert_eq_m256i(r, e);
3047    }
3048
3049    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3050    unsafe fn test_mm_shrdv_epi16() {
3051        let a = _mm_set1_epi16(2);
3052        let b = _mm_set1_epi16(8);
3053        let c = _mm_set1_epi16(1);
3054        let r = _mm_shrdv_epi16(a, b, c);
3055        let e = _mm_set1_epi16(1);
3056        assert_eq_m128i(r, e);
3057    }
3058
3059    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3060    unsafe fn test_mm_mask_shrdv_epi16() {
3061        let a = _mm_set1_epi16(2);
3062        let b = _mm_set1_epi16(8);
3063        let c = _mm_set1_epi16(1);
3064        let r = _mm_mask_shrdv_epi16(a, 0, b, c);
3065        assert_eq_m128i(r, a);
3066        let r = _mm_mask_shrdv_epi16(a, 0b11111111, b, c);
3067        let e = _mm_set1_epi16(1);
3068        assert_eq_m128i(r, e);
3069    }
3070
3071    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3072    unsafe fn test_mm_maskz_shrdv_epi16() {
3073        let a = _mm_set1_epi16(2);
3074        let b = _mm_set1_epi16(8);
3075        let c = _mm_set1_epi16(1);
3076        let r = _mm_maskz_shrdv_epi16(0, a, b, c);
3077        assert_eq_m128i(r, _mm_setzero_si128());
3078        let r = _mm_maskz_shrdv_epi16(0b11111111, a, b, c);
3079        let e = _mm_set1_epi16(1);
3080        assert_eq_m128i(r, e);
3081    }
3082
3083    #[simd_test(enable = "avx512vbmi2")]
3084    unsafe fn test_mm512_shldi_epi64() {
3085        let a = _mm512_set1_epi64(1);
        let b = _mm512_set1_epi64(1 << 63);
        let r = _mm512_shldi_epi64::<2>(a, b);
        let e = _mm512_set1_epi64(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shldi_epi64() {
        let a = _mm512_set1_epi64(1);
        let b = _mm512_set1_epi64(1 << 63);
        let r = _mm512_mask_shldi_epi64::<2>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shldi_epi64::<2>(a, 0b11111111, a, b);
        let e = _mm512_set1_epi64(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shldi_epi64() {
        let a = _mm512_set1_epi64(1);
        let b = _mm512_set1_epi64(1 << 63);
        let r = _mm512_maskz_shldi_epi64::<2>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shldi_epi64::<2>(0b11111111, a, b);
        let e = _mm512_set1_epi64(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shldi_epi64() {
        let a = _mm256_set1_epi64x(1);
        let b = _mm256_set1_epi64x(1 << 63);
        let r = _mm256_shldi_epi64::<2>(a, b);
        let e = _mm256_set1_epi64x(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shldi_epi64() {
        let a = _mm256_set1_epi64x(1);
        let b = _mm256_set1_epi64x(1 << 63);
        let r = _mm256_mask_shldi_epi64::<2>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shldi_epi64::<2>(a, 0b00001111, a, b);
        let e = _mm256_set1_epi64x(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shldi_epi64() {
        let a = _mm256_set1_epi64x(1);
        let b = _mm256_set1_epi64x(1 << 63);
        let r = _mm256_maskz_shldi_epi64::<2>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shldi_epi64::<2>(0b00001111, a, b);
        let e = _mm256_set1_epi64x(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shldi_epi64() {
        let a = _mm_set1_epi64x(1);
        let b = _mm_set1_epi64x(1 << 63);
        let r = _mm_shldi_epi64::<2>(a, b);
        let e = _mm_set1_epi64x(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shldi_epi64() {
        let a = _mm_set1_epi64x(1);
        let b = _mm_set1_epi64x(1 << 63);
        let r = _mm_mask_shldi_epi64::<2>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shldi_epi64::<2>(a, 0b00000011, a, b);
        let e = _mm_set1_epi64x(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shldi_epi64() {
        let a = _mm_set1_epi64x(1);
        let b = _mm_set1_epi64x(1 << 63);
        let r = _mm_maskz_shldi_epi64::<2>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shldi_epi64::<2>(0b00000011, a, b);
        let e = _mm_set1_epi64x(6);
        assert_eq_m128i(r, e);
    }

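    // Note: the 32-bit and 16-bit `shldi` tests below follow the same pattern as the
    // 64-bit ones above. Each lane is the upper half of the double-width value (a:b)
    // shifted left by IMM8, i.e. (a << IMM8) | (b >> (LANE_BITS - IMM8)); with a = 1,
    // b = 1 << (LANE_BITS - 1) and IMM8 = 2, that is 4 | 2 = 6 in every lane.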
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shldi_epi32() {
        let a = _mm512_set1_epi32(1);
        let b = _mm512_set1_epi32(1 << 31);
        let r = _mm512_shldi_epi32::<2>(a, b);
        let e = _mm512_set1_epi32(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shldi_epi32() {
        let a = _mm512_set1_epi32(1);
        let b = _mm512_set1_epi32(1 << 31);
        let r = _mm512_mask_shldi_epi32::<2>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shldi_epi32::<2>(a, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shldi_epi32() {
        let a = _mm512_set1_epi32(1);
        let b = _mm512_set1_epi32(1 << 31);
        let r = _mm512_maskz_shldi_epi32::<2>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shldi_epi32::<2>(0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shldi_epi32() {
        let a = _mm256_set1_epi32(1);
        let b = _mm256_set1_epi32(1 << 31);
        let r = _mm256_shldi_epi32::<2>(a, b);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shldi_epi32() {
        let a = _mm256_set1_epi32(1);
        let b = _mm256_set1_epi32(1 << 31);
        let r = _mm256_mask_shldi_epi32::<2>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shldi_epi32::<2>(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shldi_epi32() {
        let a = _mm256_set1_epi32(1);
        let b = _mm256_set1_epi32(1 << 31);
        let r = _mm256_maskz_shldi_epi32::<2>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shldi_epi32::<2>(0b11111111, a, b);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shldi_epi32() {
        let a = _mm_set1_epi32(1);
        let b = _mm_set1_epi32(1 << 31);
        let r = _mm_shldi_epi32::<2>(a, b);
        let e = _mm_set1_epi32(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shldi_epi32() {
        let a = _mm_set1_epi32(1);
        let b = _mm_set1_epi32(1 << 31);
        let r = _mm_mask_shldi_epi32::<2>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shldi_epi32::<2>(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shldi_epi32() {
        let a = _mm_set1_epi32(1);
        let b = _mm_set1_epi32(1 << 31);
        let r = _mm_maskz_shldi_epi32::<2>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shldi_epi32::<2>(0b00001111, a, b);
        let e = _mm_set1_epi32(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shldi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1 << 15);
        let r = _mm512_shldi_epi16::<2>(a, b);
        let e = _mm512_set1_epi16(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shldi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1 << 15);
        let r = _mm512_mask_shldi_epi16::<2>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shldi_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shldi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1 << 15);
        let r = _mm512_maskz_shldi_epi16::<2>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shldi_epi16::<2>(0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shldi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1 << 15);
        let r = _mm256_shldi_epi16::<2>(a, b);
        let e = _mm256_set1_epi16(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shldi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1 << 15);
        let r = _mm256_mask_shldi_epi16::<2>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shldi_epi16::<2>(a, 0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shldi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1 << 15);
        let r = _mm256_maskz_shldi_epi16::<2>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shldi_epi16::<2>(0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shldi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1 << 15);
        let r = _mm_shldi_epi16::<2>(a, b);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shldi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1 << 15);
        let r = _mm_mask_shldi_epi16::<2>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shldi_epi16::<2>(a, 0b11111111, a, b);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shldi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1 << 15);
        let r = _mm_maskz_shldi_epi16::<2>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shldi_epi16::<2>(0b11111111, a, b);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

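    // Note: the `shrdi` tests exercise the concatenate-and-shift-right (VPSHRD)
    // intrinsics. Each lane is the lower half of the double-width value (b:a) shifted
    // right by IMM8, i.e. (a >> IMM8) | (b << (LANE_BITS - IMM8)) truncated to the lane
    // width. With a = 2, b = 8 and IMM8 = 1, the contribution from b is shifted out
    // entirely, so every lane is 2 >> 1 = 1.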
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shrdi_epi64() {
        let a = _mm512_set1_epi64(2);
        let b = _mm512_set1_epi64(8);
        let r = _mm512_shrdi_epi64::<1>(a, b);
        let e = _mm512_set1_epi64(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shrdi_epi64() {
        let a = _mm512_set1_epi64(2);
        let b = _mm512_set1_epi64(8);
        let r = _mm512_mask_shrdi_epi64::<1>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shrdi_epi64::<1>(a, 0b11111111, a, b);
        let e = _mm512_set1_epi64(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shrdi_epi64() {
        let a = _mm512_set1_epi64(2);
        let b = _mm512_set1_epi64(8);
        let r = _mm512_maskz_shrdi_epi64::<1>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shrdi_epi64::<1>(0b11111111, a, b);
        let e = _mm512_set1_epi64(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shrdi_epi64() {
        let a = _mm256_set1_epi64x(2);
        let b = _mm256_set1_epi64x(8);
        let r = _mm256_shrdi_epi64::<1>(a, b);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shrdi_epi64() {
        let a = _mm256_set1_epi64x(2);
        let b = _mm256_set1_epi64x(8);
        let r = _mm256_mask_shrdi_epi64::<1>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shrdi_epi64::<1>(a, 0b00001111, a, b);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shrdi_epi64() {
        let a = _mm256_set1_epi64x(2);
        let b = _mm256_set1_epi64x(8);
        let r = _mm256_maskz_shrdi_epi64::<1>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shrdi_epi64::<1>(0b00001111, a, b);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shrdi_epi64() {
        let a = _mm_set1_epi64x(2);
        let b = _mm_set1_epi64x(8);
        let r = _mm_shrdi_epi64::<1>(a, b);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shrdi_epi64() {
        let a = _mm_set1_epi64x(2);
        let b = _mm_set1_epi64x(8);
        let r = _mm_mask_shrdi_epi64::<1>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shrdi_epi64::<1>(a, 0b00000011, a, b);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shrdi_epi64() {
        let a = _mm_set1_epi64x(2);
        let b = _mm_set1_epi64x(8);
        let r = _mm_maskz_shrdi_epi64::<1>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shrdi_epi64::<1>(0b00000011, a, b);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shrdi_epi32() {
        let a = _mm512_set1_epi32(2);
        let b = _mm512_set1_epi32(8);
        let r = _mm512_shrdi_epi32::<1>(a, b);
        let e = _mm512_set1_epi32(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shrdi_epi32() {
        let a = _mm512_set1_epi32(2);
        let b = _mm512_set1_epi32(8);
        let r = _mm512_mask_shrdi_epi32::<1>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shrdi_epi32::<1>(a, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shrdi_epi32() {
        let a = _mm512_set1_epi32(2);
        let b = _mm512_set1_epi32(8);
        let r = _mm512_maskz_shrdi_epi32::<1>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shrdi_epi32::<1>(0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shrdi_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(8);
        let r = _mm256_shrdi_epi32::<1>(a, b);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shrdi_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(8);
        let r = _mm256_mask_shrdi_epi32::<1>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shrdi_epi32::<1>(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shrdi_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(8);
        let r = _mm256_maskz_shrdi_epi32::<1>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shrdi_epi32::<1>(0b11111111, a, b);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shrdi_epi32() {
        let a = _mm_set1_epi32(2);
        let b = _mm_set1_epi32(8);
        let r = _mm_shrdi_epi32::<1>(a, b);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shrdi_epi32() {
        let a = _mm_set1_epi32(2);
        let b = _mm_set1_epi32(8);
        let r = _mm_mask_shrdi_epi32::<1>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shrdi_epi32::<1>(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shrdi_epi32() {
        let a = _mm_set1_epi32(2);
        let b = _mm_set1_epi32(8);
        let r = _mm_maskz_shrdi_epi32::<1>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shrdi_epi32::<1>(0b00001111, a, b);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shrdi_epi16() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(8);
        let r = _mm512_shrdi_epi16::<1>(a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shrdi_epi16() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(8);
        let r = _mm512_mask_shrdi_epi16::<1>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shrdi_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shrdi_epi16() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(8);
        let r = _mm512_maskz_shrdi_epi16::<1>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shrdi_epi16::<1>(0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shrdi_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(8);
        let r = _mm256_shrdi_epi16::<1>(a, b);
        let e = _mm256_set1_epi16(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shrdi_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(8);
        let r = _mm256_mask_shrdi_epi16::<1>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shrdi_epi16::<1>(a, 0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shrdi_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(8);
        let r = _mm256_maskz_shrdi_epi16::<1>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shrdi_epi16::<1>(0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shrdi_epi16() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(8);
        let r = _mm_shrdi_epi16::<1>(a, b);
        let e = _mm_set1_epi16(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shrdi_epi16() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(8);
        let r = _mm_mask_shrdi_epi16::<1>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shrdi_epi16::<1>(a, 0b11111111, a, b);
        let e = _mm_set1_epi16(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shrdi_epi16() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(8);
        let r = _mm_maskz_shrdi_epi16::<1>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shrdi_epi16::<1>(0b11111111, a, b);
        let e = _mm_set1_epi16(1);
        assert_eq_m128i(r, e);
    }

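    // Note: the expand-load tests read values 1, 2, 3, ... contiguously from memory and
    // place them into the destination lanes whose mask bit is set (lowest set bit first);
    // inactive lanes keep `src` in the mask variants and are zeroed in the maskz variants.
    // `black_box` keeps the pointer opaque to the optimizer so the masked load is actually
    // performed rather than const-folded.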
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_expandloadu_epi16() {
        let src = _mm512_set1_epi16(42);
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111;
        let r = _mm512_mask_expandloadu_epi16(src, m, black_box(p));
        let e = _mm512_set_epi16(
            16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42,
            42, 42, 42, 42, 42, 4, 3, 2, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_expandloadu_epi16() {
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111;
        let r = _mm512_maskz_expandloadu_epi16(m, black_box(p));
        let e = _mm512_set_epi16(
            16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0,
            0, 4, 3, 2, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi16() {
        let src = _mm256_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_mask_expandloadu_epi16(src, m, black_box(p));
        let e = _mm256_set_epi16(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_maskz_expandloadu_epi16(m, black_box(p));
        let e = _mm256_set_epi16(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi16() {
        let src = _mm_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_epi16(src, m, black_box(p));
        let e = _mm_set_epi16(4, 3, 2, 42, 1, 42, 42, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_epi16(m, black_box(p));
        let e = _mm_set_epi16(4, 3, 2, 0, 1, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_expandloadu_epi8() {
        let src = _mm512_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101;
        let r = _mm512_mask_expandloadu_epi8(src, m, black_box(p));
        let e = _mm512_set_epi8(
            32, 31, 30, 42, 29, 42, 42, 42, 28, 27, 42, 42, 26, 42, 25, 42, 24, 23, 22, 21, 42, 42,
            42, 42, 42, 42, 42, 42, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 42, 42, 42, 42,
            42, 42, 42, 42, 8, 42, 7, 42, 6, 42, 5, 42, 42, 4, 42, 3, 42, 2, 42, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_expandloadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101;
        let r = _mm512_maskz_expandloadu_epi8(m, black_box(p));
        let e = _mm512_set_epi8(
            32, 31, 30, 0, 29, 0, 0, 0, 28, 27, 0, 0, 26, 0, 25, 0, 24, 23, 22, 21, 0, 0, 0, 0, 0,
            0, 0, 0, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0,
            7, 0, 6, 0, 5, 0, 0, 4, 0, 3, 0, 2, 0, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi8() {
        let src = _mm256_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111;
        let r = _mm256_mask_expandloadu_epi8(src, m, black_box(p));
        let e = _mm256_set_epi8(
            16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42,
            42, 42, 42, 42, 42, 4, 3, 2, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111;
        let r = _mm256_maskz_expandloadu_epi8(m, black_box(p));
        let e = _mm256_set_epi8(
            16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0,
            0, 4, 3, 2, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi8() {
        let src = _mm_set1_epi8(42);
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_mask_expandloadu_epi8(src, m, black_box(p));
        let e = _mm_set_epi8(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi8() {
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_maskz_expandloadu_epi8(m, black_box(p));
        let e = _mm_set_epi8(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
        assert_eq_m128i(r, e);
    }

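    // Note: the compress-store tests write only the lanes whose mask bit is set, packed
    // contiguously (lowest set bit first) starting at the destination pointer; memory past
    // the stored elements is left untouched, which is why the tail of `r` stays zero after
    // the masked store.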
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_compressstoreu_epi16() {
        let a = _mm512_set_epi16(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11,
            10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
        );
        let mut r = [0_i16; 32];
        _mm512_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0, a);
        assert_eq!(&r, &[0_i16; 32]);
        _mm512_mask_compressstoreu_epi16(
            r.as_mut_ptr() as *mut _,
            0b11110000_11001010_11111111_00000000,
            a,
        );
        assert_eq!(
            &r,
            &[
                9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 23, 24, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0
            ]
        );
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_compressstoreu_epi16() {
        let a = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let mut r = [0_i16; 16];
        _mm256_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0, a);
        assert_eq!(&r, &[0_i16; 16]);
        _mm256_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0b11110000_11001010, a);
        assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_compressstoreu_epi16() {
        let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
        let mut r = [0_i16; 8];
        _mm_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0, a);
        assert_eq!(&r, &[0_i16; 8]);
        _mm_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0b11110000, a);
        assert_eq!(&r, &[5, 6, 7, 8, 0, 0, 0, 0]);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_compressstoreu_epi8() {
        let a = _mm512_set_epi8(
            64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43,
            42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21,
            20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
        );
        let mut r = [0_i8; 64];
        _mm512_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0, a);
        assert_eq!(&r, &[0_i8; 64]);
        _mm512_mask_compressstoreu_epi8(
            r.as_mut_ptr() as *mut _,
            0b11110000_11001010_11111111_00000000_10101010_01010101_11110000_00001111,
            a,
        );
        assert_eq!(
            &r,
            &[
                1, 2, 3, 4, 13, 14, 15, 16, 17, 19, 21, 23, 26, 28, 30, 32, 41, 42, 43, 44, 45, 46,
                47, 48, 50, 52, 55, 56, 61, 62, 63, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
            ]
        );
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_compressstoreu_epi8() {
        let a = _mm256_set_epi8(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11,
            10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
        );
        let mut r = [0_i8; 32];
        _mm256_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0, a);
        assert_eq!(&r, &[0_i8; 32]);
        _mm256_mask_compressstoreu_epi8(
            r.as_mut_ptr() as *mut _,
            0b11110000_11001010_11111111_00000000,
            a,
        );
        assert_eq!(
            &r,
            &[
                9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 23, 24, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0
            ]
        );
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_compressstoreu_epi8() {
        let a = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let mut r = [0_i8; 16];
        _mm_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0, a);
        assert_eq!(&r, &[0_i8; 16]);
        _mm_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0b11110000_11001010, a);
        assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
    }
}