core/stdarch/crates/core_arch/src/x86/
avx512vbmi.rs

1use crate::core_arch::{simd::*, x86::*};
2use crate::intrinsics::simd::*;
3
4#[cfg(test)]
5use stdarch_test::assert_instr;
6
7/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
8///
9/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi8&expand=4262)
10#[inline]
11#[target_feature(enable = "avx512vbmi")]
12#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
14pub fn _mm512_permutex2var_epi8(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
15    unsafe { transmute(vpermi2b(a.as_i8x64(), idx.as_i8x64(), b.as_i8x64())) }
16}
17
18/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
19///
20/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi8&expand=4259)
21#[inline]
22#[target_feature(enable = "avx512vbmi")]
23#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24#[cfg_attr(test, assert_instr(vpermt2b))]
25pub fn _mm512_mask_permutex2var_epi8(
26    a: __m512i,
27    k: __mmask64,
28    idx: __m512i,
29    b: __m512i,
30) -> __m512i {
31    unsafe {
32        let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
33        transmute(simd_select_bitmask(k, permute, a.as_i8x64()))
34    }
35}
36
37/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
38///
39/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi8&expand=4261)
40#[inline]
41#[target_feature(enable = "avx512vbmi")]
42#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
43#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
44pub fn _mm512_maskz_permutex2var_epi8(
45    k: __mmask64,
46    a: __m512i,
47    idx: __m512i,
48    b: __m512i,
49) -> __m512i {
50    unsafe {
51        let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
52        transmute(simd_select_bitmask(k, permute, i8x64::ZERO))
53    }
54}
55
56/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
57///
58/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi8&expand=4260)
59#[inline]
60#[target_feature(enable = "avx512vbmi")]
61#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
62#[cfg_attr(test, assert_instr(vpermi2b))]
63pub fn _mm512_mask2_permutex2var_epi8(
64    a: __m512i,
65    idx: __m512i,
66    k: __mmask64,
67    b: __m512i,
68) -> __m512i {
69    unsafe {
70        let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
71        transmute(simd_select_bitmask(k, permute, idx.as_i8x64()))
72    }
73}
74
75/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
76///
77/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi8&expand=4258)
78#[inline]
79#[target_feature(enable = "avx512vbmi,avx512vl")]
80#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
81#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
82pub fn _mm256_permutex2var_epi8(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
83    unsafe { transmute(vpermi2b256(a.as_i8x32(), idx.as_i8x32(), b.as_i8x32())) }
84}
85
86/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
87///
88/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi8&expand=4255)
89#[inline]
90#[target_feature(enable = "avx512vbmi,avx512vl")]
91#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
92#[cfg_attr(test, assert_instr(vpermt2b))]
93pub fn _mm256_mask_permutex2var_epi8(
94    a: __m256i,
95    k: __mmask32,
96    idx: __m256i,
97    b: __m256i,
98) -> __m256i {
99    unsafe {
100        let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
101        transmute(simd_select_bitmask(k, permute, a.as_i8x32()))
102    }
103}
104
105/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
106///
107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi8&expand=4257)
108#[inline]
109#[target_feature(enable = "avx512vbmi,avx512vl")]
110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
111#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
112pub fn _mm256_maskz_permutex2var_epi8(
113    k: __mmask32,
114    a: __m256i,
115    idx: __m256i,
116    b: __m256i,
117) -> __m256i {
118    unsafe {
119        let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
120        transmute(simd_select_bitmask(k, permute, i8x32::ZERO))
121    }
122}
123
124/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
125///
126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi8&expand=4256)
127#[inline]
128#[target_feature(enable = "avx512vbmi,avx512vl")]
129#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
130#[cfg_attr(test, assert_instr(vpermi2b))]
131pub fn _mm256_mask2_permutex2var_epi8(
132    a: __m256i,
133    idx: __m256i,
134    k: __mmask32,
135    b: __m256i,
136) -> __m256i {
137    unsafe {
138        let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
139        transmute(simd_select_bitmask(k, permute, idx.as_i8x32()))
140    }
141}
142
143/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
144///
145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi8&expand=4254)
146#[inline]
147#[target_feature(enable = "avx512vbmi,avx512vl")]
148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
149#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
150pub fn _mm_permutex2var_epi8(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
151    unsafe { transmute(vpermi2b128(a.as_i8x16(), idx.as_i8x16(), b.as_i8x16())) }
152}
153
154/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
155///
156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi8&expand=4251)
157#[inline]
158#[target_feature(enable = "avx512vbmi,avx512vl")]
159#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
160#[cfg_attr(test, assert_instr(vpermt2b))]
161pub fn _mm_mask_permutex2var_epi8(a: __m128i, k: __mmask16, idx: __m128i, b: __m128i) -> __m128i {
162    unsafe {
163        let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
164        transmute(simd_select_bitmask(k, permute, a.as_i8x16()))
165    }
166}
167
168/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
169///
170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi8&expand=4253)
171#[inline]
172#[target_feature(enable = "avx512vbmi,avx512vl")]
173#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
174#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
175pub fn _mm_maskz_permutex2var_epi8(k: __mmask16, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
176    unsafe {
177        let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
178        transmute(simd_select_bitmask(k, permute, i8x16::ZERO))
179    }
180}
181
182/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
183///
184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi8&expand=4252)
185#[inline]
186#[target_feature(enable = "avx512vbmi,avx512vl")]
187#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
188#[cfg_attr(test, assert_instr(vpermi2b))]
189pub fn _mm_mask2_permutex2var_epi8(a: __m128i, idx: __m128i, k: __mmask16, b: __m128i) -> __m128i {
190    unsafe {
191        let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
192        transmute(simd_select_bitmask(k, permute, idx.as_i8x16()))
193    }
194}
195
196/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
197///
198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi8&expand=4316)
199#[inline]
200#[target_feature(enable = "avx512vbmi")]
201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
202#[cfg_attr(test, assert_instr(vpermb))]
203pub fn _mm512_permutexvar_epi8(idx: __m512i, a: __m512i) -> __m512i {
204    unsafe { transmute(vpermb(a.as_i8x64(), idx.as_i8x64())) }
205}
206
207/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
208///
209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi8&expand=4314)
210#[inline]
211#[target_feature(enable = "avx512vbmi")]
212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
213#[cfg_attr(test, assert_instr(vpermb))]
214pub fn _mm512_mask_permutexvar_epi8(
215    src: __m512i,
216    k: __mmask64,
217    idx: __m512i,
218    a: __m512i,
219) -> __m512i {
220    unsafe {
221        let permute = _mm512_permutexvar_epi8(idx, a).as_i8x64();
222        transmute(simd_select_bitmask(k, permute, src.as_i8x64()))
223    }
224}
225
226/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
227///
228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi8&expand=4315)
229#[inline]
230#[target_feature(enable = "avx512vbmi")]
231#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
232#[cfg_attr(test, assert_instr(vpermb))]
233pub fn _mm512_maskz_permutexvar_epi8(k: __mmask64, idx: __m512i, a: __m512i) -> __m512i {
234    unsafe {
235        let permute = _mm512_permutexvar_epi8(idx, a).as_i8x64();
236        transmute(simd_select_bitmask(k, permute, i8x64::ZERO))
237    }
238}
239
240/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
241///
242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi8&expand=4313)
243#[inline]
244#[target_feature(enable = "avx512vbmi,avx512vl")]
245#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
246#[cfg_attr(test, assert_instr(vpermb))]
247pub fn _mm256_permutexvar_epi8(idx: __m256i, a: __m256i) -> __m256i {
248    unsafe { transmute(vpermb256(a.as_i8x32(), idx.as_i8x32())) }
249}
250
251/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
252///
253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi8&expand=4311)
254#[inline]
255#[target_feature(enable = "avx512vbmi,avx512vl")]
256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
257#[cfg_attr(test, assert_instr(vpermb))]
258pub fn _mm256_mask_permutexvar_epi8(
259    src: __m256i,
260    k: __mmask32,
261    idx: __m256i,
262    a: __m256i,
263) -> __m256i {
264    unsafe {
265        let permute = _mm256_permutexvar_epi8(idx, a).as_i8x32();
266        transmute(simd_select_bitmask(k, permute, src.as_i8x32()))
267    }
268}
269
270/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
271///
272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi8&expand=4312)
273#[inline]
274#[target_feature(enable = "avx512vbmi,avx512vl")]
275#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
276#[cfg_attr(test, assert_instr(vpermb))]
277pub fn _mm256_maskz_permutexvar_epi8(k: __mmask32, idx: __m256i, a: __m256i) -> __m256i {
278    unsafe {
279        let permute = _mm256_permutexvar_epi8(idx, a).as_i8x32();
280        transmute(simd_select_bitmask(k, permute, i8x32::ZERO))
281    }
282}
283
284/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
285///
286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutexvar_epi8&expand=4310)
287#[inline]
288#[target_feature(enable = "avx512vbmi,avx512vl")]
289#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
290#[cfg_attr(test, assert_instr(vpermb))]
291pub fn _mm_permutexvar_epi8(idx: __m128i, a: __m128i) -> __m128i {
292    unsafe { transmute(vpermb128(a.as_i8x16(), idx.as_i8x16())) }
293}
294
295/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
296///
297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutexvar_epi8&expand=4308)
298#[inline]
299#[target_feature(enable = "avx512vbmi,avx512vl")]
300#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
301#[cfg_attr(test, assert_instr(vpermb))]
302pub fn _mm_mask_permutexvar_epi8(src: __m128i, k: __mmask16, idx: __m128i, a: __m128i) -> __m128i {
303    unsafe {
304        let permute = _mm_permutexvar_epi8(idx, a).as_i8x16();
305        transmute(simd_select_bitmask(k, permute, src.as_i8x16()))
306    }
307}
308
309/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
310///
311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutexvar_epi8&expand=4309)
312#[inline]
313#[target_feature(enable = "avx512vbmi,avx512vl")]
314#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
315#[cfg_attr(test, assert_instr(vpermb))]
316pub fn _mm_maskz_permutexvar_epi8(k: __mmask16, idx: __m128i, a: __m128i) -> __m128i {
317    unsafe {
318        let permute = _mm_permutexvar_epi8(idx, a).as_i8x16();
319        transmute(simd_select_bitmask(k, permute, i8x16::ZERO))
320    }
321}
322
323/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
324///
325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_multishift_epi64_epi8&expand=4026)
326#[inline]
327#[target_feature(enable = "avx512vbmi")]
328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
329#[cfg_attr(test, assert_instr(vpmultishiftqb))]
330pub fn _mm512_multishift_epi64_epi8(a: __m512i, b: __m512i) -> __m512i {
331    unsafe { transmute(vpmultishiftqb(a.as_i8x64(), b.as_i8x64())) }
332}
333
334/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
335///
336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_multishift_epi64_epi8&expand=4024)
337#[inline]
338#[target_feature(enable = "avx512vbmi")]
339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
340#[cfg_attr(test, assert_instr(vpmultishiftqb))]
341pub fn _mm512_mask_multishift_epi64_epi8(
342    src: __m512i,
343    k: __mmask64,
344    a: __m512i,
345    b: __m512i,
346) -> __m512i {
347    unsafe {
348        let multishift = _mm512_multishift_epi64_epi8(a, b).as_i8x64();
349        transmute(simd_select_bitmask(k, multishift, src.as_i8x64()))
350    }
351}
352
353/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
354///
355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_multishift_epi64_epi8&expand=4025)
356#[inline]
357#[target_feature(enable = "avx512vbmi")]
358#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
359#[cfg_attr(test, assert_instr(vpmultishiftqb))]
360pub fn _mm512_maskz_multishift_epi64_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
361    unsafe {
362        let multishift = _mm512_multishift_epi64_epi8(a, b).as_i8x64();
363        transmute(simd_select_bitmask(k, multishift, i8x64::ZERO))
364    }
365}
366
367/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
368///
369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_multishift_epi64_epi8&expand=4023)
370#[inline]
371#[target_feature(enable = "avx512vbmi,avx512vl")]
372#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
373#[cfg_attr(test, assert_instr(vpmultishiftqb))]
374pub fn _mm256_multishift_epi64_epi8(a: __m256i, b: __m256i) -> __m256i {
375    unsafe { transmute(vpmultishiftqb256(a.as_i8x32(), b.as_i8x32())) }
376}
377
378/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
379///
380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_multishift_epi64_epi8&expand=4021)
381#[inline]
382#[target_feature(enable = "avx512vbmi,avx512vl")]
383#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
384#[cfg_attr(test, assert_instr(vpmultishiftqb))]
385pub fn _mm256_mask_multishift_epi64_epi8(
386    src: __m256i,
387    k: __mmask32,
388    a: __m256i,
389    b: __m256i,
390) -> __m256i {
391    unsafe {
392        let multishift = _mm256_multishift_epi64_epi8(a, b).as_i8x32();
393        transmute(simd_select_bitmask(k, multishift, src.as_i8x32()))
394    }
395}
396
397/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
398///
399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_multishift_epi64_epi8&expand=4022)
400#[inline]
401#[target_feature(enable = "avx512vbmi,avx512vl")]
402#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
403#[cfg_attr(test, assert_instr(vpmultishiftqb))]
404pub fn _mm256_maskz_multishift_epi64_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
405    unsafe {
406        let multishift = _mm256_multishift_epi64_epi8(a, b).as_i8x32();
407        transmute(simd_select_bitmask(k, multishift, i8x32::ZERO))
408    }
409}
410
411/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
412///
413/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm_multishift_epi64_epi8&expand=4020)
414#[inline]
415#[target_feature(enable = "avx512vbmi,avx512vl")]
416#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
417#[cfg_attr(test, assert_instr(vpmultishiftqb))]
418pub fn _mm_multishift_epi64_epi8(a: __m128i, b: __m128i) -> __m128i {
419    unsafe { transmute(vpmultishiftqb128(a.as_i8x16(), b.as_i8x16())) }
420}
421
422/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
423///
424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_multishift_epi64_epi8&expand=4018)
425#[inline]
426#[target_feature(enable = "avx512vbmi,avx512vl")]
427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
428#[cfg_attr(test, assert_instr(vpmultishiftqb))]
429pub fn _mm_mask_multishift_epi64_epi8(
430    src: __m128i,
431    k: __mmask16,
432    a: __m128i,
433    b: __m128i,
434) -> __m128i {
435    unsafe {
436        let multishift = _mm_multishift_epi64_epi8(a, b).as_i8x16();
437        transmute(simd_select_bitmask(k, multishift, src.as_i8x16()))
438    }
439}
440
441/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
442///
443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_multishift_epi64_epi8&expand=4019)
444#[inline]
445#[target_feature(enable = "avx512vbmi,avx512vl")]
446#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
447#[cfg_attr(test, assert_instr(vpmultishiftqb))]
448pub fn _mm_maskz_multishift_epi64_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
449    unsafe {
450        let multishift = _mm_multishift_epi64_epi8(a, b).as_i8x16();
451        transmute(simd_select_bitmask(k, multishift, i8x16::ZERO))
452    }
453}
454
// Bindings to the LLVM intrinsics (see the `llvm.x86.avx512.*` link names) that back the
// public functions in this file. Each operation is declared at three vector widths: the
// unsuffixed name takes 64-byte vectors, the `256` and `128` suffixes take 32- and 16-byte
// vectors respectively.
// NOTE(review): `improper_ctypes` is presumably allowed because the lint does not consider the
// SIMD vector types FFI-safe — confirm.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // Two-source byte permute, called by the `*_permutex2var_epi8` functions.
    #[link_name = "llvm.x86.avx512.vpermi2var.qi.512"]
    fn vpermi2b(a: i8x64, idx: i8x64, b: i8x64) -> i8x64;
    #[link_name = "llvm.x86.avx512.vpermi2var.qi.256"]
    fn vpermi2b256(a: i8x32, idx: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx512.vpermi2var.qi.128"]
    fn vpermi2b128(a: i8x16, idx: i8x16, b: i8x16) -> i8x16;

    // Single-source byte permute, called by the `*_permutexvar_epi8` functions. Note the
    // argument order here (data, then index) is the reverse of the public wrappers.
    #[link_name = "llvm.x86.avx512.permvar.qi.512"]
    fn vpermb(a: i8x64, idx: i8x64) -> i8x64;
    #[link_name = "llvm.x86.avx512.permvar.qi.256"]
    fn vpermb256(a: i8x32, idx: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx512.permvar.qi.128"]
    fn vpermb128(a: i8x16, idx: i8x16) -> i8x16;

    // Multishift, called by the `*_multishift_epi64_epi8` functions.
    #[link_name = "llvm.x86.avx512.pmultishift.qb.512"]
    fn vpmultishiftqb(a: i8x64, b: i8x64) -> i8x64;
    #[link_name = "llvm.x86.avx512.pmultishift.qb.256"]
    fn vpmultishiftqb256(a: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx512.pmultishift.qb.128"]
    fn vpmultishiftqb128(a: i8x16, b: i8x16) -> i8x16;
}
478
479#[cfg(test)]
480mod tests {
481
482    use stdarch_test::simd_test;
483
484    use crate::core_arch::x86::*;
485
486    #[simd_test(enable = "avx512vbmi")]
487    unsafe fn test_mm512_permutex2var_epi8() {
488        #[rustfmt::skip]
489        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
490                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
491                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
492                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
493        #[rustfmt::skip]
494        let idx = _mm512_set_epi8(1,  1<<6, 2,  1<<6, 3,  1<<6, 4,  1<<6, 5,  1<<6, 6,  1<<6, 7,  1<<6, 8,  1<<6,
495                                  9,  1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6,
496                                  17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6,
497                                  25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6);
498        let b = _mm512_set1_epi8(100);
499        let r = _mm512_permutex2var_epi8(a, idx, b);
500        #[rustfmt::skip]
501        let e = _mm512_set_epi8(
502            62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100,
503            54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100,
504            46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100,
505            38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100,
506        );
507        assert_eq_m512i(r, e);
508    }
509
510    #[simd_test(enable = "avx512vbmi")]
511    unsafe fn test_mm512_mask_permutex2var_epi8() {
512        #[rustfmt::skip]
513        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
514                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
515                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
516                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
517        #[rustfmt::skip]
518        let idx = _mm512_set_epi8(1,  1<<6, 2,  1<<6, 3,  1<<6, 4,  1<<6, 5,  1<<6, 6,  1<<6, 7,  1<<6, 8,  1<<6,
519                                  9,  1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6,
520                                  17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6,
521                                  25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6);
522        let b = _mm512_set1_epi8(100);
523        let r = _mm512_mask_permutex2var_epi8(a, 0, idx, b);
524        assert_eq_m512i(r, a);
525        let r = _mm512_mask_permutex2var_epi8(
526            a,
527            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
528            idx,
529            b,
530        );
531        #[rustfmt::skip]
532        let e = _mm512_set_epi8(
533            62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100,
534            54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100,
535            46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100,
536            38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100,
537        );
538        assert_eq_m512i(r, e);
539    }
540
541    #[simd_test(enable = "avx512vbmi")]
542    unsafe fn test_mm512_maskz_permutex2var_epi8() {
543        #[rustfmt::skip]
544        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
545                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
546                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
547                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
548        #[rustfmt::skip]
549        let idx = _mm512_set_epi8(1,  1<<6, 2,  1<<6, 3,  1<<6, 4,  1<<6, 5,  1<<6, 6,  1<<6, 7,  1<<6, 8,  1<<6,
550                                  9,  1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6,
551                                  17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6,
552                                  25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6);
553        let b = _mm512_set1_epi8(100);
554        let r = _mm512_maskz_permutex2var_epi8(0, a, idx, b);
555        assert_eq_m512i(r, _mm512_setzero_si512());
556        let r = _mm512_maskz_permutex2var_epi8(
557            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
558            a,
559            idx,
560            b,
561        );
562        #[rustfmt::skip]
563        let e = _mm512_set_epi8(
564            62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100,
565            54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100,
566            46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100,
567            38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100,
568        );
569        assert_eq_m512i(r, e);
570    }
571
572    #[simd_test(enable = "avx512vbmi")]
573    unsafe fn test_mm512_mask2_permutex2var_epi8() {
574        #[rustfmt::skip]
575        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
576                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
577                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
578                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
579        #[rustfmt::skip]
580        let idx = _mm512_set_epi8(1,  1<<6, 2,  1<<6, 3,  1<<6, 4,  1<<6, 5,  1<<6, 6,  1<<6, 7,  1<<6, 8,  1<<6,
581                                  9,  1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6,
582                                  17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6,
583                                  25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6);
584        let b = _mm512_set1_epi8(100);
585        let r = _mm512_mask2_permutex2var_epi8(a, idx, 0, b);
586        assert_eq_m512i(r, idx);
587        let r = _mm512_mask2_permutex2var_epi8(
588            a,
589            idx,
590            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
591            b,
592        );
593        #[rustfmt::skip]
594        let e = _mm512_set_epi8(
595            62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100,
596            54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100,
597            46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100,
598            38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100,
599        );
600        assert_eq_m512i(r, e);
601    }
602
603    #[simd_test(enable = "avx512vbmi,avx512vl")]
604    unsafe fn test_mm256_permutex2var_epi8() {
605        #[rustfmt::skip]
606        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
607                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
608        #[rustfmt::skip]
609        let idx = _mm256_set_epi8(1,  1<<5, 2,  1<<5, 3,  1<<5, 4,  1<<5, 5,  1<<5, 6,  1<<5, 7,  1<<5, 8,  1<<5,
610                                  9,  1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
611        let b = _mm256_set1_epi8(100);
612        let r = _mm256_permutex2var_epi8(a, idx, b);
613        #[rustfmt::skip]
614        let e = _mm256_set_epi8(
615            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
616            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
617        );
618        assert_eq_m256i(r, e);
619    }
620
621    #[simd_test(enable = "avx512vbmi,avx512vl")]
622    unsafe fn test_mm256_mask_permutex2var_epi8() {
623        #[rustfmt::skip]
624        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
625                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
626        #[rustfmt::skip]
627        let idx = _mm256_set_epi8(1,  1<<5, 2,  1<<5, 3,  1<<5, 4,  1<<5, 5,  1<<5, 6,  1<<5, 7,  1<<5, 8,  1<<5,
628                                  9,  1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
629        let b = _mm256_set1_epi8(100);
630        let r = _mm256_mask_permutex2var_epi8(a, 0, idx, b);
631        assert_eq_m256i(r, a);
632        let r = _mm256_mask_permutex2var_epi8(a, 0b11111111_11111111_11111111_11111111, idx, b);
633        #[rustfmt::skip]
634        let e = _mm256_set_epi8(
635            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
636            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
637        );
638        assert_eq_m256i(r, e);
639    }
640
641    #[simd_test(enable = "avx512vbmi,avx512vl")]
642    unsafe fn test_mm256_maskz_permutex2var_epi8() {
643        #[rustfmt::skip]
644        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
645                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
646        #[rustfmt::skip]
647        let idx = _mm256_set_epi8(1,  1<<5, 2,  1<<5, 3,  1<<5, 4,  1<<5, 5,  1<<5, 6,  1<<5, 7,  1<<5, 8,  1<<5,
648                                  9,  1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
649        let b = _mm256_set1_epi8(100);
650        let r = _mm256_maskz_permutex2var_epi8(0, a, idx, b);
651        assert_eq_m256i(r, _mm256_setzero_si256());
652        let r = _mm256_maskz_permutex2var_epi8(0b11111111_11111111_11111111_11111111, a, idx, b);
653        #[rustfmt::skip]
654        let e = _mm256_set_epi8(
655            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
656            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
657        );
658        assert_eq_m256i(r, e);
659    }
660
661    #[simd_test(enable = "avx512vbmi,avx512vl")]
662    unsafe fn test_mm256_mask2_permutex2var_epi8() {
663        #[rustfmt::skip]
664        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
665                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
666        #[rustfmt::skip]
667        let idx = _mm256_set_epi8(1,  1<<5, 2,  1<<5, 3,  1<<5, 4,  1<<5, 5,  1<<5, 6,  1<<5, 7,  1<<5, 8,  1<<5,
668                                  9,  1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
669        let b = _mm256_set1_epi8(100);
670        let r = _mm256_mask2_permutex2var_epi8(a, idx, 0, b);
671        assert_eq_m256i(r, idx);
672        let r = _mm256_mask2_permutex2var_epi8(a, idx, 0b11111111_11111111_11111111_11111111, b);
673        #[rustfmt::skip]
674        let e = _mm256_set_epi8(
675            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
676            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
677        );
678        assert_eq_m256i(r, e);
679    }
680
681    #[simd_test(enable = "avx512vbmi,avx512vl")]
682    unsafe fn test_mm_permutex2var_epi8() {
683        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
684        #[rustfmt::skip]
685        let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
686        let b = _mm_set1_epi8(100);
687        let r = _mm_permutex2var_epi8(a, idx, b);
688        let e = _mm_set_epi8(
689            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
690        );
691        assert_eq_m128i(r, e);
692    }
693
694    #[simd_test(enable = "avx512vbmi,avx512vl")]
695    unsafe fn test_mm_mask_permutex2var_epi8() {
696        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
697        #[rustfmt::skip]
698        let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
699        let b = _mm_set1_epi8(100);
700        let r = _mm_mask_permutex2var_epi8(a, 0, idx, b);
701        assert_eq_m128i(r, a);
702        let r = _mm_mask_permutex2var_epi8(a, 0b11111111_11111111, idx, b);
703        let e = _mm_set_epi8(
704            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
705        );
706        assert_eq_m128i(r, e);
707    }
708
709    #[simd_test(enable = "avx512vbmi,avx512vl")]
710    unsafe fn test_mm_maskz_permutex2var_epi8() {
711        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
712        #[rustfmt::skip]
713        let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
714        let b = _mm_set1_epi8(100);
715        let r = _mm_maskz_permutex2var_epi8(0, a, idx, b);
716        assert_eq_m128i(r, _mm_setzero_si128());
717        let r = _mm_maskz_permutex2var_epi8(0b11111111_11111111, a, idx, b);
718        let e = _mm_set_epi8(
719            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
720        );
721        assert_eq_m128i(r, e);
722    }
723
724    #[simd_test(enable = "avx512vbmi,avx512vl")]
725    unsafe fn test_mm_mask2_permutex2var_epi8() {
726        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
727        #[rustfmt::skip]
728        let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
729        let b = _mm_set1_epi8(100);
730        let r = _mm_mask2_permutex2var_epi8(a, idx, 0, b);
731        assert_eq_m128i(r, idx);
732        let r = _mm_mask2_permutex2var_epi8(a, idx, 0b11111111_11111111, b);
733        let e = _mm_set_epi8(
734            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
735        );
736        assert_eq_m128i(r, e);
737    }
738
739    #[simd_test(enable = "avx512vbmi")]
740    unsafe fn test_mm512_permutexvar_epi8() {
741        let idx = _mm512_set1_epi8(1);
742        #[rustfmt::skip]
743        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
744                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
745                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
746                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
747        let r = _mm512_permutexvar_epi8(idx, a);
748        let e = _mm512_set1_epi8(62);
749        assert_eq_m512i(r, e);
750    }
751
752    #[simd_test(enable = "avx512vbmi")]
753    unsafe fn test_mm512_mask_permutexvar_epi8() {
754        let idx = _mm512_set1_epi8(1);
755        #[rustfmt::skip]
756        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
757                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
758                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
759                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
760        let r = _mm512_mask_permutexvar_epi8(a, 0, idx, a);
761        assert_eq_m512i(r, a);
762        let r = _mm512_mask_permutexvar_epi8(
763            a,
764            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
765            idx,
766            a,
767        );
768        let e = _mm512_set1_epi8(62);
769        assert_eq_m512i(r, e);
770    }
771
772    #[simd_test(enable = "avx512vbmi")]
773    unsafe fn test_mm512_maskz_permutexvar_epi8() {
774        let idx = _mm512_set1_epi8(1);
775        #[rustfmt::skip]
776        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
777                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
778                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
779                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
780        let r = _mm512_maskz_permutexvar_epi8(0, idx, a);
781        assert_eq_m512i(r, _mm512_setzero_si512());
782        let r = _mm512_maskz_permutexvar_epi8(
783            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
784            idx,
785            a,
786        );
787        let e = _mm512_set1_epi8(62);
788        assert_eq_m512i(r, e);
789    }
790
791    #[simd_test(enable = "avx512vbmi,avx512vl")]
792    unsafe fn test_mm256_permutexvar_epi8() {
793        let idx = _mm256_set1_epi8(1);
794        #[rustfmt::skip]
795        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
796                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
797        let r = _mm256_permutexvar_epi8(idx, a);
798        let e = _mm256_set1_epi8(30);
799        assert_eq_m256i(r, e);
800    }
801
802    #[simd_test(enable = "avx512vbmi,avx512vl")]
803    unsafe fn test_mm256_mask_permutexvar_epi8() {
804        let idx = _mm256_set1_epi8(1);
805        #[rustfmt::skip]
806        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
807                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
808        let r = _mm256_mask_permutexvar_epi8(a, 0, idx, a);
809        assert_eq_m256i(r, a);
810        let r = _mm256_mask_permutexvar_epi8(a, 0b11111111_11111111_11111111_11111111, idx, a);
811        let e = _mm256_set1_epi8(30);
812        assert_eq_m256i(r, e);
813    }
814
815    #[simd_test(enable = "avx512vbmi,avx512vl")]
816    unsafe fn test_mm256_maskz_permutexvar_epi8() {
817        let idx = _mm256_set1_epi8(1);
818        #[rustfmt::skip]
819        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
820                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
821        let r = _mm256_maskz_permutexvar_epi8(0, idx, a);
822        assert_eq_m256i(r, _mm256_setzero_si256());
823        let r = _mm256_maskz_permutexvar_epi8(0b11111111_11111111_11111111_11111111, idx, a);
824        let e = _mm256_set1_epi8(30);
825        assert_eq_m256i(r, e);
826    }
827
828    #[simd_test(enable = "avx512vbmi,avx512vl")]
829    unsafe fn test_mm_permutexvar_epi8() {
830        let idx = _mm_set1_epi8(1);
831        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
832        let r = _mm_permutexvar_epi8(idx, a);
833        let e = _mm_set1_epi8(14);
834        assert_eq_m128i(r, e);
835    }
836
837    #[simd_test(enable = "avx512vbmi,avx512vl")]
838    unsafe fn test_mm_mask_permutexvar_epi8() {
839        let idx = _mm_set1_epi8(1);
840        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
841        let r = _mm_mask_permutexvar_epi8(a, 0, idx, a);
842        assert_eq_m128i(r, a);
843        let r = _mm_mask_permutexvar_epi8(a, 0b11111111_11111111, idx, a);
844        let e = _mm_set1_epi8(14);
845        assert_eq_m128i(r, e);
846    }
847
848    #[simd_test(enable = "avx512vbmi,avx512vl")]
849    unsafe fn test_mm_maskz_permutexvar_epi8() {
850        let idx = _mm_set1_epi8(1);
851        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
852        let r = _mm_maskz_permutexvar_epi8(0, idx, a);
853        assert_eq_m128i(r, _mm_setzero_si128());
854        let r = _mm_maskz_permutexvar_epi8(0b11111111_11111111, idx, a);
855        let e = _mm_set1_epi8(14);
856        assert_eq_m128i(r, e);
857    }
858
859    #[simd_test(enable = "avx512vbmi")]
860    unsafe fn test_mm512_multishift_epi64_epi8() {
861        let a = _mm512_set1_epi8(1);
862        let b = _mm512_set1_epi8(1);
863        let r = _mm512_multishift_epi64_epi8(a, b);
864        let e = _mm512_set1_epi8(1 << 7);
865        assert_eq_m512i(r, e);
866    }
867
868    #[simd_test(enable = "avx512vbmi")]
869    unsafe fn test_mm512_mask_multishift_epi64_epi8() {
870        let a = _mm512_set1_epi8(1);
871        let b = _mm512_set1_epi8(1);
872        let r = _mm512_mask_multishift_epi64_epi8(a, 0, a, b);
873        assert_eq_m512i(r, a);
874        let r = _mm512_mask_multishift_epi64_epi8(
875            a,
876            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
877            a,
878            b,
879        );
880        let e = _mm512_set1_epi8(1 << 7);
881        assert_eq_m512i(r, e);
882    }
883
884    #[simd_test(enable = "avx512vbmi")]
885    unsafe fn test_mm512_maskz_multishift_epi64_epi8() {
886        let a = _mm512_set1_epi8(1);
887        let b = _mm512_set1_epi8(1);
888        let r = _mm512_maskz_multishift_epi64_epi8(0, a, b);
889        assert_eq_m512i(r, _mm512_setzero_si512());
890        let r = _mm512_maskz_multishift_epi64_epi8(
891            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
892            a,
893            b,
894        );
895        let e = _mm512_set1_epi8(1 << 7);
896        assert_eq_m512i(r, e);
897    }
898
899    #[simd_test(enable = "avx512vbmi,avx512vl")]
900    unsafe fn test_mm256_multishift_epi64_epi8() {
901        let a = _mm256_set1_epi8(1);
902        let b = _mm256_set1_epi8(1);
903        let r = _mm256_multishift_epi64_epi8(a, b);
904        let e = _mm256_set1_epi8(1 << 7);
905        assert_eq_m256i(r, e);
906    }
907
908    #[simd_test(enable = "avx512vbmi,avx512vl")]
909    unsafe fn test_mm256_mask_multishift_epi64_epi8() {
910        let a = _mm256_set1_epi8(1);
911        let b = _mm256_set1_epi8(1);
912        let r = _mm256_mask_multishift_epi64_epi8(a, 0, a, b);
913        assert_eq_m256i(r, a);
914        let r = _mm256_mask_multishift_epi64_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
915        let e = _mm256_set1_epi8(1 << 7);
916        assert_eq_m256i(r, e);
917    }
918
919    #[simd_test(enable = "avx512vbmi,avx512vl")]
920    unsafe fn test_mm256_maskz_multishift_epi64_epi8() {
921        let a = _mm256_set1_epi8(1);
922        let b = _mm256_set1_epi8(1);
923        let r = _mm256_maskz_multishift_epi64_epi8(0, a, b);
924        assert_eq_m256i(r, _mm256_setzero_si256());
925        let r = _mm256_maskz_multishift_epi64_epi8(0b11111111_11111111_11111111_11111111, a, b);
926        let e = _mm256_set1_epi8(1 << 7);
927        assert_eq_m256i(r, e);
928    }
929
930    #[simd_test(enable = "avx512vbmi,avx512vl")]
931    unsafe fn test_mm_multishift_epi64_epi8() {
932        let a = _mm_set1_epi8(1);
933        let b = _mm_set1_epi8(1);
934        let r = _mm_multishift_epi64_epi8(a, b);
935        let e = _mm_set1_epi8(1 << 7);
936        assert_eq_m128i(r, e);
937    }
938
939    #[simd_test(enable = "avx512vbmi,avx512vl")]
940    unsafe fn test_mm_mask_multishift_epi64_epi8() {
941        let a = _mm_set1_epi8(1);
942        let b = _mm_set1_epi8(1);
943        let r = _mm_mask_multishift_epi64_epi8(a, 0, a, b);
944        assert_eq_m128i(r, a);
945        let r = _mm_mask_multishift_epi64_epi8(a, 0b11111111_11111111, a, b);
946        let e = _mm_set1_epi8(1 << 7);
947        assert_eq_m128i(r, e);
948    }
949
950    #[simd_test(enable = "avx512vbmi,avx512vl")]
951    unsafe fn test_mm_maskz_multishift_epi64_epi8() {
952        let a = _mm_set1_epi8(1);
953        let b = _mm_set1_epi8(1);
954        let r = _mm_maskz_multishift_epi64_epi8(0, a, b);
955        assert_eq_m128i(r, _mm_setzero_si128());
956        let r = _mm_maskz_multishift_epi64_epi8(0b11111111_11111111, a, b);
957        let e = _mm_set1_epi8(1 << 7);
958        assert_eq_m128i(r, e);
959    }
960}