core/stdarch/crates/core_arch/src/x86/
avx512vbmi.rs

1use crate::core_arch::{simd::*, x86::*};
2use crate::intrinsics::simd::*;
3
4#[cfg(test)]
5use stdarch_test::assert_instr;
6
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi8&expand=4262)
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
pub unsafe fn _mm512_permutex2var_epi8(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    // The low 6 bits of each idx byte pick one of 64 byte lanes; bit 6 selects
    // the source (`a` when clear, `b` when set) — see the 1<<6 pattern in the tests.
    transmute(vpermi2b(a.as_i8x64(), idx.as_i8x64(), b.as_i8x64()))
}
17
18/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
19///
20/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi8&expand=4259)
21#[inline]
22#[target_feature(enable = "avx512vbmi")]
23#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24#[cfg_attr(test, assert_instr(vpermt2b))]
25pub unsafe fn _mm512_mask_permutex2var_epi8(
26    a: __m512i,
27    k: __mmask64,
28    idx: __m512i,
29    b: __m512i,
30) -> __m512i {
31    let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
32    transmute(simd_select_bitmask(k, permute, a.as_i8x64()))
33}
34
35/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36///
37/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi8&expand=4261)
38#[inline]
39#[target_feature(enable = "avx512vbmi")]
40#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
42pub unsafe fn _mm512_maskz_permutex2var_epi8(
43    k: __mmask64,
44    a: __m512i,
45    idx: __m512i,
46    b: __m512i,
47) -> __m512i {
48    let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
49    transmute(simd_select_bitmask(k, permute, i8x64::ZERO))
50}
51
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi8&expand=4260)
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermi2b))]
pub unsafe fn _mm512_mask2_permutex2var_epi8(
    a: __m512i,
    idx: __m512i,
    k: __mmask64,
    b: __m512i,
) -> __m512i {
    let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
    // Unlike the plain mask variant, masked-off lanes fall back to `idx`, not `a`.
    transmute(simd_select_bitmask(k, permute, idx.as_i8x64()))
}
68
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi8&expand=4258)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
pub unsafe fn _mm256_permutex2var_epi8(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    // 256-bit form: the low 5 bits of each idx byte pick a lane; bit 5 selects
    // between `a` and `b` (see the 1<<5 pattern in the tests).
    transmute(vpermi2b256(a.as_i8x32(), idx.as_i8x32(), b.as_i8x32()))
}
79
80/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
81///
82/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi8&expand=4255)
83#[inline]
84#[target_feature(enable = "avx512vbmi,avx512vl")]
85#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
86#[cfg_attr(test, assert_instr(vpermt2b))]
87pub unsafe fn _mm256_mask_permutex2var_epi8(
88    a: __m256i,
89    k: __mmask32,
90    idx: __m256i,
91    b: __m256i,
92) -> __m256i {
93    let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
94    transmute(simd_select_bitmask(k, permute, a.as_i8x32()))
95}
96
97/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
98///
99/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi8&expand=4257)
100#[inline]
101#[target_feature(enable = "avx512vbmi,avx512vl")]
102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
103#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
104pub unsafe fn _mm256_maskz_permutex2var_epi8(
105    k: __mmask32,
106    a: __m256i,
107    idx: __m256i,
108    b: __m256i,
109) -> __m256i {
110    let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
111    transmute(simd_select_bitmask(k, permute, i8x32::ZERO))
112}
113
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi8&expand=4256)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermi2b))]
pub unsafe fn _mm256_mask2_permutex2var_epi8(
    a: __m256i,
    idx: __m256i,
    k: __mmask32,
    b: __m256i,
) -> __m256i {
    let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
    // Unlike the plain mask variant, masked-off lanes fall back to `idx`, not `a`.
    transmute(simd_select_bitmask(k, permute, idx.as_i8x32()))
}
130
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi8&expand=4254)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
pub unsafe fn _mm_permutex2var_epi8(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    // 128-bit form: presumably the low 4 idx bits pick the lane and bit 4 picks
    // the source, mirroring the wider variants — confirm against Intel's guide.
    transmute(vpermi2b128(a.as_i8x16(), idx.as_i8x16(), b.as_i8x16()))
}
141
142/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
143///
144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi8&expand=4251)
145#[inline]
146#[target_feature(enable = "avx512vbmi,avx512vl")]
147#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
148#[cfg_attr(test, assert_instr(vpermt2b))]
149pub unsafe fn _mm_mask_permutex2var_epi8(
150    a: __m128i,
151    k: __mmask16,
152    idx: __m128i,
153    b: __m128i,
154) -> __m128i {
155    let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
156    transmute(simd_select_bitmask(k, permute, a.as_i8x16()))
157}
158
159/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
160///
161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi8&expand=4253)
162#[inline]
163#[target_feature(enable = "avx512vbmi,avx512vl")]
164#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
165#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
166pub unsafe fn _mm_maskz_permutex2var_epi8(
167    k: __mmask16,
168    a: __m128i,
169    idx: __m128i,
170    b: __m128i,
171) -> __m128i {
172    let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
173    transmute(simd_select_bitmask(k, permute, i8x16::ZERO))
174}
175
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi8&expand=4252)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermi2b))]
pub unsafe fn _mm_mask2_permutex2var_epi8(
    a: __m128i,
    idx: __m128i,
    k: __mmask16,
    b: __m128i,
) -> __m128i {
    let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
    // Unlike the plain mask variant, masked-off lanes fall back to `idx`, not `a`.
    transmute(simd_select_bitmask(k, permute, idx.as_i8x16()))
}
192
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi8&expand=4316)
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm512_permutexvar_epi8(idx: __m512i, a: __m512i) -> __m512i {
    // Single-source permute: each output byte is the `a` lane selected by `idx`.
    // Note the LLVM intrinsic takes (a, idx) while this API takes (idx, a).
    transmute(vpermb(a.as_i8x64(), idx.as_i8x64()))
}
203
204/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
205///
206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi8&expand=4314)
207#[inline]
208#[target_feature(enable = "avx512vbmi")]
209#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
210#[cfg_attr(test, assert_instr(vpermb))]
211pub unsafe fn _mm512_mask_permutexvar_epi8(
212    src: __m512i,
213    k: __mmask64,
214    idx: __m512i,
215    a: __m512i,
216) -> __m512i {
217    let permute = _mm512_permutexvar_epi8(idx, a).as_i8x64();
218    transmute(simd_select_bitmask(k, permute, src.as_i8x64()))
219}
220
221/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
222///
223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi8&expand=4315)
224#[inline]
225#[target_feature(enable = "avx512vbmi")]
226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
227#[cfg_attr(test, assert_instr(vpermb))]
228pub unsafe fn _mm512_maskz_permutexvar_epi8(k: __mmask64, idx: __m512i, a: __m512i) -> __m512i {
229    let permute = _mm512_permutexvar_epi8(idx, a).as_i8x64();
230    transmute(simd_select_bitmask(k, permute, i8x64::ZERO))
231}
232
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi8&expand=4313)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm256_permutexvar_epi8(idx: __m256i, a: __m256i) -> __m256i {
    // Single-source permute; note the LLVM intrinsic's argument order is (a, idx).
    transmute(vpermb256(a.as_i8x32(), idx.as_i8x32()))
}
243
244/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
245///
246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi8&expand=4311)
247#[inline]
248#[target_feature(enable = "avx512vbmi,avx512vl")]
249#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
250#[cfg_attr(test, assert_instr(vpermb))]
251pub unsafe fn _mm256_mask_permutexvar_epi8(
252    src: __m256i,
253    k: __mmask32,
254    idx: __m256i,
255    a: __m256i,
256) -> __m256i {
257    let permute = _mm256_permutexvar_epi8(idx, a).as_i8x32();
258    transmute(simd_select_bitmask(k, permute, src.as_i8x32()))
259}
260
261/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
262///
263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi8&expand=4312)
264#[inline]
265#[target_feature(enable = "avx512vbmi,avx512vl")]
266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
267#[cfg_attr(test, assert_instr(vpermb))]
268pub unsafe fn _mm256_maskz_permutexvar_epi8(k: __mmask32, idx: __m256i, a: __m256i) -> __m256i {
269    let permute = _mm256_permutexvar_epi8(idx, a).as_i8x32();
270    transmute(simd_select_bitmask(k, permute, i8x32::ZERO))
271}
272
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutexvar_epi8&expand=4310)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm_permutexvar_epi8(idx: __m128i, a: __m128i) -> __m128i {
    // Single-source permute; note the LLVM intrinsic's argument order is (a, idx).
    transmute(vpermb128(a.as_i8x16(), idx.as_i8x16()))
}
283
284/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
285///
286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutexvar_epi8&expand=4308)
287#[inline]
288#[target_feature(enable = "avx512vbmi,avx512vl")]
289#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
290#[cfg_attr(test, assert_instr(vpermb))]
291pub unsafe fn _mm_mask_permutexvar_epi8(
292    src: __m128i,
293    k: __mmask16,
294    idx: __m128i,
295    a: __m128i,
296) -> __m128i {
297    let permute = _mm_permutexvar_epi8(idx, a).as_i8x16();
298    transmute(simd_select_bitmask(k, permute, src.as_i8x16()))
299}
300
301/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
302///
303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutexvar_epi8&expand=4309)
304#[inline]
305#[target_feature(enable = "avx512vbmi,avx512vl")]
306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
307#[cfg_attr(test, assert_instr(vpermb))]
308pub unsafe fn _mm_maskz_permutexvar_epi8(k: __mmask16, idx: __m128i, a: __m128i) -> __m128i {
309    let permute = _mm_permutexvar_epi8(idx, a).as_i8x16();
310    transmute(simd_select_bitmask(k, permute, i8x16::ZERO))
311}
312
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_multishift_epi64_epi8&expand=4026)
#[inline]
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm512_multishift_epi64_epi8(a: __m512i, b: __m512i) -> __m512i {
    // Thin wrapper over the LLVM vpmultishiftqb intrinsic; `a` carries the
    // per-byte shift controls and `b` the source qwords.
    transmute(vpmultishiftqb(a.as_i8x64(), b.as_i8x64()))
}
323
324/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
325///
326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_multishift_epi64_epi8&expand=4024)
327#[inline]
328#[target_feature(enable = "avx512vbmi")]
329#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
330#[cfg_attr(test, assert_instr(vpmultishiftqb))]
331pub unsafe fn _mm512_mask_multishift_epi64_epi8(
332    src: __m512i,
333    k: __mmask64,
334    a: __m512i,
335    b: __m512i,
336) -> __m512i {
337    let multishift = _mm512_multishift_epi64_epi8(a, b).as_i8x64();
338    transmute(simd_select_bitmask(k, multishift, src.as_i8x64()))
339}
340
341/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
342///
343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_multishift_epi64_epi8&expand=4025)
344#[inline]
345#[target_feature(enable = "avx512vbmi")]
346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
347#[cfg_attr(test, assert_instr(vpmultishiftqb))]
348pub unsafe fn _mm512_maskz_multishift_epi64_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
349    let multishift = _mm512_multishift_epi64_epi8(a, b).as_i8x64();
350    transmute(simd_select_bitmask(k, multishift, i8x64::ZERO))
351}
352
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_multishift_epi64_epi8&expand=4023)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm256_multishift_epi64_epi8(a: __m256i, b: __m256i) -> __m256i {
    // Thin wrapper over the 256-bit LLVM vpmultishiftqb intrinsic.
    transmute(vpmultishiftqb256(a.as_i8x32(), b.as_i8x32()))
}
363
364/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
365///
366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_multishift_epi64_epi8&expand=4021)
367#[inline]
368#[target_feature(enable = "avx512vbmi,avx512vl")]
369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
370#[cfg_attr(test, assert_instr(vpmultishiftqb))]
371pub unsafe fn _mm256_mask_multishift_epi64_epi8(
372    src: __m256i,
373    k: __mmask32,
374    a: __m256i,
375    b: __m256i,
376) -> __m256i {
377    let multishift = _mm256_multishift_epi64_epi8(a, b).as_i8x32();
378    transmute(simd_select_bitmask(k, multishift, src.as_i8x32()))
379}
380
381/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
382///
383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_multishift_epi64_epi8&expand=4022)
384#[inline]
385#[target_feature(enable = "avx512vbmi,avx512vl")]
386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
387#[cfg_attr(test, assert_instr(vpmultishiftqb))]
388pub unsafe fn _mm256_maskz_multishift_epi64_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
389    let multishift = _mm256_multishift_epi64_epi8(a, b).as_i8x32();
390    transmute(simd_select_bitmask(k, multishift, i8x32::ZERO))
391}
392
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_multishift_epi64_epi8&expand=4020)
#[inline]
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm_multishift_epi64_epi8(a: __m128i, b: __m128i) -> __m128i {
    // Thin wrapper over the 128-bit LLVM vpmultishiftqb intrinsic.
    transmute(vpmultishiftqb128(a.as_i8x16(), b.as_i8x16()))
}
403
404/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
405///
406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_multishift_epi64_epi8&expand=4018)
407#[inline]
408#[target_feature(enable = "avx512vbmi,avx512vl")]
409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
410#[cfg_attr(test, assert_instr(vpmultishiftqb))]
411pub unsafe fn _mm_mask_multishift_epi64_epi8(
412    src: __m128i,
413    k: __mmask16,
414    a: __m128i,
415    b: __m128i,
416) -> __m128i {
417    let multishift = _mm_multishift_epi64_epi8(a, b).as_i8x16();
418    transmute(simd_select_bitmask(k, multishift, src.as_i8x16()))
419}
420
421/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
422///
423/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_multishift_epi64_epi8&expand=4019)
424#[inline]
425#[target_feature(enable = "avx512vbmi,avx512vl")]
426#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
427#[cfg_attr(test, assert_instr(vpmultishiftqb))]
428pub unsafe fn _mm_maskz_multishift_epi64_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
429    let multishift = _mm_multishift_epi64_epi8(a, b).as_i8x16();
430    transmute(simd_select_bitmask(k, multishift, i8x16::ZERO))
431}
432
// Raw LLVM x86 intrinsic bindings backing the wrappers above. The `link_name`
// strings are LLVM's canonical intrinsic names for each vector width.
#[allow(improper_ctypes)]
extern "C" {
    // vpermi2b: two-source byte permute (a, b) selected by idx.
    #[link_name = "llvm.x86.avx512.vpermi2var.qi.512"]
    fn vpermi2b(a: i8x64, idx: i8x64, b: i8x64) -> i8x64;
    #[link_name = "llvm.x86.avx512.vpermi2var.qi.256"]
    fn vpermi2b256(a: i8x32, idx: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx512.vpermi2var.qi.128"]
    fn vpermi2b128(a: i8x16, idx: i8x16, b: i8x16) -> i8x16;

    // vpermb: single-source byte permute.
    #[link_name = "llvm.x86.avx512.permvar.qi.512"]
    fn vpermb(a: i8x64, idx: i8x64) -> i8x64;
    #[link_name = "llvm.x86.avx512.permvar.qi.256"]
    fn vpermb256(a: i8x32, idx: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx512.permvar.qi.128"]
    fn vpermb128(a: i8x16, idx: i8x16) -> i8x16;

    // vpmultishiftqb: per-qword byte-granular multishift.
    #[link_name = "llvm.x86.avx512.pmultishift.qb.512"]
    fn vpmultishiftqb(a: i8x64, b: i8x64) -> i8x64;
    #[link_name = "llvm.x86.avx512.pmultishift.qb.256"]
    fn vpmultishiftqb256(a: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx512.pmultishift.qb.128"]
    fn vpmultishiftqb128(a: i8x16, b: i8x16) -> i8x16;
}
456
457#[cfg(test)]
458mod tests {
459
460    use stdarch_test::simd_test;
461
462    use crate::core_arch::x86::*;
463
    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_permutex2var_epi8() {
        // _mm512_set_epi8 lists the highest lane first, so lane i of `a` holds 63 - i.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        // Alternate lane selectors for `a` with 1<<6 (bit 6 set), which selects
        // from `b`; the expected result interleaves a's values with 100.
        #[rustfmt::skip]
        let idx = _mm512_set_epi8(1,  1<<6, 2,  1<<6, 3,  1<<6, 4,  1<<6, 5,  1<<6, 6,  1<<6, 7,  1<<6, 8,  1<<6,
                                  9,  1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6,
                                  17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6,
                                  25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6);
        let b = _mm512_set1_epi8(100);
        let r = _mm512_permutex2var_epi8(a, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100,
            54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100,
            46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100,
            38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100,
        );
        assert_eq_m512i(r, e);
    }
487
    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_mask_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        #[rustfmt::skip]
        let idx = _mm512_set_epi8(1,  1<<6, 2,  1<<6, 3,  1<<6, 4,  1<<6, 5,  1<<6, 6,  1<<6, 7,  1<<6, 8,  1<<6,
                                  9,  1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6,
                                  17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6,
                                  25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6);
        let b = _mm512_set1_epi8(100);
        // All-zero writemask: `a` passes through unchanged.
        let r = _mm512_mask_permutex2var_epi8(a, 0, idx, b);
        assert_eq_m512i(r, a);
        // All-ones writemask: the permute applies to every lane.
        let r = _mm512_mask_permutex2var_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            idx,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100,
            54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100,
            46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100,
            38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100,
        );
        assert_eq_m512i(r, e);
    }
518
    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_maskz_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        #[rustfmt::skip]
        let idx = _mm512_set_epi8(1,  1<<6, 2,  1<<6, 3,  1<<6, 4,  1<<6, 5,  1<<6, 6,  1<<6, 7,  1<<6, 8,  1<<6,
                                  9,  1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6,
                                  17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6,
                                  25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6);
        let b = _mm512_set1_epi8(100);
        // All-zero zeromask: every lane is zeroed.
        let r = _mm512_maskz_permutex2var_epi8(0, a, idx, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // All-ones zeromask: the permute applies to every lane.
        let r = _mm512_maskz_permutex2var_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            idx,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100,
            54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100,
            46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100,
            38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100,
        );
        assert_eq_m512i(r, e);
    }
549
    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_mask2_permutex2var_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        #[rustfmt::skip]
        let idx = _mm512_set_epi8(1,  1<<6, 2,  1<<6, 3,  1<<6, 4,  1<<6, 5,  1<<6, 6,  1<<6, 7,  1<<6, 8,  1<<6,
                                  9,  1<<6, 10, 1<<6, 11, 1<<6, 12, 1<<6, 13, 1<<6, 14, 1<<6, 15, 1<<6, 16, 1<<6,
                                  17, 1<<6, 18, 1<<6, 19, 1<<6, 20, 1<<6, 21, 1<<6, 22, 1<<6, 23, 1<<6, 24, 1<<6,
                                  25, 1<<6, 26, 1<<6, 27, 1<<6, 28, 1<<6, 29, 1<<6, 30, 1<<6, 31, 1<<6, 32, 1<<6);
        let b = _mm512_set1_epi8(100);
        // All-zero mask: the mask2 variant falls back to `idx` (not `a`).
        let r = _mm512_mask2_permutex2var_epi8(a, idx, 0, b);
        assert_eq_m512i(r, idx);
        // All-ones mask: the permute applies to every lane.
        let r = _mm512_mask2_permutex2var_epi8(
            a,
            idx,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            62, 100, 61, 100, 60, 100, 59, 100, 58, 100, 57, 100, 56, 100, 55, 100,
            54, 100, 53, 100, 52, 100, 51, 100, 50, 100, 49, 100, 48, 100, 47, 100,
            46, 100, 45, 100, 44, 100, 43, 100, 42, 100, 41, 100, 40, 100, 39, 100,
            38, 100, 37, 100, 36, 100, 35, 100, 34, 100, 33, 100, 32, 100, 31, 100,
        );
        assert_eq_m512i(r, e);
    }
580
    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_permutex2var_epi8() {
        // `a` holds 0..=31; set_epi8 lists elements highest-first, so
        // element j of `a` contains 31 - j.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        // Selectors alternate between an index into `a` and 1 << 5, whose set
        // bit 5 switches the lookup to `b` (element 0 of `b`, i.e. 100).
        #[rustfmt::skip]
        let idx = _mm256_set_epi8(1,  1<<5, 2,  1<<5, 3,  1<<5, 4,  1<<5, 5,  1<<5, 6,  1<<5, 7,  1<<5, 8,  1<<5,
                                  9,  1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm256_set1_epi8(100);
        let r = _mm256_permutex2var_epi8(a, idx, b);
        // Index k into `a` yields 31 - k, interleaved with 100s from `b`.
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m256i(r, e);
    }
598
    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_mask_permutex2var_epi8() {
        // `a` holds 0..=31 (element j contains 31 - j, highest-first order).
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        // Bit 5 (1 << 5) of each selector picks from `b` instead of `a`.
        #[rustfmt::skip]
        let idx = _mm256_set_epi8(1,  1<<5, 2,  1<<5, 3,  1<<5, 4,  1<<5, 5,  1<<5, 6,  1<<5, 7,  1<<5, 8,  1<<5,
                                  9,  1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm256_set1_epi8(100);
        // Zero writemask: every element is copied from the src operand `a`.
        let r = _mm256_mask_permutex2var_epi8(a, 0, idx, b);
        assert_eq_m256i(r, a);
        // All-ones mask: full permute (31 - k from `a`, 100 from `b`).
        let r = _mm256_mask_permutex2var_epi8(a, 0b11111111_11111111_11111111_11111111, idx, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m256i(r, e);
    }
618
    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_maskz_permutex2var_epi8() {
        // `a` holds 0..=31 (element j contains 31 - j, highest-first order).
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        // Bit 5 (1 << 5) of each selector picks from `b` instead of `a`.
        #[rustfmt::skip]
        let idx = _mm256_set_epi8(1,  1<<5, 2,  1<<5, 3,  1<<5, 4,  1<<5, 5,  1<<5, 6,  1<<5, 7,  1<<5, 8,  1<<5,
                                  9,  1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm256_set1_epi8(100);
        // Zero mask: the zeromask variant clears every element.
        let r = _mm256_maskz_permutex2var_epi8(0, a, idx, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // All-ones mask: full permute (31 - k from `a`, 100 from `b`).
        let r = _mm256_maskz_permutex2var_epi8(0b11111111_11111111_11111111_11111111, a, idx, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m256i(r, e);
    }
638
    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm256_mask2_permutex2var_epi8() {
        // `a` holds 0..=31 (element j contains 31 - j, highest-first order).
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        // Bit 5 (1 << 5) of each selector picks from `b` instead of `a`.
        #[rustfmt::skip]
        let idx = _mm256_set_epi8(1,  1<<5, 2,  1<<5, 3,  1<<5, 4,  1<<5, 5,  1<<5, 6,  1<<5, 7,  1<<5, 8,  1<<5,
                                  9,  1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm256_set1_epi8(100);
        // With a zero mask the mask2 variant copies every element from `idx`.
        let r = _mm256_mask2_permutex2var_epi8(a, idx, 0, b);
        assert_eq_m256i(r, idx);
        // All-ones mask: full permute (31 - k from `a`, 100 from `b`).
        let r = _mm256_mask2_permutex2var_epi8(a, idx, 0b11111111_11111111_11111111_11111111, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m256i(r, e);
    }
658
    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_permutex2var_epi8() {
        // `a` holds 0..=15 (element j contains 15 - j, highest-first order).
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        // Bit 4 (1 << 4) of each selector picks from `b` (element 0, i.e. 100).
        #[rustfmt::skip]
        let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
        let b = _mm_set1_epi8(100);
        let r = _mm_permutex2var_epi8(a, idx, b);
        // Index k into `a` yields 15 - k, interleaved with 100s from `b`.
        let e = _mm_set_epi8(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m128i(r, e);
    }
671
    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_mask_permutex2var_epi8() {
        // `a` holds 0..=15 (element j contains 15 - j, highest-first order).
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        // Bit 4 (1 << 4) of each selector picks from `b` instead of `a`.
        #[rustfmt::skip]
        let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
        let b = _mm_set1_epi8(100);
        // Zero writemask: every element is copied from the src operand `a`.
        let r = _mm_mask_permutex2var_epi8(a, 0, idx, b);
        assert_eq_m128i(r, a);
        // All-ones mask: full permute (15 - k from `a`, 100 from `b`).
        let r = _mm_mask_permutex2var_epi8(a, 0b11111111_11111111, idx, b);
        let e = _mm_set_epi8(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m128i(r, e);
    }
686
    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_maskz_permutex2var_epi8() {
        // `a` holds 0..=15 (element j contains 15 - j, highest-first order).
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        // Bit 4 (1 << 4) of each selector picks from `b` instead of `a`.
        #[rustfmt::skip]
        let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
        let b = _mm_set1_epi8(100);
        // Zero mask: the zeromask variant clears every element.
        let r = _mm_maskz_permutex2var_epi8(0, a, idx, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // All-ones mask: full permute (15 - k from `a`, 100 from `b`).
        let r = _mm_maskz_permutex2var_epi8(0b11111111_11111111, a, idx, b);
        let e = _mm_set_epi8(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m128i(r, e);
    }
701
    #[simd_test(enable = "avx512vbmi,avx512vl")]
    unsafe fn test_mm_mask2_permutex2var_epi8() {
        // `a` holds 0..=15 (element j contains 15 - j, highest-first order).
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        // Bit 4 (1 << 4) of each selector picks from `b` instead of `a`.
        #[rustfmt::skip]
        let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
        let b = _mm_set1_epi8(100);
        // With a zero mask the mask2 variant copies every element from `idx`.
        let r = _mm_mask2_permutex2var_epi8(a, idx, 0, b);
        assert_eq_m128i(r, idx);
        // All-ones mask: full permute (15 - k from `a`, 100 from `b`).
        let r = _mm_mask2_permutex2var_epi8(a, idx, 0b11111111_11111111, b);
        let e = _mm_set_epi8(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m128i(r, e);
    }
716
    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_permutexvar_epi8() {
        // Every index is 1, so every output element is a copy of element 1.
        let idx = _mm512_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let r = _mm512_permutexvar_epi8(idx, a);
        // set_epi8 lists elements highest-first, so element 1 of `a` is 62.
        let e = _mm512_set1_epi8(62);
        assert_eq_m512i(r, e);
    }
729
    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_mask_permutexvar_epi8() {
        // Every index is 1, selecting element 1 of `a` for every lane.
        let idx = _mm512_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        // Zero writemask: every element is copied from the src operand `a`.
        let r = _mm512_mask_permutexvar_epi8(a, 0, idx, a);
        assert_eq_m512i(r, a);
        // All-ones mask: full permute broadcasts element 1 of `a`.
        let r = _mm512_mask_permutexvar_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            idx,
            a,
        );
        // set_epi8 lists elements highest-first, so element 1 of `a` is 62.
        let e = _mm512_set1_epi8(62);
        assert_eq_m512i(r, e);
    }
749
    #[simd_test(enable = "avx512vbmi")]
    unsafe fn test_mm512_maskz_permutexvar_epi8() {
        // Every index is 1, selecting element 1 of `a` for every lane.
        let idx = _mm512_set1_epi8(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        // Zero mask: the zeromask variant clears every element.
        let r = _mm512_maskz_permutexvar_epi8(0, idx, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // All-ones mask: full permute broadcasts element 1 of `a`.
        let r = _mm512_maskz_permutexvar_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            idx,
            a,
        );
        // set_epi8 lists elements highest-first, so element 1 of `a` is 62.
        let e = _mm512_set1_epi8(62);
        assert_eq_m512i(r, e);
    }
768
769    #[simd_test(enable = "avx512vbmi,avx512vl")]
770    unsafe fn test_mm256_permutexvar_epi8() {
771        let idx = _mm256_set1_epi8(1);
772        #[rustfmt::skip]
773        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
774                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
775        let r = _mm256_permutexvar_epi8(idx, a);
776        let e = _mm256_set1_epi8(30);
777        assert_eq_m256i(r, e);
778    }
779
780    #[simd_test(enable = "avx512vbmi,avx512vl")]
781    unsafe fn test_mm256_mask_permutexvar_epi8() {
782        let idx = _mm256_set1_epi8(1);
783        #[rustfmt::skip]
784        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
785                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
786        let r = _mm256_mask_permutexvar_epi8(a, 0, idx, a);
787        assert_eq_m256i(r, a);
788        let r = _mm256_mask_permutexvar_epi8(a, 0b11111111_11111111_11111111_11111111, idx, a);
789        let e = _mm256_set1_epi8(30);
790        assert_eq_m256i(r, e);
791    }
792
793    #[simd_test(enable = "avx512vbmi,avx512vl")]
794    unsafe fn test_mm256_maskz_permutexvar_epi8() {
795        let idx = _mm256_set1_epi8(1);
796        #[rustfmt::skip]
797        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
798                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
799        let r = _mm256_maskz_permutexvar_epi8(0, idx, a);
800        assert_eq_m256i(r, _mm256_setzero_si256());
801        let r = _mm256_maskz_permutexvar_epi8(0b11111111_11111111_11111111_11111111, idx, a);
802        let e = _mm256_set1_epi8(30);
803        assert_eq_m256i(r, e);
804    }
805
806    #[simd_test(enable = "avx512vbmi,avx512vl")]
807    unsafe fn test_mm_permutexvar_epi8() {
808        let idx = _mm_set1_epi8(1);
809        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
810        let r = _mm_permutexvar_epi8(idx, a);
811        let e = _mm_set1_epi8(14);
812        assert_eq_m128i(r, e);
813    }
814
815    #[simd_test(enable = "avx512vbmi,avx512vl")]
816    unsafe fn test_mm_mask_permutexvar_epi8() {
817        let idx = _mm_set1_epi8(1);
818        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
819        let r = _mm_mask_permutexvar_epi8(a, 0, idx, a);
820        assert_eq_m128i(r, a);
821        let r = _mm_mask_permutexvar_epi8(a, 0b11111111_11111111, idx, a);
822        let e = _mm_set1_epi8(14);
823        assert_eq_m128i(r, e);
824    }
825
826    #[simd_test(enable = "avx512vbmi,avx512vl")]
827    unsafe fn test_mm_maskz_permutexvar_epi8() {
828        let idx = _mm_set1_epi8(1);
829        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
830        let r = _mm_maskz_permutexvar_epi8(0, idx, a);
831        assert_eq_m128i(r, _mm_setzero_si128());
832        let r = _mm_maskz_permutexvar_epi8(0b11111111_11111111, idx, a);
833        let e = _mm_set1_epi8(14);
834        assert_eq_m128i(r, e);
835    }
836
837    #[simd_test(enable = "avx512vbmi")]
838    unsafe fn test_mm512_multishift_epi64_epi8() {
839        let a = _mm512_set1_epi8(1);
840        let b = _mm512_set1_epi8(1);
841        let r = _mm512_multishift_epi64_epi8(a, b);
842        let e = _mm512_set1_epi8(1 << 7);
843        assert_eq_m512i(r, e);
844    }
845
846    #[simd_test(enable = "avx512vbmi")]
847    unsafe fn test_mm512_mask_multishift_epi64_epi8() {
848        let a = _mm512_set1_epi8(1);
849        let b = _mm512_set1_epi8(1);
850        let r = _mm512_mask_multishift_epi64_epi8(a, 0, a, b);
851        assert_eq_m512i(r, a);
852        let r = _mm512_mask_multishift_epi64_epi8(
853            a,
854            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
855            a,
856            b,
857        );
858        let e = _mm512_set1_epi8(1 << 7);
859        assert_eq_m512i(r, e);
860    }
861
862    #[simd_test(enable = "avx512vbmi")]
863    unsafe fn test_mm512_maskz_multishift_epi64_epi8() {
864        let a = _mm512_set1_epi8(1);
865        let b = _mm512_set1_epi8(1);
866        let r = _mm512_maskz_multishift_epi64_epi8(0, a, b);
867        assert_eq_m512i(r, _mm512_setzero_si512());
868        let r = _mm512_maskz_multishift_epi64_epi8(
869            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
870            a,
871            b,
872        );
873        let e = _mm512_set1_epi8(1 << 7);
874        assert_eq_m512i(r, e);
875    }
876
877    #[simd_test(enable = "avx512vbmi,avx512vl")]
878    unsafe fn test_mm256_multishift_epi64_epi8() {
879        let a = _mm256_set1_epi8(1);
880        let b = _mm256_set1_epi8(1);
881        let r = _mm256_multishift_epi64_epi8(a, b);
882        let e = _mm256_set1_epi8(1 << 7);
883        assert_eq_m256i(r, e);
884    }
885
886    #[simd_test(enable = "avx512vbmi,avx512vl")]
887    unsafe fn test_mm256_mask_multishift_epi64_epi8() {
888        let a = _mm256_set1_epi8(1);
889        let b = _mm256_set1_epi8(1);
890        let r = _mm256_mask_multishift_epi64_epi8(a, 0, a, b);
891        assert_eq_m256i(r, a);
892        let r = _mm256_mask_multishift_epi64_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
893        let e = _mm256_set1_epi8(1 << 7);
894        assert_eq_m256i(r, e);
895    }
896
897    #[simd_test(enable = "avx512vbmi,avx512vl")]
898    unsafe fn test_mm256_maskz_multishift_epi64_epi8() {
899        let a = _mm256_set1_epi8(1);
900        let b = _mm256_set1_epi8(1);
901        let r = _mm256_maskz_multishift_epi64_epi8(0, a, b);
902        assert_eq_m256i(r, _mm256_setzero_si256());
903        let r = _mm256_maskz_multishift_epi64_epi8(0b11111111_11111111_11111111_11111111, a, b);
904        let e = _mm256_set1_epi8(1 << 7);
905        assert_eq_m256i(r, e);
906    }
907
908    #[simd_test(enable = "avx512vbmi,avx512vl")]
909    unsafe fn test_mm_multishift_epi64_epi8() {
910        let a = _mm_set1_epi8(1);
911        let b = _mm_set1_epi8(1);
912        let r = _mm_multishift_epi64_epi8(a, b);
913        let e = _mm_set1_epi8(1 << 7);
914        assert_eq_m128i(r, e);
915    }
916
917    #[simd_test(enable = "avx512vbmi,avx512vl")]
918    unsafe fn test_mm_mask_multishift_epi64_epi8() {
919        let a = _mm_set1_epi8(1);
920        let b = _mm_set1_epi8(1);
921        let r = _mm_mask_multishift_epi64_epi8(a, 0, a, b);
922        assert_eq_m128i(r, a);
923        let r = _mm_mask_multishift_epi64_epi8(a, 0b11111111_11111111, a, b);
924        let e = _mm_set1_epi8(1 << 7);
925        assert_eq_m128i(r, e);
926    }
927
928    #[simd_test(enable = "avx512vbmi,avx512vl")]
929    unsafe fn test_mm_maskz_multishift_epi64_epi8() {
930        let a = _mm_set1_epi8(1);
931        let b = _mm_set1_epi8(1);
932        let r = _mm_maskz_multishift_epi64_epi8(0, a, b);
933        assert_eq_m128i(r, _mm_setzero_si128());
934        let r = _mm_maskz_multishift_epi64_epi8(0b11111111_11111111, a, b);
935        let e = _mm_set1_epi8(1 << 7);
936        assert_eq_m128i(r, e);
937    }
938}