core/stdarch/crates/core_arch/src/x86/
avx512bitalg.rs

1//! Bit-oriented Algorithms (BITALG)
2//!
3//! The intrinsics here correspond to those in the `immintrin.h` C header.
4//!
5//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
6//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
7//!
8//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
9
10use crate::core_arch::simd::i8x16;
11use crate::core_arch::simd::i8x32;
12use crate::core_arch::simd::i8x64;
13use crate::core_arch::simd::i16x8;
14use crate::core_arch::simd::i16x16;
15use crate::core_arch::simd::i16x32;
16use crate::core_arch::x86::__m128i;
17use crate::core_arch::x86::__m256i;
18use crate::core_arch::x86::__m512i;
19use crate::core_arch::x86::__mmask8;
20use crate::core_arch::x86::__mmask16;
21use crate::core_arch::x86::__mmask32;
22use crate::core_arch::x86::__mmask64;
23use crate::intrinsics::simd::{simd_ctpop, simd_select_bitmask};
24use crate::mem::transmute;
25
26#[cfg(test)]
27use stdarch_test::assert_instr;
28
29#[allow(improper_ctypes)]
30unsafe extern "C" {
31    #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.512"]
32    fn bitshuffle_512(data: i8x64, indices: i8x64, mask: __mmask64) -> __mmask64;
33    #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.256"]
34    fn bitshuffle_256(data: i8x32, indices: i8x32, mask: __mmask32) -> __mmask32;
35    #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.128"]
36    fn bitshuffle_128(data: i8x16, indices: i8x16, mask: __mmask16) -> __mmask16;
37}
38
39/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
40///
41/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi16)
42#[inline]
43#[target_feature(enable = "avx512bitalg")]
44#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
45#[cfg_attr(test, assert_instr(vpopcntw))]
46pub fn _mm512_popcnt_epi16(a: __m512i) -> __m512i {
47    unsafe { transmute(simd_ctpop(a.as_i16x32())) }
48}
49
50/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
51///
52/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
53/// Otherwise the computation result is written into the result.
54///
55/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi16)
56#[inline]
57#[target_feature(enable = "avx512bitalg")]
58#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
59#[cfg_attr(test, assert_instr(vpopcntw))]
60pub fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i {
61    unsafe {
62        transmute(simd_select_bitmask(
63            k,
64            simd_ctpop(a.as_i16x32()),
65            i16x32::ZERO,
66        ))
67    }
68}
69
70/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
71///
72/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
73/// Otherwise the computation result is written into the result.
74///
75/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi16)
76#[inline]
77#[target_feature(enable = "avx512bitalg")]
78#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
79#[cfg_attr(test, assert_instr(vpopcntw))]
80pub fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
81    unsafe {
82        transmute(simd_select_bitmask(
83            k,
84            simd_ctpop(a.as_i16x32()),
85            src.as_i16x32(),
86        ))
87    }
88}
89
90/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
91///
92/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi16)
93#[inline]
94#[target_feature(enable = "avx512bitalg,avx512vl")]
95#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
96#[cfg_attr(test, assert_instr(vpopcntw))]
97pub fn _mm256_popcnt_epi16(a: __m256i) -> __m256i {
98    unsafe { transmute(simd_ctpop(a.as_i16x16())) }
99}
100
101/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
102///
103/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
104/// Otherwise the computation result is written into the result.
105///
106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi16)
107#[inline]
108#[target_feature(enable = "avx512bitalg,avx512vl")]
109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
110#[cfg_attr(test, assert_instr(vpopcntw))]
111pub fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i {
112    unsafe {
113        transmute(simd_select_bitmask(
114            k,
115            simd_ctpop(a.as_i16x16()),
116            i16x16::ZERO,
117        ))
118    }
119}
120
121/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
122///
123/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
124/// Otherwise the computation result is written into the result.
125///
126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi16)
127#[inline]
128#[target_feature(enable = "avx512bitalg,avx512vl")]
129#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
130#[cfg_attr(test, assert_instr(vpopcntw))]
131pub fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
132    unsafe {
133        transmute(simd_select_bitmask(
134            k,
135            simd_ctpop(a.as_i16x16()),
136            src.as_i16x16(),
137        ))
138    }
139}
140
141/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
142///
143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi16)
144#[inline]
145#[target_feature(enable = "avx512bitalg,avx512vl")]
146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
147#[cfg_attr(test, assert_instr(vpopcntw))]
148pub fn _mm_popcnt_epi16(a: __m128i) -> __m128i {
149    unsafe { transmute(simd_ctpop(a.as_i16x8())) }
150}
151
152/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
153///
154/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
155/// Otherwise the computation result is written into the result.
156///
157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi16)
158#[inline]
159#[target_feature(enable = "avx512bitalg,avx512vl")]
160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
161#[cfg_attr(test, assert_instr(vpopcntw))]
162pub fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i {
163    unsafe {
164        transmute(simd_select_bitmask(
165            k,
166            simd_ctpop(a.as_i16x8()),
167            i16x8::ZERO,
168        ))
169    }
170}
171
172/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
173///
174/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
175/// Otherwise the computation result is written into the result.
176///
177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi16)
178#[inline]
179#[target_feature(enable = "avx512bitalg,avx512vl")]
180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
181#[cfg_attr(test, assert_instr(vpopcntw))]
182pub fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
183    unsafe {
184        transmute(simd_select_bitmask(
185            k,
186            simd_ctpop(a.as_i16x8()),
187            src.as_i16x8(),
188        ))
189    }
190}
191
192/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
193///
194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi8)
195#[inline]
196#[target_feature(enable = "avx512bitalg")]
197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
198#[cfg_attr(test, assert_instr(vpopcntb))]
199pub fn _mm512_popcnt_epi8(a: __m512i) -> __m512i {
200    unsafe { transmute(simd_ctpop(a.as_i8x64())) }
201}
202
203/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
204///
205/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
206/// Otherwise the computation result is written into the result.
207///
208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi8)
209#[inline]
210#[target_feature(enable = "avx512bitalg")]
211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
212#[cfg_attr(test, assert_instr(vpopcntb))]
213pub fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i {
214    unsafe {
215        transmute(simd_select_bitmask(
216            k,
217            simd_ctpop(a.as_i8x64()),
218            i8x64::ZERO,
219        ))
220    }
221}
222
223/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
224///
225/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
226/// Otherwise the computation result is written into the result.
227///
228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi8)
229#[inline]
230#[target_feature(enable = "avx512bitalg")]
231#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
232#[cfg_attr(test, assert_instr(vpopcntb))]
233pub fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
234    unsafe {
235        transmute(simd_select_bitmask(
236            k,
237            simd_ctpop(a.as_i8x64()),
238            src.as_i8x64(),
239        ))
240    }
241}
242
243/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
244///
245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi8)
246#[inline]
247#[target_feature(enable = "avx512bitalg,avx512vl")]
248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
249#[cfg_attr(test, assert_instr(vpopcntb))]
250pub fn _mm256_popcnt_epi8(a: __m256i) -> __m256i {
251    unsafe { transmute(simd_ctpop(a.as_i8x32())) }
252}
253
254/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
255///
256/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
257/// Otherwise the computation result is written into the result.
258///
259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi8)
260#[inline]
261#[target_feature(enable = "avx512bitalg,avx512vl")]
262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
263#[cfg_attr(test, assert_instr(vpopcntb))]
264pub fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i {
265    unsafe {
266        transmute(simd_select_bitmask(
267            k,
268            simd_ctpop(a.as_i8x32()),
269            i8x32::ZERO,
270        ))
271    }
272}
273
274/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
275///
276/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
277/// Otherwise the computation result is written into the result.
278///
279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi8)
280#[inline]
281#[target_feature(enable = "avx512bitalg,avx512vl")]
282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
283#[cfg_attr(test, assert_instr(vpopcntb))]
284pub fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
285    unsafe {
286        transmute(simd_select_bitmask(
287            k,
288            simd_ctpop(a.as_i8x32()),
289            src.as_i8x32(),
290        ))
291    }
292}
293
294/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
295///
296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi8)
297#[inline]
298#[target_feature(enable = "avx512bitalg,avx512vl")]
299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
300#[cfg_attr(test, assert_instr(vpopcntb))]
301pub fn _mm_popcnt_epi8(a: __m128i) -> __m128i {
302    unsafe { transmute(simd_ctpop(a.as_i8x16())) }
303}
304
305/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
306///
307/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
308/// Otherwise the computation result is written into the result.
309///
310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi8)
311#[inline]
312#[target_feature(enable = "avx512bitalg,avx512vl")]
313#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
314#[cfg_attr(test, assert_instr(vpopcntb))]
315pub fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i {
316    unsafe {
317        transmute(simd_select_bitmask(
318            k,
319            simd_ctpop(a.as_i8x16()),
320            i8x16::ZERO,
321        ))
322    }
323}
324
325/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
326///
327/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
328/// Otherwise the computation result is written into the result.
329///
330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi8)
331#[inline]
332#[target_feature(enable = "avx512bitalg,avx512vl")]
333#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
334#[cfg_attr(test, assert_instr(vpopcntb))]
335pub fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
336    unsafe {
337        transmute(simd_select_bitmask(
338            k,
339            simd_ctpop(a.as_i8x16()),
340            src.as_i8x16(),
341        ))
342    }
343}
344
345/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
346/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
347/// It then selects these bits and packs them into the output.
348///
349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bitshuffle_epi64_mask)
350#[inline]
351#[target_feature(enable = "avx512bitalg")]
352#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
353#[cfg_attr(test, assert_instr(vpshufbitqmb))]
354pub fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 {
355    unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64(), !0) }
356}
357
358/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
359/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
360/// It then selects these bits and packs them into the output.
361///
362/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
363/// Otherwise the computation result is written into the result.
364///
365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_bitshuffle_epi64_mask)
366#[inline]
367#[target_feature(enable = "avx512bitalg")]
368#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
369#[cfg_attr(test, assert_instr(vpshufbitqmb))]
370pub fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -> __mmask64 {
371    unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64(), k) }
372}
373
374/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
375/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
376/// It then selects these bits and packs them into the output.
377///
378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bitshuffle_epi64_mask)
379#[inline]
380#[target_feature(enable = "avx512bitalg,avx512vl")]
381#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
382#[cfg_attr(test, assert_instr(vpshufbitqmb))]
383pub fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 {
384    unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32(), !0) }
385}
386
387/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
388/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
389/// It then selects these bits and packs them into the output.
390///
391/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
392/// Otherwise the computation result is written into the result.
393///
394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_bitshuffle_epi64_mask)
395#[inline]
396#[target_feature(enable = "avx512bitalg,avx512vl")]
397#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
398#[cfg_attr(test, assert_instr(vpshufbitqmb))]
399pub fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -> __mmask32 {
400    unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32(), k) }
401}
402
403/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
404/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
405/// It then selects these bits and packs them into the output.
406///
407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bitshuffle_epi64_mask)
408#[inline]
409#[target_feature(enable = "avx512bitalg,avx512vl")]
410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
411#[cfg_attr(test, assert_instr(vpshufbitqmb))]
412pub fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
413    unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16(), !0) }
414}
415
416/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
417/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
418/// It then selects these bits and packs them into the output.
419///
420/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
421/// Otherwise the computation result is written into the result.
422///
423/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_bitshuffle_epi64_mask)
424#[inline]
425#[target_feature(enable = "avx512bitalg,avx512vl")]
426#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
427#[cfg_attr(test, assert_instr(vpshufbitqmb))]
428pub fn _mm_mask_bitshuffle_epi64_mask(k: __mmask16, b: __m128i, c: __m128i) -> __mmask16 {
429    unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16(), k) }
430}
431
432#[cfg(test)]
433mod tests {
434    // Some of the constants in the tests below are just bit patterns. They should not
435    // be interpreted as integers; signedness does not make sense for them, but
436    // __mXXXi happens to be defined in terms of signed integers.
437    #![allow(overflowing_literals)]
438
439    use stdarch_test::simd_test;
440
441    use crate::core_arch::x86::*;
442
443    #[simd_test(enable = "avx512bitalg,avx512f")]
444    unsafe fn test_mm512_popcnt_epi16() {
445        let test_data = _mm512_set_epi16(
446            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
447            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
448            1024, 2048,
449        );
450        let actual_result = _mm512_popcnt_epi16(test_data);
451        let reference_result = _mm512_set_epi16(
452            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 12, 8, 1, 1, 1, 1, 1, 1,
453            1, 1, 1, 1, 1, 1,
454        );
455        assert_eq_m512i(actual_result, reference_result);
456    }
457
458    #[simd_test(enable = "avx512bitalg,avx512f")]
459    unsafe fn test_mm512_maskz_popcnt_epi16() {
460        let test_data = _mm512_set_epi16(
461            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
462            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
463            1024, 2048,
464        );
465        let mask = 0xFF_FF_00_00;
466        let actual_result = _mm512_maskz_popcnt_epi16(mask, test_data);
467        let reference_result = _mm512_set_epi16(
468            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
469            0, 0, 0, 0, 0,
470        );
471        assert_eq_m512i(actual_result, reference_result);
472    }
473
474    #[simd_test(enable = "avx512bitalg,avx512f")]
475    unsafe fn test_mm512_mask_popcnt_epi16() {
476        let test_data = _mm512_set_epi16(
477            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
478            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
479            1024, 2048,
480        );
481        let mask = 0xFF_FF_00_00;
482        let actual_result = _mm512_mask_popcnt_epi16(test_data, mask, test_data);
483        let reference_result = _mm512_set_epi16(
484            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF_FF, -1, -100, 255, 256, 2,
485            4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048,
486        );
487        assert_eq_m512i(actual_result, reference_result);
488    }
489
490    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
491    unsafe fn test_mm256_popcnt_epi16() {
492        let test_data = _mm256_set_epi16(
493            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
494            0x3F_FF, 0x7F_FF,
495        );
496        let actual_result = _mm256_popcnt_epi16(test_data);
497        let reference_result =
498            _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
499        assert_eq_m256i(actual_result, reference_result);
500    }
501
502    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
503    unsafe fn test_mm256_maskz_popcnt_epi16() {
504        let test_data = _mm256_set_epi16(
505            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
506            0x3F_FF, 0x7F_FF,
507        );
508        let mask = 0xFF_00;
509        let actual_result = _mm256_maskz_popcnt_epi16(mask, test_data);
510        let reference_result = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
511        assert_eq_m256i(actual_result, reference_result);
512    }
513
514    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
515    unsafe fn test_mm256_mask_popcnt_epi16() {
516        let test_data = _mm256_set_epi16(
517            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
518            0x3F_FF, 0x7F_FF,
519        );
520        let mask = 0xFF_00;
521        let actual_result = _mm256_mask_popcnt_epi16(test_data, mask, test_data);
522        let reference_result = _mm256_set_epi16(
523            0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, 0x3F_FF, 0x7F_FF,
524        );
525        assert_eq_m256i(actual_result, reference_result);
526    }
527
528    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
529    unsafe fn test_mm_popcnt_epi16() {
530        let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
531        let actual_result = _mm_popcnt_epi16(test_data);
532        let reference_result = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
533        assert_eq_m128i(actual_result, reference_result);
534    }
535
536    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
537    unsafe fn test_mm_maskz_popcnt_epi16() {
538        let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
539        let mask = 0xF0;
540        let actual_result = _mm_maskz_popcnt_epi16(mask, test_data);
541        let reference_result = _mm_set_epi16(0, 1, 2, 3, 0, 0, 0, 0);
542        assert_eq_m128i(actual_result, reference_result);
543    }
544
545    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
546    unsafe fn test_mm_mask_popcnt_epi16() {
547        let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
548        let mask = 0xF0;
549        let actual_result = _mm_mask_popcnt_epi16(test_data, mask, test_data);
550        let reference_result = _mm_set_epi16(0, 1, 2, 3, 0xF, 0x1F, 0x3F, 0x7F);
551        assert_eq_m128i(actual_result, reference_result);
552    }
553
554    #[simd_test(enable = "avx512bitalg,avx512f")]
555    unsafe fn test_mm512_popcnt_epi8() {
556        let test_data = _mm512_set_epi8(
557            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
558            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
559            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
560            225, 21, 249, 211, 155, 228, 70,
561        );
562        let actual_result = _mm512_popcnt_epi8(test_data);
563        let reference_result = _mm512_set_epi8(
564            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
565            2, 4, 4, 6, 4, 3, 3, 5, 6, 3, 3, 5, 6, 4, 4, 4, 3, 3, 6, 7, 3, 5, 5, 3, 4, 5, 3, 4, 4,
566            3, 6, 5, 5, 4, 3,
567        );
568        assert_eq_m512i(actual_result, reference_result);
569    }
570
571    #[simd_test(enable = "avx512bitalg,avx512f")]
572    unsafe fn test_mm512_maskz_popcnt_epi8() {
573        let test_data = _mm512_set_epi8(
574            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
575            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
576            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
577            225, 21, 249, 211, 155, 228, 70,
578        );
579        let mask = 0xFF_FF_FF_FF_00_00_00_00;
580        let actual_result = _mm512_maskz_popcnt_epi8(mask, test_data);
581        let reference_result = _mm512_set_epi8(
582            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
583            2, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
584            0, 0, 0, 0, 0, 0,
585        );
586        assert_eq_m512i(actual_result, reference_result);
587    }
588
589    #[simd_test(enable = "avx512bitalg,avx512f")]
590    unsafe fn test_mm512_mask_popcnt_epi8() {
591        let test_data = _mm512_set_epi8(
592            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
593            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
594            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
595            225, 21, 249, 211, 155, 228, 70,
596        );
597        let mask = 0xFF_FF_FF_FF_00_00_00_00;
598        let actual_result = _mm512_mask_popcnt_epi8(test_data, mask, test_data);
599        let reference_result = _mm512_set_epi8(
600            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
601            2, 4, 4, 183, 154, 84, 56, 227, 189, 140, 35, 117, 219, 169, 226, 170, 13, 22, 159,
602            251, 73, 121, 143, 145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
603        );
604        assert_eq_m512i(actual_result, reference_result);
605    }
606
607    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
608    unsafe fn test_mm256_popcnt_epi8() {
609        let test_data = _mm256_set_epi8(
610            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
611            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172,
612        );
613        let actual_result = _mm256_popcnt_epi8(test_data);
614        let reference_result = _mm256_set_epi8(
615            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
616            2, 4, 4,
617        );
618        assert_eq_m256i(actual_result, reference_result);
619    }
620
621    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
622    unsafe fn test_mm256_maskz_popcnt_epi8() {
623        let test_data = _mm256_set_epi8(
624            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143,
625            145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
626        );
627        let mask = 0xFF_FF_00_00;
628        let actual_result = _mm256_maskz_popcnt_epi8(mask, test_data);
629        let reference_result = _mm256_set_epi8(
630            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
631            0, 0, 0,
632        );
633        assert_eq_m256i(actual_result, reference_result);
634    }
635
636    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
637    unsafe fn test_mm256_mask_popcnt_epi8() {
638        let test_data = _mm256_set_epi8(
639            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143,
640            145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
641        );
642        let mask = 0xFF_FF_00_00;
643        let actual_result = _mm256_mask_popcnt_epi8(test_data, mask, test_data);
644        let reference_result = _mm256_set_epi8(
645            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 251, 73, 121, 143, 145, 85, 91, 137,
646            90, 225, 21, 249, 211, 155, 228, 70,
647        );
648        assert_eq_m256i(actual_result, reference_result);
649    }
650
651    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
652    unsafe fn test_mm_popcnt_epi8() {
653        let test_data = _mm_set_epi8(
654            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64,
655        );
656        let actual_result = _mm_popcnt_epi8(test_data);
657        let reference_result = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1);
658        assert_eq_m128i(actual_result, reference_result);
659    }
660
661    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
662    unsafe fn test_mm_maskz_popcnt_epi8() {
663        let test_data = _mm_set_epi8(
664            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70,
665        );
666        let mask = 0xFF_00;
667        let actual_result = _mm_maskz_popcnt_epi8(mask, test_data);
668        let reference_result = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
669        assert_eq_m128i(actual_result, reference_result);
670    }
671
672    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
673    unsafe fn test_mm_mask_popcnt_epi8() {
674        let test_data = _mm_set_epi8(
675            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70,
676        );
677        let mask = 0xFF_00;
678        let actual_result = _mm_mask_popcnt_epi8(test_data, mask, test_data);
679        let reference_result =
680            _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 90, 225, 21, 249, 211, 155, 228, 70);
681        assert_eq_m128i(actual_result, reference_result);
682    }
683
684    #[simd_test(enable = "avx512bitalg,avx512f")]
685    unsafe fn test_mm512_bitshuffle_epi64_mask() {
686        let test_indices = _mm512_set_epi8(
687            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
688            8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59,
689            58, 57, 56, 32, 32, 16, 16, 0, 0, 8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
690        );
691        let test_data = _mm512_setr_epi64(
692            0xFF_FF_FF_FF_00_00_00_00,
693            0xFF_00_FF_00_FF_00_FF_00,
694            0xFF_00_00_00_00_00_00_00,
695            0xAC_00_00_00_00_00_00_00,
696            0xFF_FF_FF_FF_00_00_00_00,
697            0xFF_00_FF_00_FF_00_FF_00,
698            0xFF_00_00_00_00_00_00_00,
699            0xAC_00_00_00_00_00_00_00,
700        );
701        let actual_result = _mm512_bitshuffle_epi64_mask(test_data, test_indices);
702        let reference_result = 0xF0 << 0
703            | 0x03 << 8
704            | 0xFF << 16
705            | 0xAC << 24
706            | 0xF0 << 32
707            | 0x03 << 40
708            | 0xFF << 48
709            | 0xAC << 56;
710
711        assert_eq!(actual_result, reference_result);
712    }
713
714    #[simd_test(enable = "avx512bitalg,avx512f")]
715    unsafe fn test_mm512_mask_bitshuffle_epi64_mask() {
716        let test_indices = _mm512_set_epi8(
717            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
718            8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59,
719            58, 57, 56, 32, 32, 16, 16, 0, 0, 8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
720        );
721        let test_data = _mm512_setr_epi64(
722            0xFF_FF_FF_FF_00_00_00_00,
723            0xFF_00_FF_00_FF_00_FF_00,
724            0xFF_00_00_00_00_00_00_00,
725            0xAC_00_00_00_00_00_00_00,
726            0xFF_FF_FF_FF_00_00_00_00,
727            0xFF_00_FF_00_FF_00_FF_00,
728            0xFF_00_00_00_00_00_00_00,
729            0xAC_00_00_00_00_00_00_00,
730        );
731        let mask = 0xFF_FF_FF_FF_00_00_00_00;
732        let actual_result = _mm512_mask_bitshuffle_epi64_mask(mask, test_data, test_indices);
733        let reference_result = 0x00 << 0
734            | 0x00 << 8
735            | 0x00 << 16
736            | 0x00 << 24
737            | 0xF0 << 32
738            | 0x03 << 40
739            | 0xFF << 48
740            | 0xAC << 56;
741
742        assert_eq!(actual_result, reference_result);
743    }
744
745    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
746    unsafe fn test_mm256_bitshuffle_epi64_mask() {
747        let test_indices = _mm256_set_epi8(
748            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
749            8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
750        );
751        let test_data = _mm256_setr_epi64x(
752            0xFF_FF_FF_FF_00_00_00_00,
753            0xFF_00_FF_00_FF_00_FF_00,
754            0xFF_00_00_00_00_00_00_00,
755            0xAC_00_00_00_00_00_00_00,
756        );
757        let actual_result = _mm256_bitshuffle_epi64_mask(test_data, test_indices);
758        let reference_result = 0xF0 << 0 | 0x03 << 8 | 0xFF << 16 | 0xAC << 24;
759
760        assert_eq!(actual_result, reference_result);
761    }
762
763    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
764    unsafe fn test_mm256_mask_bitshuffle_epi64_mask() {
765        let test_indices = _mm256_set_epi8(
766            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
767            8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
768        );
769        let test_data = _mm256_setr_epi64x(
770            0xFF_FF_FF_FF_00_00_00_00,
771            0xFF_00_FF_00_FF_00_FF_00,
772            0xFF_00_00_00_00_00_00_00,
773            0xAC_00_00_00_00_00_00_00,
774        );
775        let mask = 0xFF_FF_00_00;
776        let actual_result = _mm256_mask_bitshuffle_epi64_mask(mask, test_data, test_indices);
777        let reference_result = 0x00 << 0 | 0x00 << 8 | 0xFF << 16 | 0xAC << 24;
778
779        assert_eq!(actual_result, reference_result);
780    }
781
782    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
783    unsafe fn test_mm_bitshuffle_epi64_mask() {
784        let test_indices = _mm_set_epi8(
785            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56,
786        );
787        let test_data = _mm_setr_epi64x(0xFF_00_00_00_00_00_00_00, 0xAC_00_00_00_00_00_00_00);
788        let actual_result = _mm_bitshuffle_epi64_mask(test_data, test_indices);
789        let reference_result = 0xFF << 0 | 0xAC << 8;
790
791        assert_eq!(actual_result, reference_result);
792    }
793
794    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
795    unsafe fn test_mm_mask_bitshuffle_epi64_mask() {
796        let test_indices = _mm_set_epi8(
797            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56,
798        );
799        let test_data = _mm_setr_epi64x(0xFF_00_00_00_00_00_00_00, 0xAC_00_00_00_00_00_00_00);
800        let mask = 0xFF_00;
801        let actual_result = _mm_mask_bitshuffle_epi64_mask(mask, test_data, test_indices);
802        let reference_result = 0x00 << 0 | 0xAC << 8;
803
804        assert_eq!(actual_result, reference_result);
805    }
806}