core/stdarch/crates/core_arch/src/x86/
avx512bitalg.rs

1//! Bit-oriented Algorithms (BITALG)
2//!
3//! The intrinsics here correspond to those in the `immintrin.h` C header.
4//!
5//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
6//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
7//!
8//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
9
10use crate::core_arch::simd::i16x16;
11use crate::core_arch::simd::i16x32;
12use crate::core_arch::simd::i16x8;
13use crate::core_arch::simd::i8x16;
14use crate::core_arch::simd::i8x32;
15use crate::core_arch::simd::i8x64;
16use crate::core_arch::x86::__m128i;
17use crate::core_arch::x86::__m256i;
18use crate::core_arch::x86::__m512i;
19use crate::core_arch::x86::__mmask16;
20use crate::core_arch::x86::__mmask32;
21use crate::core_arch::x86::__mmask64;
22use crate::core_arch::x86::__mmask8;
23use crate::core_arch::x86::m128iExt;
24use crate::core_arch::x86::m256iExt;
25use crate::core_arch::x86::m512iExt;
26use crate::intrinsics::simd::{simd_ctpop, simd_select_bitmask};
27use crate::mem::transmute;
28
29#[cfg(test)]
30use stdarch_test::assert_instr;
31
32#[allow(improper_ctypes)]
33extern "C" {
34    #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.512"]
35    fn bitshuffle_512(data: i8x64, indices: i8x64, mask: __mmask64) -> __mmask64;
36    #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.256"]
37    fn bitshuffle_256(data: i8x32, indices: i8x32, mask: __mmask32) -> __mmask32;
38    #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.128"]
39    fn bitshuffle_128(data: i8x16, indices: i8x16, mask: __mmask16) -> __mmask16;
40}
41
42/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
43///
44/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi16)
45#[inline]
46#[target_feature(enable = "avx512bitalg")]
47#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
48#[cfg_attr(test, assert_instr(vpopcntw))]
49pub unsafe fn _mm512_popcnt_epi16(a: __m512i) -> __m512i {
50    transmute(simd_ctpop(a.as_i16x32()))
51}
52
53/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
54///
55/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
56/// Otherwise the computation result is written into the result.
57///
58/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi16)
59#[inline]
60#[target_feature(enable = "avx512bitalg")]
61#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
62#[cfg_attr(test, assert_instr(vpopcntw))]
63pub unsafe fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i {
64    transmute(simd_select_bitmask(
65        k,
66        simd_ctpop(a.as_i16x32()),
67        i16x32::ZERO,
68    ))
69}
70
71/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
72///
73/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
74/// Otherwise the computation result is written into the result.
75///
76/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi16)
77#[inline]
78#[target_feature(enable = "avx512bitalg")]
79#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
80#[cfg_attr(test, assert_instr(vpopcntw))]
81pub unsafe fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
82    transmute(simd_select_bitmask(
83        k,
84        simd_ctpop(a.as_i16x32()),
85        src.as_i16x32(),
86    ))
87}
88
89/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
90///
91/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi16)
92#[inline]
93#[target_feature(enable = "avx512bitalg,avx512vl")]
94#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
95#[cfg_attr(test, assert_instr(vpopcntw))]
96pub unsafe fn _mm256_popcnt_epi16(a: __m256i) -> __m256i {
97    transmute(simd_ctpop(a.as_i16x16()))
98}
99
100/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
101///
102/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
103/// Otherwise the computation result is written into the result.
104///
105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi16)
106#[inline]
107#[target_feature(enable = "avx512bitalg,avx512vl")]
108#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
109#[cfg_attr(test, assert_instr(vpopcntw))]
110pub unsafe fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i {
111    transmute(simd_select_bitmask(
112        k,
113        simd_ctpop(a.as_i16x16()),
114        i16x16::ZERO,
115    ))
116}
117
118/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
119///
120/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
121/// Otherwise the computation result is written into the result.
122///
123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi16)
124#[inline]
125#[target_feature(enable = "avx512bitalg,avx512vl")]
126#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
127#[cfg_attr(test, assert_instr(vpopcntw))]
128pub unsafe fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
129    transmute(simd_select_bitmask(
130        k,
131        simd_ctpop(a.as_i16x16()),
132        src.as_i16x16(),
133    ))
134}
135
136/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
137///
138/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi16)
139#[inline]
140#[target_feature(enable = "avx512bitalg,avx512vl")]
141#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
142#[cfg_attr(test, assert_instr(vpopcntw))]
143pub unsafe fn _mm_popcnt_epi16(a: __m128i) -> __m128i {
144    transmute(simd_ctpop(a.as_i16x8()))
145}
146
147/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
148///
149/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
150/// Otherwise the computation result is written into the result.
151///
152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi16)
153#[inline]
154#[target_feature(enable = "avx512bitalg,avx512vl")]
155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
156#[cfg_attr(test, assert_instr(vpopcntw))]
157pub unsafe fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i {
158    transmute(simd_select_bitmask(
159        k,
160        simd_ctpop(a.as_i16x8()),
161        i16x8::ZERO,
162    ))
163}
164
165/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
166///
167/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
168/// Otherwise the computation result is written into the result.
169///
170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi16)
171#[inline]
172#[target_feature(enable = "avx512bitalg,avx512vl")]
173#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
174#[cfg_attr(test, assert_instr(vpopcntw))]
175pub unsafe fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
176    transmute(simd_select_bitmask(
177        k,
178        simd_ctpop(a.as_i16x8()),
179        src.as_i16x8(),
180    ))
181}
182
183/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
184///
185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi8)
186#[inline]
187#[target_feature(enable = "avx512bitalg")]
188#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
189#[cfg_attr(test, assert_instr(vpopcntb))]
190pub unsafe fn _mm512_popcnt_epi8(a: __m512i) -> __m512i {
191    transmute(simd_ctpop(a.as_i8x64()))
192}
193
194/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
195///
196/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
197/// Otherwise the computation result is written into the result.
198///
199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi8)
200#[inline]
201#[target_feature(enable = "avx512bitalg")]
202#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
203#[cfg_attr(test, assert_instr(vpopcntb))]
204pub unsafe fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i {
205    transmute(simd_select_bitmask(
206        k,
207        simd_ctpop(a.as_i8x64()),
208        i8x64::ZERO,
209    ))
210}
211
212/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
213///
214/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
215/// Otherwise the computation result is written into the result.
216///
217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi8)
218#[inline]
219#[target_feature(enable = "avx512bitalg")]
220#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
221#[cfg_attr(test, assert_instr(vpopcntb))]
222pub unsafe fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
223    transmute(simd_select_bitmask(
224        k,
225        simd_ctpop(a.as_i8x64()),
226        src.as_i8x64(),
227    ))
228}
229
230/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
231///
232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi8)
233#[inline]
234#[target_feature(enable = "avx512bitalg,avx512vl")]
235#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
236#[cfg_attr(test, assert_instr(vpopcntb))]
237pub unsafe fn _mm256_popcnt_epi8(a: __m256i) -> __m256i {
238    transmute(simd_ctpop(a.as_i8x32()))
239}
240
241/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
242///
243/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
244/// Otherwise the computation result is written into the result.
245///
246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi8)
247#[inline]
248#[target_feature(enable = "avx512bitalg,avx512vl")]
249#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
250#[cfg_attr(test, assert_instr(vpopcntb))]
251pub unsafe fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i {
252    transmute(simd_select_bitmask(
253        k,
254        simd_ctpop(a.as_i8x32()),
255        i8x32::ZERO,
256    ))
257}
258
259/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
260///
261/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
262/// Otherwise the computation result is written into the result.
263///
264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi8)
265#[inline]
266#[target_feature(enable = "avx512bitalg,avx512vl")]
267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
268#[cfg_attr(test, assert_instr(vpopcntb))]
269pub unsafe fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
270    transmute(simd_select_bitmask(
271        k,
272        simd_ctpop(a.as_i8x32()),
273        src.as_i8x32(),
274    ))
275}
276
277/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
278///
279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi8)
280#[inline]
281#[target_feature(enable = "avx512bitalg,avx512vl")]
282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
283#[cfg_attr(test, assert_instr(vpopcntb))]
284pub unsafe fn _mm_popcnt_epi8(a: __m128i) -> __m128i {
285    transmute(simd_ctpop(a.as_i8x16()))
286}
287
288/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
289///
290/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
291/// Otherwise the computation result is written into the result.
292///
293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi8)
294#[inline]
295#[target_feature(enable = "avx512bitalg,avx512vl")]
296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
297#[cfg_attr(test, assert_instr(vpopcntb))]
298pub unsafe fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i {
299    transmute(simd_select_bitmask(
300        k,
301        simd_ctpop(a.as_i8x16()),
302        i8x16::ZERO,
303    ))
304}
305
306/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
307///
308/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
309/// Otherwise the computation result is written into the result.
310///
311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi8)
312#[inline]
313#[target_feature(enable = "avx512bitalg,avx512vl")]
314#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
315#[cfg_attr(test, assert_instr(vpopcntb))]
316pub unsafe fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
317    transmute(simd_select_bitmask(
318        k,
319        simd_ctpop(a.as_i8x16()),
320        src.as_i8x16(),
321    ))
322}
323
324/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
325/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
326/// It then selects these bits and packs them into the output.
327///
328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bitshuffle_epi64_mask)
329#[inline]
330#[target_feature(enable = "avx512bitalg")]
331#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
332#[cfg_attr(test, assert_instr(vpshufbitqmb))]
333pub unsafe fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 {
334    bitshuffle_512(b.as_i8x64(), c.as_i8x64(), !0)
335}
336
337/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
338/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
339/// It then selects these bits and packs them into the output.
340///
341/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
342/// Otherwise the computation result is written into the result.
343///
344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_bitshuffle_epi64_mask)
345#[inline]
346#[target_feature(enable = "avx512bitalg")]
347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
348#[cfg_attr(test, assert_instr(vpshufbitqmb))]
349pub unsafe fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -> __mmask64 {
350    bitshuffle_512(b.as_i8x64(), c.as_i8x64(), k)
351}
352
353/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
354/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
355/// It then selects these bits and packs them into the output.
356///
357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bitshuffle_epi64_mask)
358#[inline]
359#[target_feature(enable = "avx512bitalg,avx512vl")]
360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
361#[cfg_attr(test, assert_instr(vpshufbitqmb))]
362pub unsafe fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 {
363    bitshuffle_256(b.as_i8x32(), c.as_i8x32(), !0)
364}
365
366/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
367/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
368/// It then selects these bits and packs them into the output.
369///
370/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
371/// Otherwise the computation result is written into the result.
372///
373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_bitshuffle_epi64_mask)
374#[inline]
375#[target_feature(enable = "avx512bitalg,avx512vl")]
376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
377#[cfg_attr(test, assert_instr(vpshufbitqmb))]
378pub unsafe fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -> __mmask32 {
379    bitshuffle_256(b.as_i8x32(), c.as_i8x32(), k)
380}
381
382/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
383/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
384/// It then selects these bits and packs them into the output.
385///
386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bitshuffle_epi64_mask)
387#[inline]
388#[target_feature(enable = "avx512bitalg,avx512vl")]
389#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
390#[cfg_attr(test, assert_instr(vpshufbitqmb))]
391pub unsafe fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
392    bitshuffle_128(b.as_i8x16(), c.as_i8x16(), !0)
393}
394
395/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
396/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
397/// It then selects these bits and packs them into the output.
398///
399/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
400/// Otherwise the computation result is written into the result.
401///
402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_bitshuffle_epi64_mask)
403#[inline]
404#[target_feature(enable = "avx512bitalg,avx512vl")]
405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
406#[cfg_attr(test, assert_instr(vpshufbitqmb))]
407pub unsafe fn _mm_mask_bitshuffle_epi64_mask(k: __mmask16, b: __m128i, c: __m128i) -> __mmask16 {
408    bitshuffle_128(b.as_i8x16(), c.as_i8x16(), k)
409}
410
411#[cfg(test)]
412mod tests {
413    // Some of the constants in the tests below are just bit patterns. They should not
414    // be interpreted as integers; signedness does not make sense for them, but
415    // __mXXXi happens to be defined in terms of signed integers.
416    #![allow(overflowing_literals)]
417
418    use stdarch_test::simd_test;
419
420    use crate::core_arch::x86::*;
421
422    #[simd_test(enable = "avx512bitalg,avx512f")]
423    unsafe fn test_mm512_popcnt_epi16() {
424        let test_data = _mm512_set_epi16(
425            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
426            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
427            1024, 2048,
428        );
429        let actual_result = _mm512_popcnt_epi16(test_data);
430        let reference_result = _mm512_set_epi16(
431            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 12, 8, 1, 1, 1, 1, 1, 1,
432            1, 1, 1, 1, 1, 1,
433        );
434        assert_eq_m512i(actual_result, reference_result);
435    }
436
437    #[simd_test(enable = "avx512bitalg,avx512f")]
438    unsafe fn test_mm512_maskz_popcnt_epi16() {
439        let test_data = _mm512_set_epi16(
440            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
441            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
442            1024, 2048,
443        );
444        let mask = 0xFF_FF_00_00;
445        let actual_result = _mm512_maskz_popcnt_epi16(mask, test_data);
446        let reference_result = _mm512_set_epi16(
447            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448            0, 0, 0, 0, 0,
449        );
450        assert_eq_m512i(actual_result, reference_result);
451    }
452
453    #[simd_test(enable = "avx512bitalg,avx512f")]
454    unsafe fn test_mm512_mask_popcnt_epi16() {
455        let test_data = _mm512_set_epi16(
456            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
457            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
458            1024, 2048,
459        );
460        let mask = 0xFF_FF_00_00;
461        let actual_result = _mm512_mask_popcnt_epi16(test_data, mask, test_data);
462        let reference_result = _mm512_set_epi16(
463            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF_FF, -1, -100, 255, 256, 2,
464            4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048,
465        );
466        assert_eq_m512i(actual_result, reference_result);
467    }
468
469    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
470    unsafe fn test_mm256_popcnt_epi16() {
471        let test_data = _mm256_set_epi16(
472            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
473            0x3F_FF, 0x7F_FF,
474        );
475        let actual_result = _mm256_popcnt_epi16(test_data);
476        let reference_result =
477            _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
478        assert_eq_m256i(actual_result, reference_result);
479    }
480
481    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
482    unsafe fn test_mm256_maskz_popcnt_epi16() {
483        let test_data = _mm256_set_epi16(
484            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
485            0x3F_FF, 0x7F_FF,
486        );
487        let mask = 0xFF_00;
488        let actual_result = _mm256_maskz_popcnt_epi16(mask, test_data);
489        let reference_result = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
490        assert_eq_m256i(actual_result, reference_result);
491    }
492
493    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
494    unsafe fn test_mm256_mask_popcnt_epi16() {
495        let test_data = _mm256_set_epi16(
496            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
497            0x3F_FF, 0x7F_FF,
498        );
499        let mask = 0xFF_00;
500        let actual_result = _mm256_mask_popcnt_epi16(test_data, mask, test_data);
501        let reference_result = _mm256_set_epi16(
502            0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, 0x3F_FF, 0x7F_FF,
503        );
504        assert_eq_m256i(actual_result, reference_result);
505    }
506
507    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
508    unsafe fn test_mm_popcnt_epi16() {
509        let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
510        let actual_result = _mm_popcnt_epi16(test_data);
511        let reference_result = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
512        assert_eq_m128i(actual_result, reference_result);
513    }
514
515    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
516    unsafe fn test_mm_maskz_popcnt_epi16() {
517        let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
518        let mask = 0xF0;
519        let actual_result = _mm_maskz_popcnt_epi16(mask, test_data);
520        let reference_result = _mm_set_epi16(0, 1, 2, 3, 0, 0, 0, 0);
521        assert_eq_m128i(actual_result, reference_result);
522    }
523
524    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
525    unsafe fn test_mm_mask_popcnt_epi16() {
526        let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
527        let mask = 0xF0;
528        let actual_result = _mm_mask_popcnt_epi16(test_data, mask, test_data);
529        let reference_result = _mm_set_epi16(0, 1, 2, 3, 0xF, 0x1F, 0x3F, 0x7F);
530        assert_eq_m128i(actual_result, reference_result);
531    }
532
533    #[simd_test(enable = "avx512bitalg,avx512f")]
534    unsafe fn test_mm512_popcnt_epi8() {
535        let test_data = _mm512_set_epi8(
536            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
537            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
538            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
539            225, 21, 249, 211, 155, 228, 70,
540        );
541        let actual_result = _mm512_popcnt_epi8(test_data);
542        let reference_result = _mm512_set_epi8(
543            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
544            2, 4, 4, 6, 4, 3, 3, 5, 6, 3, 3, 5, 6, 4, 4, 4, 3, 3, 6, 7, 3, 5, 5, 3, 4, 5, 3, 4, 4,
545            3, 6, 5, 5, 4, 3,
546        );
547        assert_eq_m512i(actual_result, reference_result);
548    }
549
550    #[simd_test(enable = "avx512bitalg,avx512f")]
551    unsafe fn test_mm512_maskz_popcnt_epi8() {
552        let test_data = _mm512_set_epi8(
553            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
554            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
555            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
556            225, 21, 249, 211, 155, 228, 70,
557        );
558        let mask = 0xFF_FF_FF_FF_00_00_00_00;
559        let actual_result = _mm512_maskz_popcnt_epi8(mask, test_data);
560        let reference_result = _mm512_set_epi8(
561            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
562            2, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563            0, 0, 0, 0, 0, 0,
564        );
565        assert_eq_m512i(actual_result, reference_result);
566    }
567
568    #[simd_test(enable = "avx512bitalg,avx512f")]
569    unsafe fn test_mm512_mask_popcnt_epi8() {
570        let test_data = _mm512_set_epi8(
571            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
572            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
573            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
574            225, 21, 249, 211, 155, 228, 70,
575        );
576        let mask = 0xFF_FF_FF_FF_00_00_00_00;
577        let actual_result = _mm512_mask_popcnt_epi8(test_data, mask, test_data);
578        let reference_result = _mm512_set_epi8(
579            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
580            2, 4, 4, 183, 154, 84, 56, 227, 189, 140, 35, 117, 219, 169, 226, 170, 13, 22, 159,
581            251, 73, 121, 143, 145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
582        );
583        assert_eq_m512i(actual_result, reference_result);
584    }
585
586    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
587    unsafe fn test_mm256_popcnt_epi8() {
588        let test_data = _mm256_set_epi8(
589            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
590            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172,
591        );
592        let actual_result = _mm256_popcnt_epi8(test_data);
593        let reference_result = _mm256_set_epi8(
594            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
595            2, 4, 4,
596        );
597        assert_eq_m256i(actual_result, reference_result);
598    }
599
600    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
601    unsafe fn test_mm256_maskz_popcnt_epi8() {
602        let test_data = _mm256_set_epi8(
603            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143,
604            145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
605        );
606        let mask = 0xFF_FF_00_00;
607        let actual_result = _mm256_maskz_popcnt_epi8(mask, test_data);
608        let reference_result = _mm256_set_epi8(
609            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
610            0, 0, 0,
611        );
612        assert_eq_m256i(actual_result, reference_result);
613    }
614
615    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
616    unsafe fn test_mm256_mask_popcnt_epi8() {
617        let test_data = _mm256_set_epi8(
618            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143,
619            145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
620        );
621        let mask = 0xFF_FF_00_00;
622        let actual_result = _mm256_mask_popcnt_epi8(test_data, mask, test_data);
623        let reference_result = _mm256_set_epi8(
624            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 251, 73, 121, 143, 145, 85, 91, 137,
625            90, 225, 21, 249, 211, 155, 228, 70,
626        );
627        assert_eq_m256i(actual_result, reference_result);
628    }
629
630    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
631    unsafe fn test_mm_popcnt_epi8() {
632        let test_data = _mm_set_epi8(
633            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64,
634        );
635        let actual_result = _mm_popcnt_epi8(test_data);
636        let reference_result = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1);
637        assert_eq_m128i(actual_result, reference_result);
638    }
639
640    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
641    unsafe fn test_mm_maskz_popcnt_epi8() {
642        let test_data = _mm_set_epi8(
643            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70,
644        );
645        let mask = 0xFF_00;
646        let actual_result = _mm_maskz_popcnt_epi8(mask, test_data);
647        let reference_result = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
648        assert_eq_m128i(actual_result, reference_result);
649    }
650
651    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
652    unsafe fn test_mm_mask_popcnt_epi8() {
653        let test_data = _mm_set_epi8(
654            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70,
655        );
656        let mask = 0xFF_00;
657        let actual_result = _mm_mask_popcnt_epi8(test_data, mask, test_data);
658        let reference_result =
659            _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 90, 225, 21, 249, 211, 155, 228, 70);
660        assert_eq_m128i(actual_result, reference_result);
661    }
662
663    #[simd_test(enable = "avx512bitalg,avx512f")]
664    unsafe fn test_mm512_bitshuffle_epi64_mask() {
665        let test_indices = _mm512_set_epi8(
666            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
667            8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59,
668            58, 57, 56, 32, 32, 16, 16, 0, 0, 8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
669        );
670        let test_data = _mm512_setr_epi64(
671            0xFF_FF_FF_FF_00_00_00_00,
672            0xFF_00_FF_00_FF_00_FF_00,
673            0xFF_00_00_00_00_00_00_00,
674            0xAC_00_00_00_00_00_00_00,
675            0xFF_FF_FF_FF_00_00_00_00,
676            0xFF_00_FF_00_FF_00_FF_00,
677            0xFF_00_00_00_00_00_00_00,
678            0xAC_00_00_00_00_00_00_00,
679        );
680        let actual_result = _mm512_bitshuffle_epi64_mask(test_data, test_indices);
681        let reference_result = 0xF0 << 0
682            | 0x03 << 8
683            | 0xFF << 16
684            | 0xAC << 24
685            | 0xF0 << 32
686            | 0x03 << 40
687            | 0xFF << 48
688            | 0xAC << 56;
689
690        assert_eq!(actual_result, reference_result);
691    }
692
693    #[simd_test(enable = "avx512bitalg,avx512f")]
694    unsafe fn test_mm512_mask_bitshuffle_epi64_mask() {
695        let test_indices = _mm512_set_epi8(
696            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
697            8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59,
698            58, 57, 56, 32, 32, 16, 16, 0, 0, 8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
699        );
700        let test_data = _mm512_setr_epi64(
701            0xFF_FF_FF_FF_00_00_00_00,
702            0xFF_00_FF_00_FF_00_FF_00,
703            0xFF_00_00_00_00_00_00_00,
704            0xAC_00_00_00_00_00_00_00,
705            0xFF_FF_FF_FF_00_00_00_00,
706            0xFF_00_FF_00_FF_00_FF_00,
707            0xFF_00_00_00_00_00_00_00,
708            0xAC_00_00_00_00_00_00_00,
709        );
710        let mask = 0xFF_FF_FF_FF_00_00_00_00;
711        let actual_result = _mm512_mask_bitshuffle_epi64_mask(mask, test_data, test_indices);
712        let reference_result = 0x00 << 0
713            | 0x00 << 8
714            | 0x00 << 16
715            | 0x00 << 24
716            | 0xF0 << 32
717            | 0x03 << 40
718            | 0xFF << 48
719            | 0xAC << 56;
720
721        assert_eq!(actual_result, reference_result);
722    }
723
724    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
725    unsafe fn test_mm256_bitshuffle_epi64_mask() {
726        let test_indices = _mm256_set_epi8(
727            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
728            8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
729        );
730        let test_data = _mm256_setr_epi64x(
731            0xFF_FF_FF_FF_00_00_00_00,
732            0xFF_00_FF_00_FF_00_FF_00,
733            0xFF_00_00_00_00_00_00_00,
734            0xAC_00_00_00_00_00_00_00,
735        );
736        let actual_result = _mm256_bitshuffle_epi64_mask(test_data, test_indices);
737        let reference_result = 0xF0 << 0 | 0x03 << 8 | 0xFF << 16 | 0xAC << 24;
738
739        assert_eq!(actual_result, reference_result);
740    }
741
742    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
743    unsafe fn test_mm256_mask_bitshuffle_epi64_mask() {
744        let test_indices = _mm256_set_epi8(
745            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
746            8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
747        );
748        let test_data = _mm256_setr_epi64x(
749            0xFF_FF_FF_FF_00_00_00_00,
750            0xFF_00_FF_00_FF_00_FF_00,
751            0xFF_00_00_00_00_00_00_00,
752            0xAC_00_00_00_00_00_00_00,
753        );
754        let mask = 0xFF_FF_00_00;
755        let actual_result = _mm256_mask_bitshuffle_epi64_mask(mask, test_data, test_indices);
756        let reference_result = 0x00 << 0 | 0x00 << 8 | 0xFF << 16 | 0xAC << 24;
757
758        assert_eq!(actual_result, reference_result);
759    }
760
761    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
762    unsafe fn test_mm_bitshuffle_epi64_mask() {
763        let test_indices = _mm_set_epi8(
764            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56,
765        );
766        let test_data = _mm_setr_epi64x(0xFF_00_00_00_00_00_00_00, 0xAC_00_00_00_00_00_00_00);
767        let actual_result = _mm_bitshuffle_epi64_mask(test_data, test_indices);
768        let reference_result = 0xFF << 0 | 0xAC << 8;
769
770        assert_eq!(actual_result, reference_result);
771    }
772
773    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
774    unsafe fn test_mm_mask_bitshuffle_epi64_mask() {
775        let test_indices = _mm_set_epi8(
776            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56,
777        );
778        let test_data = _mm_setr_epi64x(0xFF_00_00_00_00_00_00_00, 0xAC_00_00_00_00_00_00_00);
779        let mask = 0xFF_00;
780        let actual_result = _mm_mask_bitshuffle_epi64_mask(mask, test_data, test_indices);
781        let reference_result = 0x00 << 0 | 0xAC << 8;
782
783        assert_eq!(actual_result, reference_result);
784    }
785}