core/stdarch/crates/core_arch/src/x86/
avx512bitalg.rs

1//! Bit-oriented Algorithms (BITALG)
2//!
3//! The intrinsics here correspond to those in the `immintrin.h` C header.
4//!
5//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
6//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
7//!
8//! [intel64_ref]: https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
9
10use crate::core_arch::simd::i8x16;
11use crate::core_arch::simd::i8x32;
12use crate::core_arch::simd::i8x64;
13use crate::core_arch::simd::i16x8;
14use crate::core_arch::simd::i16x16;
15use crate::core_arch::simd::i16x32;
16use crate::core_arch::x86::__m128i;
17use crate::core_arch::x86::__m256i;
18use crate::core_arch::x86::__m512i;
19use crate::core_arch::x86::__mmask8;
20use crate::core_arch::x86::__mmask16;
21use crate::core_arch::x86::__mmask32;
22use crate::core_arch::x86::__mmask64;
23use crate::intrinsics::simd::{simd_ctpop, simd_select_bitmask};
24use crate::mem::transmute;
25
26#[cfg(test)]
27use stdarch_test::assert_instr;
28
// Declarations of the LLVM intrinsics backing the `*_bitshuffle_epi64_mask` functions in this
// file, one per vector width (512, 256 and 128 bits). Each takes the data vector, the index
// vector and a mask, and returns a mask of the same type as its mask argument.
//
// NOTE(review): `improper_ctypes` is presumably allowed because the SIMD vector types in these
// signatures are flagged by that lint — confirm.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // 512-bit form: 64-lane byte vectors, 64-bit mask in and out.
    #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.512"]
    fn bitshuffle_512(data: i8x64, indices: i8x64, mask: __mmask64) -> __mmask64;
    // 256-bit form: 32-lane byte vectors, 32-bit mask in and out.
    #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.256"]
    fn bitshuffle_256(data: i8x32, indices: i8x32, mask: __mmask32) -> __mmask32;
    // 128-bit form: 16-lane byte vectors, 16-bit mask in and out.
    #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.128"]
    fn bitshuffle_128(data: i8x16, indices: i8x16, mask: __mmask16) -> __mmask16;
}
38
39/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
40///
41/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi16)
42#[inline]
43#[target_feature(enable = "avx512bitalg")]
44#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
45#[cfg_attr(test, assert_instr(vpopcntw))]
46#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
47pub const fn _mm512_popcnt_epi16(a: __m512i) -> __m512i {
48    unsafe { transmute(simd_ctpop(a.as_i16x32())) }
49}
50
51/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
52///
53/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
54/// Otherwise the computation result is written into the result.
55///
56/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi16)
57#[inline]
58#[target_feature(enable = "avx512bitalg")]
59#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
60#[cfg_attr(test, assert_instr(vpopcntw))]
61#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
62pub const fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i {
63    unsafe {
64        transmute(simd_select_bitmask(
65            k,
66            simd_ctpop(a.as_i16x32()),
67            i16x32::ZERO,
68        ))
69    }
70}
71
72/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
73///
74/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
75/// Otherwise the computation result is written into the result.
76///
77/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi16)
78#[inline]
79#[target_feature(enable = "avx512bitalg")]
80#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
81#[cfg_attr(test, assert_instr(vpopcntw))]
82#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
83pub const fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
84    unsafe {
85        transmute(simd_select_bitmask(
86            k,
87            simd_ctpop(a.as_i16x32()),
88            src.as_i16x32(),
89        ))
90    }
91}
92
93/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
94///
95/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi16)
96#[inline]
97#[target_feature(enable = "avx512bitalg,avx512vl")]
98#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
99#[cfg_attr(test, assert_instr(vpopcntw))]
100#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
101pub const fn _mm256_popcnt_epi16(a: __m256i) -> __m256i {
102    unsafe { transmute(simd_ctpop(a.as_i16x16())) }
103}
104
105/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
106///
107/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
108/// Otherwise the computation result is written into the result.
109///
110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi16)
111#[inline]
112#[target_feature(enable = "avx512bitalg,avx512vl")]
113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
114#[cfg_attr(test, assert_instr(vpopcntw))]
115#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
116pub const fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i {
117    unsafe {
118        transmute(simd_select_bitmask(
119            k,
120            simd_ctpop(a.as_i16x16()),
121            i16x16::ZERO,
122        ))
123    }
124}
125
126/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
127///
128/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
129/// Otherwise the computation result is written into the result.
130///
131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi16)
132#[inline]
133#[target_feature(enable = "avx512bitalg,avx512vl")]
134#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
135#[cfg_attr(test, assert_instr(vpopcntw))]
136#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
137pub const fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
138    unsafe {
139        transmute(simd_select_bitmask(
140            k,
141            simd_ctpop(a.as_i16x16()),
142            src.as_i16x16(),
143        ))
144    }
145}
146
147/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
148///
149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi16)
150#[inline]
151#[target_feature(enable = "avx512bitalg,avx512vl")]
152#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
153#[cfg_attr(test, assert_instr(vpopcntw))]
154#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
155pub const fn _mm_popcnt_epi16(a: __m128i) -> __m128i {
156    unsafe { transmute(simd_ctpop(a.as_i16x8())) }
157}
158
159/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
160///
161/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
162/// Otherwise the computation result is written into the result.
163///
164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi16)
165#[inline]
166#[target_feature(enable = "avx512bitalg,avx512vl")]
167#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
168#[cfg_attr(test, assert_instr(vpopcntw))]
169#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
170pub const fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i {
171    unsafe {
172        transmute(simd_select_bitmask(
173            k,
174            simd_ctpop(a.as_i16x8()),
175            i16x8::ZERO,
176        ))
177    }
178}
179
180/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
181///
182/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
183/// Otherwise the computation result is written into the result.
184///
185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi16)
186#[inline]
187#[target_feature(enable = "avx512bitalg,avx512vl")]
188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
189#[cfg_attr(test, assert_instr(vpopcntw))]
190#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
191pub const fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
192    unsafe {
193        transmute(simd_select_bitmask(
194            k,
195            simd_ctpop(a.as_i16x8()),
196            src.as_i16x8(),
197        ))
198    }
199}
200
201/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
202///
203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi8)
204#[inline]
205#[target_feature(enable = "avx512bitalg")]
206#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
207#[cfg_attr(test, assert_instr(vpopcntb))]
208#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
209pub const fn _mm512_popcnt_epi8(a: __m512i) -> __m512i {
210    unsafe { transmute(simd_ctpop(a.as_i8x64())) }
211}
212
213/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
214///
215/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
216/// Otherwise the computation result is written into the result.
217///
218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi8)
219#[inline]
220#[target_feature(enable = "avx512bitalg")]
221#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
222#[cfg_attr(test, assert_instr(vpopcntb))]
223#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
224pub const fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i {
225    unsafe {
226        transmute(simd_select_bitmask(
227            k,
228            simd_ctpop(a.as_i8x64()),
229            i8x64::ZERO,
230        ))
231    }
232}
233
234/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
235///
236/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
237/// Otherwise the computation result is written into the result.
238///
239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi8)
240#[inline]
241#[target_feature(enable = "avx512bitalg")]
242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
243#[cfg_attr(test, assert_instr(vpopcntb))]
244#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
245pub const fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
246    unsafe {
247        transmute(simd_select_bitmask(
248            k,
249            simd_ctpop(a.as_i8x64()),
250            src.as_i8x64(),
251        ))
252    }
253}
254
255/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
256///
257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi8)
258#[inline]
259#[target_feature(enable = "avx512bitalg,avx512vl")]
260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
261#[cfg_attr(test, assert_instr(vpopcntb))]
262#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
263pub const fn _mm256_popcnt_epi8(a: __m256i) -> __m256i {
264    unsafe { transmute(simd_ctpop(a.as_i8x32())) }
265}
266
267/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
268///
269/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
270/// Otherwise the computation result is written into the result.
271///
272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi8)
273#[inline]
274#[target_feature(enable = "avx512bitalg,avx512vl")]
275#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
276#[cfg_attr(test, assert_instr(vpopcntb))]
277#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
278pub const fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i {
279    unsafe {
280        transmute(simd_select_bitmask(
281            k,
282            simd_ctpop(a.as_i8x32()),
283            i8x32::ZERO,
284        ))
285    }
286}
287
288/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
289///
290/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
291/// Otherwise the computation result is written into the result.
292///
293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi8)
294#[inline]
295#[target_feature(enable = "avx512bitalg,avx512vl")]
296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
297#[cfg_attr(test, assert_instr(vpopcntb))]
298#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
299pub const fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
300    unsafe {
301        transmute(simd_select_bitmask(
302            k,
303            simd_ctpop(a.as_i8x32()),
304            src.as_i8x32(),
305        ))
306    }
307}
308
309/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
310///
311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi8)
312#[inline]
313#[target_feature(enable = "avx512bitalg,avx512vl")]
314#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
315#[cfg_attr(test, assert_instr(vpopcntb))]
316#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
317pub const fn _mm_popcnt_epi8(a: __m128i) -> __m128i {
318    unsafe { transmute(simd_ctpop(a.as_i8x16())) }
319}
320
321/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
322///
323/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
324/// Otherwise the computation result is written into the result.
325///
326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi8)
327#[inline]
328#[target_feature(enable = "avx512bitalg,avx512vl")]
329#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
330#[cfg_attr(test, assert_instr(vpopcntb))]
331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
332pub const fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i {
333    unsafe {
334        transmute(simd_select_bitmask(
335            k,
336            simd_ctpop(a.as_i8x16()),
337            i8x16::ZERO,
338        ))
339    }
340}
341
342/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
343///
344/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
345/// Otherwise the computation result is written into the result.
346///
347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi8)
348#[inline]
349#[target_feature(enable = "avx512bitalg,avx512vl")]
350#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
351#[cfg_attr(test, assert_instr(vpopcntb))]
352#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
353pub const fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
354    unsafe {
355        transmute(simd_select_bitmask(
356            k,
357            simd_ctpop(a.as_i8x16()),
358            src.as_i8x16(),
359        ))
360    }
361}
362
363/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
364/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
365/// It then selects these bits and packs them into the output.
366///
367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bitshuffle_epi64_mask)
368#[inline]
369#[target_feature(enable = "avx512bitalg")]
370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
371#[cfg_attr(test, assert_instr(vpshufbitqmb))]
372pub fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 {
373    unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64(), !0) }
374}
375
376/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
377/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
378/// It then selects these bits and packs them into the output.
379///
380/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
381/// Otherwise the computation result is written into the result.
382///
383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_bitshuffle_epi64_mask)
384#[inline]
385#[target_feature(enable = "avx512bitalg")]
386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
387#[cfg_attr(test, assert_instr(vpshufbitqmb))]
388pub fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -> __mmask64 {
389    unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64(), k) }
390}
391
392/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
393/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
394/// It then selects these bits and packs them into the output.
395///
396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bitshuffle_epi64_mask)
397#[inline]
398#[target_feature(enable = "avx512bitalg,avx512vl")]
399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
400#[cfg_attr(test, assert_instr(vpshufbitqmb))]
401pub fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 {
402    unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32(), !0) }
403}
404
405/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
406/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
407/// It then selects these bits and packs them into the output.
408///
409/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
410/// Otherwise the computation result is written into the result.
411///
412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_bitshuffle_epi64_mask)
413#[inline]
414#[target_feature(enable = "avx512bitalg,avx512vl")]
415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
416#[cfg_attr(test, assert_instr(vpshufbitqmb))]
417pub fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -> __mmask32 {
418    unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32(), k) }
419}
420
421/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
422/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
423/// It then selects these bits and packs them into the output.
424///
425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bitshuffle_epi64_mask)
426#[inline]
427#[target_feature(enable = "avx512bitalg,avx512vl")]
428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
429#[cfg_attr(test, assert_instr(vpshufbitqmb))]
430pub fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
431    unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16(), !0) }
432}
433
434/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
435/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
436/// It then selects these bits and packs them into the output.
437///
438/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
439/// Otherwise the computation result is written into the result.
440///
441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_bitshuffle_epi64_mask)
442#[inline]
443#[target_feature(enable = "avx512bitalg,avx512vl")]
444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
445#[cfg_attr(test, assert_instr(vpshufbitqmb))]
446pub fn _mm_mask_bitshuffle_epi64_mask(k: __mmask16, b: __m128i, c: __m128i) -> __mmask16 {
447    unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16(), k) }
448}
449
450#[cfg(test)]
451mod tests {
452    // Some of the constants in the tests below are just bit patterns. They should not
453    // be interpreted as integers; signedness does not make sense for them, but
454    // __mXXXi happens to be defined in terms of signed integers.
455    #![allow(overflowing_literals)]
456
457    use crate::core_arch::assert_eq_const as assert_eq;
458    use stdarch_test::simd_test;
459
460    use crate::core_arch::x86::*;
461
462    #[simd_test(enable = "avx512bitalg,avx512f")]
463    const fn test_mm512_popcnt_epi16() {
464        let test_data = _mm512_set_epi16(
465            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
466            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
467            1024, 2048,
468        );
469        let actual_result = _mm512_popcnt_epi16(test_data);
470        let reference_result = _mm512_set_epi16(
471            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 12, 8, 1, 1, 1, 1, 1, 1,
472            1, 1, 1, 1, 1, 1,
473        );
474        assert_eq_m512i(actual_result, reference_result);
475    }
476
477    #[simd_test(enable = "avx512bitalg,avx512f")]
478    const fn test_mm512_maskz_popcnt_epi16() {
479        let test_data = _mm512_set_epi16(
480            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
481            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
482            1024, 2048,
483        );
484        let mask = 0xFF_FF_00_00;
485        let actual_result = _mm512_maskz_popcnt_epi16(mask, test_data);
486        let reference_result = _mm512_set_epi16(
487            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488            0, 0, 0, 0, 0,
489        );
490        assert_eq_m512i(actual_result, reference_result);
491    }
492
493    #[simd_test(enable = "avx512bitalg,avx512f")]
494    const fn test_mm512_mask_popcnt_epi16() {
495        let test_data = _mm512_set_epi16(
496            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
497            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
498            1024, 2048,
499        );
500        let mask = 0xFF_FF_00_00;
501        let actual_result = _mm512_mask_popcnt_epi16(test_data, mask, test_data);
502        let reference_result = _mm512_set_epi16(
503            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF_FF, -1, -100, 255, 256, 2,
504            4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048,
505        );
506        assert_eq_m512i(actual_result, reference_result);
507    }
508
509    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
510    const fn test_mm256_popcnt_epi16() {
511        let test_data = _mm256_set_epi16(
512            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
513            0x3F_FF, 0x7F_FF,
514        );
515        let actual_result = _mm256_popcnt_epi16(test_data);
516        let reference_result =
517            _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
518        assert_eq_m256i(actual_result, reference_result);
519    }
520
521    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
522    const fn test_mm256_maskz_popcnt_epi16() {
523        let test_data = _mm256_set_epi16(
524            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
525            0x3F_FF, 0x7F_FF,
526        );
527        let mask = 0xFF_00;
528        let actual_result = _mm256_maskz_popcnt_epi16(mask, test_data);
529        let reference_result = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
530        assert_eq_m256i(actual_result, reference_result);
531    }
532
533    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
534    const fn test_mm256_mask_popcnt_epi16() {
535        let test_data = _mm256_set_epi16(
536            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
537            0x3F_FF, 0x7F_FF,
538        );
539        let mask = 0xFF_00;
540        let actual_result = _mm256_mask_popcnt_epi16(test_data, mask, test_data);
541        let reference_result = _mm256_set_epi16(
542            0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, 0x3F_FF, 0x7F_FF,
543        );
544        assert_eq_m256i(actual_result, reference_result);
545    }
546
547    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
548    const fn test_mm_popcnt_epi16() {
549        let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
550        let actual_result = _mm_popcnt_epi16(test_data);
551        let reference_result = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
552        assert_eq_m128i(actual_result, reference_result);
553    }
554
555    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
556    const fn test_mm_maskz_popcnt_epi16() {
557        let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
558        let mask = 0xF0;
559        let actual_result = _mm_maskz_popcnt_epi16(mask, test_data);
560        let reference_result = _mm_set_epi16(0, 1, 2, 3, 0, 0, 0, 0);
561        assert_eq_m128i(actual_result, reference_result);
562    }
563
564    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
565    const fn test_mm_mask_popcnt_epi16() {
566        let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
567        let mask = 0xF0;
568        let actual_result = _mm_mask_popcnt_epi16(test_data, mask, test_data);
569        let reference_result = _mm_set_epi16(0, 1, 2, 3, 0xF, 0x1F, 0x3F, 0x7F);
570        assert_eq_m128i(actual_result, reference_result);
571    }
572
573    #[simd_test(enable = "avx512bitalg,avx512f")]
574    const fn test_mm512_popcnt_epi8() {
575        let test_data = _mm512_set_epi8(
576            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
577            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
578            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
579            225, 21, 249, 211, 155, 228, 70,
580        );
581        let actual_result = _mm512_popcnt_epi8(test_data);
582        let reference_result = _mm512_set_epi8(
583            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
584            2, 4, 4, 6, 4, 3, 3, 5, 6, 3, 3, 5, 6, 4, 4, 4, 3, 3, 6, 7, 3, 5, 5, 3, 4, 5, 3, 4, 4,
585            3, 6, 5, 5, 4, 3,
586        );
587        assert_eq_m512i(actual_result, reference_result);
588    }
589
590    #[simd_test(enable = "avx512bitalg,avx512f")]
591    const fn test_mm512_maskz_popcnt_epi8() {
592        let test_data = _mm512_set_epi8(
593            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
594            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
595            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
596            225, 21, 249, 211, 155, 228, 70,
597        );
598        let mask = 0xFF_FF_FF_FF_00_00_00_00;
599        let actual_result = _mm512_maskz_popcnt_epi8(mask, test_data);
600        let reference_result = _mm512_set_epi8(
601            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
602            2, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
603            0, 0, 0, 0, 0, 0,
604        );
605        assert_eq_m512i(actual_result, reference_result);
606    }
607
608    #[simd_test(enable = "avx512bitalg,avx512f")]
609    const fn test_mm512_mask_popcnt_epi8() {
610        let test_data = _mm512_set_epi8(
611            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
612            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
613            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
614            225, 21, 249, 211, 155, 228, 70,
615        );
616        let mask = 0xFF_FF_FF_FF_00_00_00_00;
617        let actual_result = _mm512_mask_popcnt_epi8(test_data, mask, test_data);
618        let reference_result = _mm512_set_epi8(
619            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
620            2, 4, 4, 183, 154, 84, 56, 227, 189, 140, 35, 117, 219, 169, 226, 170, 13, 22, 159,
621            251, 73, 121, 143, 145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
622        );
623        assert_eq_m512i(actual_result, reference_result);
624    }
625
626    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
627    const fn test_mm256_popcnt_epi8() {
628        let test_data = _mm256_set_epi8(
629            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
630            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172,
631        );
632        let actual_result = _mm256_popcnt_epi8(test_data);
633        let reference_result = _mm256_set_epi8(
634            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
635            2, 4, 4,
636        );
637        assert_eq_m256i(actual_result, reference_result);
638    }
639
640    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
641    const fn test_mm256_maskz_popcnt_epi8() {
642        let test_data = _mm256_set_epi8(
643            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143,
644            145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
645        );
646        let mask = 0xFF_FF_00_00;
647        let actual_result = _mm256_maskz_popcnt_epi8(mask, test_data);
648        let reference_result = _mm256_set_epi8(
649            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
650            0, 0, 0,
651        );
652        assert_eq_m256i(actual_result, reference_result);
653    }
654
655    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
656    const fn test_mm256_mask_popcnt_epi8() {
657        let test_data = _mm256_set_epi8(
658            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143,
659            145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
660        );
661        let mask = 0xFF_FF_00_00;
662        let actual_result = _mm256_mask_popcnt_epi8(test_data, mask, test_data);
663        let reference_result = _mm256_set_epi8(
664            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 251, 73, 121, 143, 145, 85, 91, 137,
665            90, 225, 21, 249, 211, 155, 228, 70,
666        );
667        assert_eq_m256i(actual_result, reference_result);
668    }
669
670    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
671    const fn test_mm_popcnt_epi8() {
672        let test_data = _mm_set_epi8(
673            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64,
674        );
675        let actual_result = _mm_popcnt_epi8(test_data);
676        let reference_result = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1);
677        assert_eq_m128i(actual_result, reference_result);
678    }
679
680    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
681    const fn test_mm_maskz_popcnt_epi8() {
682        let test_data = _mm_set_epi8(
683            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70,
684        );
685        let mask = 0xFF_00;
686        let actual_result = _mm_maskz_popcnt_epi8(mask, test_data);
687        let reference_result = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
688        assert_eq_m128i(actual_result, reference_result);
689    }
690
691    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
692    const fn test_mm_mask_popcnt_epi8() {
693        let test_data = _mm_set_epi8(
694            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70,
695        );
696        let mask = 0xFF_00;
697        let actual_result = _mm_mask_popcnt_epi8(test_data, mask, test_data);
698        let reference_result =
699            _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 90, 225, 21, 249, 211, 155, 228, 70);
700        assert_eq_m128i(actual_result, reference_result);
701    }
702
703    #[simd_test(enable = "avx512bitalg,avx512f")]
704    fn test_mm512_bitshuffle_epi64_mask() {
705        let test_indices = _mm512_set_epi8(
706            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
707            8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59,
708            58, 57, 56, 32, 32, 16, 16, 0, 0, 8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
709        );
710        let test_data = _mm512_setr_epi64(
711            0xFF_FF_FF_FF_00_00_00_00,
712            0xFF_00_FF_00_FF_00_FF_00,
713            0xFF_00_00_00_00_00_00_00,
714            0xAC_00_00_00_00_00_00_00,
715            0xFF_FF_FF_FF_00_00_00_00,
716            0xFF_00_FF_00_FF_00_FF_00,
717            0xFF_00_00_00_00_00_00_00,
718            0xAC_00_00_00_00_00_00_00,
719        );
720        let actual_result = _mm512_bitshuffle_epi64_mask(test_data, test_indices);
721        let reference_result = 0xF0 << 0
722            | 0x03 << 8
723            | 0xFF << 16
724            | 0xAC << 24
725            | 0xF0 << 32
726            | 0x03 << 40
727            | 0xFF << 48
728            | 0xAC << 56;
729
730        assert_eq!(actual_result, reference_result);
731    }
732
733    #[simd_test(enable = "avx512bitalg,avx512f")]
734    fn test_mm512_mask_bitshuffle_epi64_mask() {
735        let test_indices = _mm512_set_epi8(
736            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
737            8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59,
738            58, 57, 56, 32, 32, 16, 16, 0, 0, 8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
739        );
740        let test_data = _mm512_setr_epi64(
741            0xFF_FF_FF_FF_00_00_00_00,
742            0xFF_00_FF_00_FF_00_FF_00,
743            0xFF_00_00_00_00_00_00_00,
744            0xAC_00_00_00_00_00_00_00,
745            0xFF_FF_FF_FF_00_00_00_00,
746            0xFF_00_FF_00_FF_00_FF_00,
747            0xFF_00_00_00_00_00_00_00,
748            0xAC_00_00_00_00_00_00_00,
749        );
750        let mask = 0xFF_FF_FF_FF_00_00_00_00;
751        let actual_result = _mm512_mask_bitshuffle_epi64_mask(mask, test_data, test_indices);
752        let reference_result = 0x00 << 0
753            | 0x00 << 8
754            | 0x00 << 16
755            | 0x00 << 24
756            | 0xF0 << 32
757            | 0x03 << 40
758            | 0xFF << 48
759            | 0xAC << 56;
760
761        assert_eq!(actual_result, reference_result);
762    }
763
764    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
765    fn test_mm256_bitshuffle_epi64_mask() {
766        let test_indices = _mm256_set_epi8(
767            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
768            8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
769        );
770        let test_data = _mm256_setr_epi64x(
771            0xFF_FF_FF_FF_00_00_00_00,
772            0xFF_00_FF_00_FF_00_FF_00,
773            0xFF_00_00_00_00_00_00_00,
774            0xAC_00_00_00_00_00_00_00,
775        );
776        let actual_result = _mm256_bitshuffle_epi64_mask(test_data, test_indices);
777        let reference_result = 0xF0 << 0 | 0x03 << 8 | 0xFF << 16 | 0xAC << 24;
778
779        assert_eq!(actual_result, reference_result);
780    }
781
782    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
783    fn test_mm256_mask_bitshuffle_epi64_mask() {
784        let test_indices = _mm256_set_epi8(
785            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
786            8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
787        );
788        let test_data = _mm256_setr_epi64x(
789            0xFF_FF_FF_FF_00_00_00_00,
790            0xFF_00_FF_00_FF_00_FF_00,
791            0xFF_00_00_00_00_00_00_00,
792            0xAC_00_00_00_00_00_00_00,
793        );
794        let mask = 0xFF_FF_00_00;
795        let actual_result = _mm256_mask_bitshuffle_epi64_mask(mask, test_data, test_indices);
796        let reference_result = 0x00 << 0 | 0x00 << 8 | 0xFF << 16 | 0xAC << 24;
797
798        assert_eq!(actual_result, reference_result);
799    }
800
801    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
802    fn test_mm_bitshuffle_epi64_mask() {
803        let test_indices = _mm_set_epi8(
804            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56,
805        );
806        let test_data = _mm_setr_epi64x(0xFF_00_00_00_00_00_00_00, 0xAC_00_00_00_00_00_00_00);
807        let actual_result = _mm_bitshuffle_epi64_mask(test_data, test_indices);
808        let reference_result = 0xFF << 0 | 0xAC << 8;
809
810        assert_eq!(actual_result, reference_result);
811    }
812
813    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
814    fn test_mm_mask_bitshuffle_epi64_mask() {
815        let test_indices = _mm_set_epi8(
816            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56,
817        );
818        let test_data = _mm_setr_epi64x(0xFF_00_00_00_00_00_00_00, 0xAC_00_00_00_00_00_00_00);
819        let mask = 0xFF_00;
820        let actual_result = _mm_mask_bitshuffle_epi64_mask(mask, test_data, test_indices);
821        let reference_result = 0x00 << 0 | 0xAC << 8;
822
823        assert_eq!(actual_result, reference_result);
824    }
825}