1use crate::core_arch::simd::i8x16;
11use crate::core_arch::simd::i8x32;
12use crate::core_arch::simd::i8x64;
13use crate::core_arch::simd::i16x8;
14use crate::core_arch::simd::i16x16;
15use crate::core_arch::simd::i16x32;
16use crate::core_arch::x86::__m128i;
17use crate::core_arch::x86::__m256i;
18use crate::core_arch::x86::__m512i;
19use crate::core_arch::x86::__mmask8;
20use crate::core_arch::x86::__mmask16;
21use crate::core_arch::x86::__mmask32;
22use crate::core_arch::x86::__mmask64;
23use crate::intrinsics::simd::{simd_ctpop, simd_select_bitmask};
24use crate::mem::transmute;
25
26#[cfg(test)]
27use stdarch_test::assert_instr;
28
29#[allow(improper_ctypes)]
30unsafe extern "C" {
31 #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.512"]
32 fn bitshuffle_512(data: i8x64, indices: i8x64, mask: __mmask64) -> __mmask64;
33 #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.256"]
34 fn bitshuffle_256(data: i8x32, indices: i8x32, mask: __mmask32) -> __mmask32;
35 #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.128"]
36 fn bitshuffle_128(data: i8x16, indices: i8x16, mask: __mmask16) -> __mmask16;
37}
38
39#[inline]
43#[target_feature(enable = "avx512bitalg")]
44#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
45#[cfg_attr(test, assert_instr(vpopcntw))]
46#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
47pub const fn _mm512_popcnt_epi16(a: __m512i) -> __m512i {
48 unsafe { transmute(simd_ctpop(a.as_i16x32())) }
49}
50
51#[inline]
58#[target_feature(enable = "avx512bitalg")]
59#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
60#[cfg_attr(test, assert_instr(vpopcntw))]
61#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
62pub const fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i {
63 unsafe {
64 transmute(simd_select_bitmask(
65 k,
66 simd_ctpop(a.as_i16x32()),
67 i16x32::ZERO,
68 ))
69 }
70}
71
72#[inline]
79#[target_feature(enable = "avx512bitalg")]
80#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
81#[cfg_attr(test, assert_instr(vpopcntw))]
82#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
83pub const fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
84 unsafe {
85 transmute(simd_select_bitmask(
86 k,
87 simd_ctpop(a.as_i16x32()),
88 src.as_i16x32(),
89 ))
90 }
91}
92
93#[inline]
97#[target_feature(enable = "avx512bitalg,avx512vl")]
98#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
99#[cfg_attr(test, assert_instr(vpopcntw))]
100#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
101pub const fn _mm256_popcnt_epi16(a: __m256i) -> __m256i {
102 unsafe { transmute(simd_ctpop(a.as_i16x16())) }
103}
104
105#[inline]
112#[target_feature(enable = "avx512bitalg,avx512vl")]
113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
114#[cfg_attr(test, assert_instr(vpopcntw))]
115#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
116pub const fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i {
117 unsafe {
118 transmute(simd_select_bitmask(
119 k,
120 simd_ctpop(a.as_i16x16()),
121 i16x16::ZERO,
122 ))
123 }
124}
125
126#[inline]
133#[target_feature(enable = "avx512bitalg,avx512vl")]
134#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
135#[cfg_attr(test, assert_instr(vpopcntw))]
136#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
137pub const fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
138 unsafe {
139 transmute(simd_select_bitmask(
140 k,
141 simd_ctpop(a.as_i16x16()),
142 src.as_i16x16(),
143 ))
144 }
145}
146
147#[inline]
151#[target_feature(enable = "avx512bitalg,avx512vl")]
152#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
153#[cfg_attr(test, assert_instr(vpopcntw))]
154#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
155pub const fn _mm_popcnt_epi16(a: __m128i) -> __m128i {
156 unsafe { transmute(simd_ctpop(a.as_i16x8())) }
157}
158
159#[inline]
166#[target_feature(enable = "avx512bitalg,avx512vl")]
167#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
168#[cfg_attr(test, assert_instr(vpopcntw))]
169#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
170pub const fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i {
171 unsafe {
172 transmute(simd_select_bitmask(
173 k,
174 simd_ctpop(a.as_i16x8()),
175 i16x8::ZERO,
176 ))
177 }
178}
179
180#[inline]
187#[target_feature(enable = "avx512bitalg,avx512vl")]
188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
189#[cfg_attr(test, assert_instr(vpopcntw))]
190#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
191pub const fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
192 unsafe {
193 transmute(simd_select_bitmask(
194 k,
195 simd_ctpop(a.as_i16x8()),
196 src.as_i16x8(),
197 ))
198 }
199}
200
201#[inline]
205#[target_feature(enable = "avx512bitalg")]
206#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
207#[cfg_attr(test, assert_instr(vpopcntb))]
208#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
209pub const fn _mm512_popcnt_epi8(a: __m512i) -> __m512i {
210 unsafe { transmute(simd_ctpop(a.as_i8x64())) }
211}
212
213#[inline]
220#[target_feature(enable = "avx512bitalg")]
221#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
222#[cfg_attr(test, assert_instr(vpopcntb))]
223#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
224pub const fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i {
225 unsafe {
226 transmute(simd_select_bitmask(
227 k,
228 simd_ctpop(a.as_i8x64()),
229 i8x64::ZERO,
230 ))
231 }
232}
233
234#[inline]
241#[target_feature(enable = "avx512bitalg")]
242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
243#[cfg_attr(test, assert_instr(vpopcntb))]
244#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
245pub const fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
246 unsafe {
247 transmute(simd_select_bitmask(
248 k,
249 simd_ctpop(a.as_i8x64()),
250 src.as_i8x64(),
251 ))
252 }
253}
254
255#[inline]
259#[target_feature(enable = "avx512bitalg,avx512vl")]
260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
261#[cfg_attr(test, assert_instr(vpopcntb))]
262#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
263pub const fn _mm256_popcnt_epi8(a: __m256i) -> __m256i {
264 unsafe { transmute(simd_ctpop(a.as_i8x32())) }
265}
266
267#[inline]
274#[target_feature(enable = "avx512bitalg,avx512vl")]
275#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
276#[cfg_attr(test, assert_instr(vpopcntb))]
277#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
278pub const fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i {
279 unsafe {
280 transmute(simd_select_bitmask(
281 k,
282 simd_ctpop(a.as_i8x32()),
283 i8x32::ZERO,
284 ))
285 }
286}
287
288#[inline]
295#[target_feature(enable = "avx512bitalg,avx512vl")]
296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
297#[cfg_attr(test, assert_instr(vpopcntb))]
298#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
299pub const fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
300 unsafe {
301 transmute(simd_select_bitmask(
302 k,
303 simd_ctpop(a.as_i8x32()),
304 src.as_i8x32(),
305 ))
306 }
307}
308
309#[inline]
313#[target_feature(enable = "avx512bitalg,avx512vl")]
314#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
315#[cfg_attr(test, assert_instr(vpopcntb))]
316#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
317pub const fn _mm_popcnt_epi8(a: __m128i) -> __m128i {
318 unsafe { transmute(simd_ctpop(a.as_i8x16())) }
319}
320
321#[inline]
328#[target_feature(enable = "avx512bitalg,avx512vl")]
329#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
330#[cfg_attr(test, assert_instr(vpopcntb))]
331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
332pub const fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i {
333 unsafe {
334 transmute(simd_select_bitmask(
335 k,
336 simd_ctpop(a.as_i8x16()),
337 i8x16::ZERO,
338 ))
339 }
340}
341
342#[inline]
349#[target_feature(enable = "avx512bitalg,avx512vl")]
350#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
351#[cfg_attr(test, assert_instr(vpopcntb))]
352#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
353pub const fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
354 unsafe {
355 transmute(simd_select_bitmask(
356 k,
357 simd_ctpop(a.as_i8x16()),
358 src.as_i8x16(),
359 ))
360 }
361}
362
363#[inline]
369#[target_feature(enable = "avx512bitalg")]
370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
371#[cfg_attr(test, assert_instr(vpshufbitqmb))]
372pub fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 {
373 unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64(), !0) }
374}
375
376#[inline]
385#[target_feature(enable = "avx512bitalg")]
386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
387#[cfg_attr(test, assert_instr(vpshufbitqmb))]
388pub fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -> __mmask64 {
389 unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64(), k) }
390}
391
392#[inline]
398#[target_feature(enable = "avx512bitalg,avx512vl")]
399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
400#[cfg_attr(test, assert_instr(vpshufbitqmb))]
401pub fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 {
402 unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32(), !0) }
403}
404
405#[inline]
414#[target_feature(enable = "avx512bitalg,avx512vl")]
415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
416#[cfg_attr(test, assert_instr(vpshufbitqmb))]
417pub fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -> __mmask32 {
418 unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32(), k) }
419}
420
421#[inline]
427#[target_feature(enable = "avx512bitalg,avx512vl")]
428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
429#[cfg_attr(test, assert_instr(vpshufbitqmb))]
430pub fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
431 unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16(), !0) }
432}
433
434#[inline]
443#[target_feature(enable = "avx512bitalg,avx512vl")]
444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
445#[cfg_attr(test, assert_instr(vpshufbitqmb))]
446pub fn _mm_mask_bitshuffle_epi64_mask(k: __mmask16, b: __m128i, c: __m128i) -> __mmask16 {
447 unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16(), k) }
448}
449
#[cfg(test)]
mod tests {
    // Many lane values below are written as raw bit patterns (for example
    // `0xFF_FF` in a 16-bit lane or `255` in an 8-bit lane) although the `set`
    // constructors take signed integers, so wrapping literals are permitted here.
    #![allow(overflowing_literals)]

    use crate::core_arch::assert_eq_const as assert_eq;
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    // ---- 16-bit population count -------------------------------------------

    // Unmasked: every lane reports its own bit count.
    #[simd_test(enable = "avx512bitalg,avx512f")]
    const fn test_mm512_popcnt_epi16() {
        let input = _mm512_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
            1024, 2048,
        );
        let expected = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 12, 8, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1,
        );
        assert_eq_m512i(_mm512_popcnt_epi16(input), expected);
    }

    // Zero-masked: only the lanes covered by the upper 16 mask bits are counted,
    // the remaining lanes come back as zero.
    #[simd_test(enable = "avx512bitalg,avx512f")]
    const fn test_mm512_maskz_popcnt_epi16() {
        let k: __mmask32 = 0xFF_FF_00_00;
        let input = _mm512_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
            1024, 2048,
        );
        let expected = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0,
        );
        assert_eq_m512i(_mm512_maskz_popcnt_epi16(k, input), expected);
    }

    // Merge-masked with `src == a`: unselected lanes keep their input value.
    #[simd_test(enable = "avx512bitalg,avx512f")]
    const fn test_mm512_mask_popcnt_epi16() {
        let k: __mmask32 = 0xFF_FF_00_00;
        let input = _mm512_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
            1024, 2048,
        );
        let expected = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF_FF, -1, -100, 255, 256, 2,
            4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048,
        );
        assert_eq_m512i(_mm512_mask_popcnt_epi16(input, k, input), expected);
    }

    // Unmasked, 256-bit.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    const fn test_mm256_popcnt_epi16() {
        let input = _mm256_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF,
        );
        let expected = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(_mm256_popcnt_epi16(input), expected);
    }

    // Zero-masked, 256-bit: upper 8 mask bits set.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    const fn test_mm256_maskz_popcnt_epi16() {
        let k: __mmask16 = 0xFF_00;
        let input = _mm256_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF,
        );
        let expected = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(_mm256_maskz_popcnt_epi16(k, input), expected);
    }

    // Merge-masked, 256-bit, with `src == a`.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    const fn test_mm256_mask_popcnt_epi16() {
        let k: __mmask16 = 0xFF_00;
        let input = _mm256_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF,
        );
        let expected = _mm256_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, 0x3F_FF, 0x7F_FF,
        );
        assert_eq_m256i(_mm256_mask_popcnt_epi16(input, k, input), expected);
    }

    // Unmasked, 128-bit.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    const fn test_mm_popcnt_epi16() {
        let input = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
        let expected = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(_mm_popcnt_epi16(input), expected);
    }

    // Zero-masked, 128-bit: upper 4 mask bits set.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    const fn test_mm_maskz_popcnt_epi16() {
        let k: __mmask8 = 0xF0;
        let input = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
        let expected = _mm_set_epi16(0, 1, 2, 3, 0, 0, 0, 0);
        assert_eq_m128i(_mm_maskz_popcnt_epi16(k, input), expected);
    }

    // Merge-masked, 128-bit, with `src == a`.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    const fn test_mm_mask_popcnt_epi16() {
        let k: __mmask8 = 0xF0;
        let input = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
        let expected = _mm_set_epi16(0, 1, 2, 3, 0xF, 0x1F, 0x3F, 0x7F);
        assert_eq_m128i(_mm_mask_popcnt_epi16(input, k, input), expected);
    }

    // ---- 8-bit population count --------------------------------------------

    // Unmasked: every byte lane reports its own bit count.
    #[simd_test(enable = "avx512bitalg,avx512f")]
    const fn test_mm512_popcnt_epi8() {
        let input = _mm512_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
            225, 21, 249, 211, 155, 228, 70,
        );
        let expected = _mm512_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
            2, 4, 4, 6, 4, 3, 3, 5, 6, 3, 3, 5, 6, 4, 4, 4, 3, 3, 6, 7, 3, 5, 5, 3, 4, 5, 3, 4, 4,
            3, 6, 5, 5, 4, 3,
        );
        assert_eq_m512i(_mm512_popcnt_epi8(input), expected);
    }

    // Zero-masked: upper 32 mask bits set, remaining lanes come back as zero.
    #[simd_test(enable = "avx512bitalg,avx512f")]
    const fn test_mm512_maskz_popcnt_epi8() {
        let k: __mmask64 = 0xFF_FF_FF_FF_00_00_00_00;
        let input = _mm512_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
            225, 21, 249, 211, 155, 228, 70,
        );
        let expected = _mm512_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
            2, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0,
        );
        assert_eq_m512i(_mm512_maskz_popcnt_epi8(k, input), expected);
    }

    // Merge-masked with `src == a`: unselected lanes keep their input value.
    #[simd_test(enable = "avx512bitalg,avx512f")]
    const fn test_mm512_mask_popcnt_epi8() {
        let k: __mmask64 = 0xFF_FF_FF_FF_00_00_00_00;
        let input = _mm512_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
            225, 21, 249, 211, 155, 228, 70,
        );
        let expected = _mm512_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
            2, 4, 4, 183, 154, 84, 56, 227, 189, 140, 35, 117, 219, 169, 226, 170, 13, 22, 159,
            251, 73, 121, 143, 145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        assert_eq_m512i(_mm512_mask_popcnt_epi8(input, k, input), expected);
    }

    // Unmasked, 256-bit.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    const fn test_mm256_popcnt_epi8() {
        let input = _mm256_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172,
        );
        let expected = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
            2, 4, 4,
        );
        assert_eq_m256i(_mm256_popcnt_epi8(input), expected);
    }

    // Zero-masked, 256-bit: upper 16 mask bits set.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    const fn test_mm256_maskz_popcnt_epi8() {
        let k: __mmask32 = 0xFF_FF_00_00;
        let input = _mm256_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143,
            145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        let expected = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0,
        );
        assert_eq_m256i(_mm256_maskz_popcnt_epi8(k, input), expected);
    }

    // Merge-masked, 256-bit, with `src == a`.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    const fn test_mm256_mask_popcnt_epi8() {
        let k: __mmask32 = 0xFF_FF_00_00;
        let input = _mm256_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143,
            145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        let expected = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 251, 73, 121, 143, 145, 85, 91, 137,
            90, 225, 21, 249, 211, 155, 228, 70,
        );
        assert_eq_m256i(_mm256_mask_popcnt_epi8(input, k, input), expected);
    }

    // Unmasked, 128-bit.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    const fn test_mm_popcnt_epi8() {
        let input = _mm_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64,
        );
        let expected = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(_mm_popcnt_epi8(input), expected);
    }

    // Zero-masked, 128-bit: upper 8 mask bits set.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    const fn test_mm_maskz_popcnt_epi8() {
        let k: __mmask16 = 0xFF_00;
        let input = _mm_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        let expected = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(_mm_maskz_popcnt_epi8(k, input), expected);
    }

    // Merge-masked, 128-bit, with `src == a`.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    const fn test_mm_mask_popcnt_epi8() {
        let k: __mmask16 = 0xFF_00;
        let input = _mm_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        let expected = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 90, 225, 21, 249, 211, 155, 228, 70);
        assert_eq_m128i(_mm_mask_popcnt_epi8(input, k, input), expected);
    }

    // ---- bit shuffle -------------------------------------------------------
    //
    // In the expected masks below, each byte is the 8 bits gathered from one
    // 64-bit data lane (least-significant byte <-> first `setr` lane).

    #[simd_test(enable = "avx512bitalg,avx512f")]
    fn test_mm512_bitshuffle_epi64_mask() {
        let indices = _mm512_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
            8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59,
            58, 57, 56, 32, 32, 16, 16, 0, 0, 8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
        );
        let data = _mm512_setr_epi64(
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
        );
        let expected: __mmask64 = 0xAC_FF_03_F0_AC_FF_03_F0;

        let actual = _mm512_bitshuffle_epi64_mask(data, indices);

        assert_eq!(actual, expected);
    }

    // Same inputs as the unmasked test; the writemask clears the low 32 result bits.
    #[simd_test(enable = "avx512bitalg,avx512f")]
    fn test_mm512_mask_bitshuffle_epi64_mask() {
        let indices = _mm512_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
            8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59,
            58, 57, 56, 32, 32, 16, 16, 0, 0, 8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
        );
        let data = _mm512_setr_epi64(
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
        );
        let k: __mmask64 = 0xFF_FF_FF_FF_00_00_00_00;
        let expected: __mmask64 = 0xAC_FF_03_F0_00_00_00_00;

        let actual = _mm512_mask_bitshuffle_epi64_mask(k, data, indices);

        assert_eq!(actual, expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    fn test_mm256_bitshuffle_epi64_mask() {
        let indices = _mm256_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
            8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
        );
        let data = _mm256_setr_epi64x(
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
        );
        let expected: __mmask32 = 0xAC_FF_03_F0;

        let actual = _mm256_bitshuffle_epi64_mask(data, indices);

        assert_eq!(actual, expected);
    }

    // Same inputs as the unmasked test; the writemask clears the low 16 result bits.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    fn test_mm256_mask_bitshuffle_epi64_mask() {
        let indices = _mm256_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
            8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
        );
        let data = _mm256_setr_epi64x(
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
        );
        let k: __mmask32 = 0xFF_FF_00_00;
        let expected: __mmask32 = 0xAC_FF_00_00;

        let actual = _mm256_mask_bitshuffle_epi64_mask(k, data, indices);

        assert_eq!(actual, expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    fn test_mm_bitshuffle_epi64_mask() {
        let indices = _mm_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56,
        );
        let data = _mm_setr_epi64x(0xFF_00_00_00_00_00_00_00, 0xAC_00_00_00_00_00_00_00);
        let expected: __mmask16 = 0xAC_FF;

        let actual = _mm_bitshuffle_epi64_mask(data, indices);

        assert_eq!(actual, expected);
    }

    // Same inputs as the unmasked test; the writemask clears the low 8 result bits.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    fn test_mm_mask_bitshuffle_epi64_mask() {
        let indices = _mm_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56,
        );
        let data = _mm_setr_epi64x(0xFF_00_00_00_00_00_00_00, 0xAC_00_00_00_00_00_00_00);
        let k: __mmask16 = 0xFF_00;
        let expected: __mmask16 = 0xAC_00;

        let actual = _mm_mask_bitshuffle_epi64_mask(k, data, indices);

        assert_eq!(actual, expected);
    }
}