1use crate::core_arch::simd::i16x16;
11use crate::core_arch::simd::i16x32;
12use crate::core_arch::simd::i16x8;
13use crate::core_arch::simd::i8x16;
14use crate::core_arch::simd::i8x32;
15use crate::core_arch::simd::i8x64;
16use crate::core_arch::x86::__m128i;
17use crate::core_arch::x86::__m256i;
18use crate::core_arch::x86::__m512i;
19use crate::core_arch::x86::__mmask16;
20use crate::core_arch::x86::__mmask32;
21use crate::core_arch::x86::__mmask64;
22use crate::core_arch::x86::__mmask8;
23use crate::core_arch::x86::m128iExt;
24use crate::core_arch::x86::m256iExt;
25use crate::core_arch::x86::m512iExt;
26use crate::intrinsics::simd::{simd_ctpop, simd_select_bitmask};
27use crate::mem::transmute;
28
29#[cfg(test)]
30use stdarch_test::assert_instr;
31
32#[allow(improper_ctypes)]
33extern "C" {
34 #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.512"]
35 fn bitshuffle_512(data: i8x64, indices: i8x64, mask: __mmask64) -> __mmask64;
36 #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.256"]
37 fn bitshuffle_256(data: i8x32, indices: i8x32, mask: __mmask32) -> __mmask32;
38 #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.128"]
39 fn bitshuffle_128(data: i8x16, indices: i8x16, mask: __mmask16) -> __mmask16;
40}
41
42#[inline]
46#[target_feature(enable = "avx512bitalg")]
47#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
48#[cfg_attr(test, assert_instr(vpopcntw))]
49pub unsafe fn _mm512_popcnt_epi16(a: __m512i) -> __m512i {
50 transmute(simd_ctpop(a.as_i16x32()))
51}
52
53#[inline]
60#[target_feature(enable = "avx512bitalg")]
61#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
62#[cfg_attr(test, assert_instr(vpopcntw))]
63pub unsafe fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i {
64 transmute(simd_select_bitmask(
65 k,
66 simd_ctpop(a.as_i16x32()),
67 i16x32::ZERO,
68 ))
69}
70
71#[inline]
78#[target_feature(enable = "avx512bitalg")]
79#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
80#[cfg_attr(test, assert_instr(vpopcntw))]
81pub unsafe fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
82 transmute(simd_select_bitmask(
83 k,
84 simd_ctpop(a.as_i16x32()),
85 src.as_i16x32(),
86 ))
87}
88
89#[inline]
93#[target_feature(enable = "avx512bitalg,avx512vl")]
94#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
95#[cfg_attr(test, assert_instr(vpopcntw))]
96pub unsafe fn _mm256_popcnt_epi16(a: __m256i) -> __m256i {
97 transmute(simd_ctpop(a.as_i16x16()))
98}
99
100#[inline]
107#[target_feature(enable = "avx512bitalg,avx512vl")]
108#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
109#[cfg_attr(test, assert_instr(vpopcntw))]
110pub unsafe fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i {
111 transmute(simd_select_bitmask(
112 k,
113 simd_ctpop(a.as_i16x16()),
114 i16x16::ZERO,
115 ))
116}
117
118#[inline]
125#[target_feature(enable = "avx512bitalg,avx512vl")]
126#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
127#[cfg_attr(test, assert_instr(vpopcntw))]
128pub unsafe fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
129 transmute(simd_select_bitmask(
130 k,
131 simd_ctpop(a.as_i16x16()),
132 src.as_i16x16(),
133 ))
134}
135
136#[inline]
140#[target_feature(enable = "avx512bitalg,avx512vl")]
141#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
142#[cfg_attr(test, assert_instr(vpopcntw))]
143pub unsafe fn _mm_popcnt_epi16(a: __m128i) -> __m128i {
144 transmute(simd_ctpop(a.as_i16x8()))
145}
146
147#[inline]
154#[target_feature(enable = "avx512bitalg,avx512vl")]
155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
156#[cfg_attr(test, assert_instr(vpopcntw))]
157pub unsafe fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i {
158 transmute(simd_select_bitmask(
159 k,
160 simd_ctpop(a.as_i16x8()),
161 i16x8::ZERO,
162 ))
163}
164
165#[inline]
172#[target_feature(enable = "avx512bitalg,avx512vl")]
173#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
174#[cfg_attr(test, assert_instr(vpopcntw))]
175pub unsafe fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
176 transmute(simd_select_bitmask(
177 k,
178 simd_ctpop(a.as_i16x8()),
179 src.as_i16x8(),
180 ))
181}
182
183#[inline]
187#[target_feature(enable = "avx512bitalg")]
188#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
189#[cfg_attr(test, assert_instr(vpopcntb))]
190pub unsafe fn _mm512_popcnt_epi8(a: __m512i) -> __m512i {
191 transmute(simd_ctpop(a.as_i8x64()))
192}
193
194#[inline]
201#[target_feature(enable = "avx512bitalg")]
202#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
203#[cfg_attr(test, assert_instr(vpopcntb))]
204pub unsafe fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i {
205 transmute(simd_select_bitmask(
206 k,
207 simd_ctpop(a.as_i8x64()),
208 i8x64::ZERO,
209 ))
210}
211
212#[inline]
219#[target_feature(enable = "avx512bitalg")]
220#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
221#[cfg_attr(test, assert_instr(vpopcntb))]
222pub unsafe fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
223 transmute(simd_select_bitmask(
224 k,
225 simd_ctpop(a.as_i8x64()),
226 src.as_i8x64(),
227 ))
228}
229
230#[inline]
234#[target_feature(enable = "avx512bitalg,avx512vl")]
235#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
236#[cfg_attr(test, assert_instr(vpopcntb))]
237pub unsafe fn _mm256_popcnt_epi8(a: __m256i) -> __m256i {
238 transmute(simd_ctpop(a.as_i8x32()))
239}
240
241#[inline]
248#[target_feature(enable = "avx512bitalg,avx512vl")]
249#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
250#[cfg_attr(test, assert_instr(vpopcntb))]
251pub unsafe fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i {
252 transmute(simd_select_bitmask(
253 k,
254 simd_ctpop(a.as_i8x32()),
255 i8x32::ZERO,
256 ))
257}
258
259#[inline]
266#[target_feature(enable = "avx512bitalg,avx512vl")]
267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
268#[cfg_attr(test, assert_instr(vpopcntb))]
269pub unsafe fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
270 transmute(simd_select_bitmask(
271 k,
272 simd_ctpop(a.as_i8x32()),
273 src.as_i8x32(),
274 ))
275}
276
277#[inline]
281#[target_feature(enable = "avx512bitalg,avx512vl")]
282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
283#[cfg_attr(test, assert_instr(vpopcntb))]
284pub unsafe fn _mm_popcnt_epi8(a: __m128i) -> __m128i {
285 transmute(simd_ctpop(a.as_i8x16()))
286}
287
288#[inline]
295#[target_feature(enable = "avx512bitalg,avx512vl")]
296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
297#[cfg_attr(test, assert_instr(vpopcntb))]
298pub unsafe fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i {
299 transmute(simd_select_bitmask(
300 k,
301 simd_ctpop(a.as_i8x16()),
302 i8x16::ZERO,
303 ))
304}
305
306#[inline]
313#[target_feature(enable = "avx512bitalg,avx512vl")]
314#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
315#[cfg_attr(test, assert_instr(vpopcntb))]
316pub unsafe fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
317 transmute(simd_select_bitmask(
318 k,
319 simd_ctpop(a.as_i8x16()),
320 src.as_i8x16(),
321 ))
322}
323
324#[inline]
330#[target_feature(enable = "avx512bitalg")]
331#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
332#[cfg_attr(test, assert_instr(vpshufbitqmb))]
333pub unsafe fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 {
334 bitshuffle_512(b.as_i8x64(), c.as_i8x64(), !0)
335}
336
337#[inline]
346#[target_feature(enable = "avx512bitalg")]
347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
348#[cfg_attr(test, assert_instr(vpshufbitqmb))]
349pub unsafe fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -> __mmask64 {
350 bitshuffle_512(b.as_i8x64(), c.as_i8x64(), k)
351}
352
353#[inline]
359#[target_feature(enable = "avx512bitalg,avx512vl")]
360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
361#[cfg_attr(test, assert_instr(vpshufbitqmb))]
362pub unsafe fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 {
363 bitshuffle_256(b.as_i8x32(), c.as_i8x32(), !0)
364}
365
366#[inline]
375#[target_feature(enable = "avx512bitalg,avx512vl")]
376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
377#[cfg_attr(test, assert_instr(vpshufbitqmb))]
378pub unsafe fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -> __mmask32 {
379 bitshuffle_256(b.as_i8x32(), c.as_i8x32(), k)
380}
381
382#[inline]
388#[target_feature(enable = "avx512bitalg,avx512vl")]
389#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
390#[cfg_attr(test, assert_instr(vpshufbitqmb))]
391pub unsafe fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
392 bitshuffle_128(b.as_i8x16(), c.as_i8x16(), !0)
393}
394
395#[inline]
404#[target_feature(enable = "avx512bitalg,avx512vl")]
405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
406#[cfg_attr(test, assert_instr(vpshufbitqmb))]
407pub unsafe fn _mm_mask_bitshuffle_epi64_mask(k: __mmask16, b: __m128i, c: __m128i) -> __mmask16 {
408 bitshuffle_128(b.as_i8x16(), c.as_i8x16(), k)
409}
410
#[cfg(test)]
mod tests {
    // Many literals in these tests are raw bit patterns (for example `0xFF` or `128` placed in
    // an 8-bit lane, or `0xFF_FF` in a 16-bit lane) rather than meaningful signed integers, so
    // overflowing literals are allowed for the whole module.
    #![allow(overflowing_literals)]

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    // Note on ordering: the `_mm*_set_epi*` constructors list lanes from the highest index down
    // to the lowest, so the first values in each table correspond to the high bits of a mask.

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_popcnt_epi16() {
        let input = _mm512_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
            1024, 2048,
        );
        let want = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 12, 8, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1,
        );
        let got = _mm512_popcnt_epi16(input);
        assert_eq_m512i(got, want);
    }

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_maskz_popcnt_epi16() {
        let input = _mm512_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
            1024, 2048,
        );
        // Only the upper 16 lanes are selected; the lower 16 must come back as zero.
        let k: __mmask32 = 0xFF_FF_00_00;
        let want = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0,
        );
        let got = _mm512_maskz_popcnt_epi16(k, input);
        assert_eq_m512i(got, want);
    }

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_mask_popcnt_epi16() {
        let input = _mm512_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
            1024, 2048,
        );
        // Only the upper 16 lanes are selected; the lower 16 must be copied from `src`, which is
        // the input itself here.
        let k: __mmask32 = 0xFF_FF_00_00;
        let want = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF_FF, -1, -100, 255, 256, 2,
            4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048,
        );
        let got = _mm512_mask_popcnt_epi16(input, k, input);
        assert_eq_m512i(got, want);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_popcnt_epi16() {
        let input = _mm256_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF,
        );
        let want = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let got = _mm256_popcnt_epi16(input);
        assert_eq_m256i(got, want);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_popcnt_epi16() {
        let input = _mm256_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF,
        );
        // Upper 8 lanes selected, lower 8 zeroed.
        let k: __mmask16 = 0xFF_00;
        let want = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
        let got = _mm256_maskz_popcnt_epi16(k, input);
        assert_eq_m256i(got, want);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_mask_popcnt_epi16() {
        let input = _mm256_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF,
        );
        // Upper 8 lanes selected, lower 8 copied from `src` (the input itself).
        let k: __mmask16 = 0xFF_00;
        let want = _mm256_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, 0x3F_FF, 0x7F_FF,
        );
        let got = _mm256_mask_popcnt_epi16(input, k, input);
        assert_eq_m256i(got, want);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_popcnt_epi16() {
        let input = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
        let want = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let got = _mm_popcnt_epi16(input);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_maskz_popcnt_epi16() {
        let input = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
        // Upper 4 lanes selected, lower 4 zeroed.
        let k: __mmask8 = 0xF0;
        let want = _mm_set_epi16(0, 1, 2, 3, 0, 0, 0, 0);
        let got = _mm_maskz_popcnt_epi16(k, input);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_mask_popcnt_epi16() {
        let input = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
        // Upper 4 lanes selected, lower 4 copied from `src` (the input itself).
        let k: __mmask8 = 0xF0;
        let want = _mm_set_epi16(0, 1, 2, 3, 0xF, 0x1F, 0x3F, 0x7F);
        let got = _mm_mask_popcnt_epi16(input, k, input);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_popcnt_epi8() {
        let input = _mm512_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
            225, 21, 249, 211, 155, 228, 70,
        );
        let want = _mm512_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
            2, 4, 4, 6, 4, 3, 3, 5, 6, 3, 3, 5, 6, 4, 4, 4, 3, 3, 6, 7, 3, 5, 5, 3, 4, 5, 3, 4, 4,
            3, 6, 5, 5, 4, 3,
        );
        let got = _mm512_popcnt_epi8(input);
        assert_eq_m512i(got, want);
    }

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_maskz_popcnt_epi8() {
        let input = _mm512_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
            225, 21, 249, 211, 155, 228, 70,
        );
        // Upper 32 lanes selected, lower 32 zeroed.
        let k: __mmask64 = 0xFF_FF_FF_FF_00_00_00_00;
        let want = _mm512_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
            2, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0,
        );
        let got = _mm512_maskz_popcnt_epi8(k, input);
        assert_eq_m512i(got, want);
    }

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_mask_popcnt_epi8() {
        let input = _mm512_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
            225, 21, 249, 211, 155, 228, 70,
        );
        // Upper 32 lanes selected, lower 32 copied from `src` (the input itself).
        let k: __mmask64 = 0xFF_FF_FF_FF_00_00_00_00;
        let want = _mm512_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
            2, 4, 4, 183, 154, 84, 56, 227, 189, 140, 35, 117, 219, 169, 226, 170, 13, 22, 159,
            251, 73, 121, 143, 145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        let got = _mm512_mask_popcnt_epi8(input, k, input);
        assert_eq_m512i(got, want);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_popcnt_epi8() {
        let input = _mm256_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172,
        );
        let want = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
            2, 4, 4,
        );
        let got = _mm256_popcnt_epi8(input);
        assert_eq_m256i(got, want);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_popcnt_epi8() {
        let input = _mm256_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143,
            145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        // Upper 16 lanes selected, lower 16 zeroed.
        let k: __mmask32 = 0xFF_FF_00_00;
        let want = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0,
        );
        let got = _mm256_maskz_popcnt_epi8(k, input);
        assert_eq_m256i(got, want);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_mask_popcnt_epi8() {
        let input = _mm256_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143,
            145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        // Upper 16 lanes selected, lower 16 copied from `src` (the input itself).
        let k: __mmask32 = 0xFF_FF_00_00;
        let want = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 251, 73, 121, 143, 145, 85, 91, 137,
            90, 225, 21, 249, 211, 155, 228, 70,
        );
        let got = _mm256_mask_popcnt_epi8(input, k, input);
        assert_eq_m256i(got, want);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_popcnt_epi8() {
        let input = _mm_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64,
        );
        let want = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1);
        let got = _mm_popcnt_epi8(input);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_maskz_popcnt_epi8() {
        let input = _mm_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        // Upper 8 lanes selected, lower 8 zeroed.
        let k: __mmask16 = 0xFF_00;
        let want = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
        let got = _mm_maskz_popcnt_epi8(k, input);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_mask_popcnt_epi8() {
        let input = _mm_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        // Upper 8 lanes selected, lower 8 copied from `src` (the input itself).
        let k: __mmask16 = 0xFF_00;
        let want = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 90, 225, 21, 249, 211, 155, 228, 70);
        let got = _mm_mask_popcnt_epi8(input, k, input);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_bitshuffle_epi64_mask() {
        let indices = _mm512_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
            8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59,
            58, 57, 56, 32, 32, 16, 16, 0, 0, 8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
        );
        let data = _mm512_setr_epi64(
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
        );
        // One expected result byte per 64-bit lane of `data`, lowest lane first.
        let want: __mmask64 = (0xF0 << 0)
            | (0x03 << 8)
            | (0xFF << 16)
            | (0xAC << 24)
            | (0xF0 << 32)
            | (0x03 << 40)
            | (0xFF << 48)
            | (0xAC << 56);
        let got = _mm512_bitshuffle_epi64_mask(data, indices);

        assert_eq!(got, want);
    }

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_mask_bitshuffle_epi64_mask() {
        let indices = _mm512_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
            8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59,
            58, 57, 56, 32, 32, 16, 16, 0, 0, 8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
        );
        let data = _mm512_setr_epi64(
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
        );
        // Only the upper 32 result bits are kept; the lower four result bytes must be zero.
        let k: __mmask64 = 0xFF_FF_FF_FF_00_00_00_00;
        let want: __mmask64 = (0x00 << 0)
            | (0x00 << 8)
            | (0x00 << 16)
            | (0x00 << 24)
            | (0xF0 << 32)
            | (0x03 << 40)
            | (0xFF << 48)
            | (0xAC << 56);
        let got = _mm512_mask_bitshuffle_epi64_mask(k, data, indices);

        assert_eq!(got, want);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_bitshuffle_epi64_mask() {
        let indices = _mm256_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
            8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
        );
        let data = _mm256_setr_epi64x(
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
        );
        // One expected result byte per 64-bit lane of `data`, lowest lane first.
        let want: __mmask32 = (0xF0 << 0) | (0x03 << 8) | (0xFF << 16) | (0xAC << 24);
        let got = _mm256_bitshuffle_epi64_mask(data, indices);

        assert_eq!(got, want);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_mask_bitshuffle_epi64_mask() {
        let indices = _mm256_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
            8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
        );
        let data = _mm256_setr_epi64x(
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
        );
        // Only the upper 16 result bits are kept; the lower two result bytes must be zero.
        let k: __mmask32 = 0xFF_FF_00_00;
        let want: __mmask32 = (0x00 << 0) | (0x00 << 8) | (0xFF << 16) | (0xAC << 24);
        let got = _mm256_mask_bitshuffle_epi64_mask(k, data, indices);

        assert_eq!(got, want);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_bitshuffle_epi64_mask() {
        let indices = _mm_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56,
        );
        let data = _mm_setr_epi64x(0xFF_00_00_00_00_00_00_00, 0xAC_00_00_00_00_00_00_00);
        // Every index picks from the top byte of its lane, so each result byte equals that byte.
        let want: __mmask16 = (0xFF << 0) | (0xAC << 8);
        let got = _mm_bitshuffle_epi64_mask(data, indices);

        assert_eq!(got, want);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_mask_bitshuffle_epi64_mask() {
        let indices = _mm_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56,
        );
        let data = _mm_setr_epi64x(0xFF_00_00_00_00_00_00_00, 0xAC_00_00_00_00_00_00_00);
        // Only the upper 8 result bits are kept; the lower result byte must be zero.
        let k: __mmask16 = 0xFF_00;
        let want: __mmask16 = (0x00 << 0) | (0xAC << 8);
        let got = _mm_mask_bitshuffle_epi64_mask(k, data, indices);

        assert_eq!(got, want);
    }
}