1use crate::core_arch::simd::i8x16;
11use crate::core_arch::simd::i8x32;
12use crate::core_arch::simd::i8x64;
13use crate::core_arch::simd::i16x8;
14use crate::core_arch::simd::i16x16;
15use crate::core_arch::simd::i16x32;
16use crate::core_arch::x86::__m128i;
17use crate::core_arch::x86::__m256i;
18use crate::core_arch::x86::__m512i;
19use crate::core_arch::x86::__mmask8;
20use crate::core_arch::x86::__mmask16;
21use crate::core_arch::x86::__mmask32;
22use crate::core_arch::x86::__mmask64;
23use crate::intrinsics::simd::{simd_ctpop, simd_select_bitmask};
24use crate::mem::transmute;
25
26#[cfg(test)]
27use stdarch_test::assert_instr;
28
// Bindings to the LLVM intrinsics named in the `link_name` attributes below (the masked
// `vpshufbitqmb` operations at 512-, 256- and 128-bit width). Each takes a byte vector of data,
// a byte vector of indices and a mask, and returns a mask of the same width as the input mask.
//
// NOTE(review): `improper_ctypes` is presumably silenced because the SIMD vector types are not
// considered FFI-safe by the lint even though these are LLVM intrinsics — confirm.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // 64 data bytes / 64 index bytes -> 64-bit mask.
    #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.512"]
    fn bitshuffle_512(data: i8x64, indices: i8x64, mask: __mmask64) -> __mmask64;
    // 32 data bytes / 32 index bytes -> 32-bit mask.
    #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.256"]
    fn bitshuffle_256(data: i8x32, indices: i8x32, mask: __mmask32) -> __mmask32;
    // 16 data bytes / 16 index bytes -> 16-bit mask.
    #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.128"]
    fn bitshuffle_128(data: i8x16, indices: i8x16, mask: __mmask16) -> __mmask16;
}
38
39#[inline]
43#[target_feature(enable = "avx512bitalg")]
44#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
45#[cfg_attr(test, assert_instr(vpopcntw))]
46pub fn _mm512_popcnt_epi16(a: __m512i) -> __m512i {
47 unsafe { transmute(simd_ctpop(a.as_i16x32())) }
48}
49
50#[inline]
57#[target_feature(enable = "avx512bitalg")]
58#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
59#[cfg_attr(test, assert_instr(vpopcntw))]
60pub fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i {
61 unsafe {
62 transmute(simd_select_bitmask(
63 k,
64 simd_ctpop(a.as_i16x32()),
65 i16x32::ZERO,
66 ))
67 }
68}
69
70#[inline]
77#[target_feature(enable = "avx512bitalg")]
78#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
79#[cfg_attr(test, assert_instr(vpopcntw))]
80pub fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
81 unsafe {
82 transmute(simd_select_bitmask(
83 k,
84 simd_ctpop(a.as_i16x32()),
85 src.as_i16x32(),
86 ))
87 }
88}
89
90#[inline]
94#[target_feature(enable = "avx512bitalg,avx512vl")]
95#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
96#[cfg_attr(test, assert_instr(vpopcntw))]
97pub fn _mm256_popcnt_epi16(a: __m256i) -> __m256i {
98 unsafe { transmute(simd_ctpop(a.as_i16x16())) }
99}
100
101#[inline]
108#[target_feature(enable = "avx512bitalg,avx512vl")]
109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
110#[cfg_attr(test, assert_instr(vpopcntw))]
111pub fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i {
112 unsafe {
113 transmute(simd_select_bitmask(
114 k,
115 simd_ctpop(a.as_i16x16()),
116 i16x16::ZERO,
117 ))
118 }
119}
120
121#[inline]
128#[target_feature(enable = "avx512bitalg,avx512vl")]
129#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
130#[cfg_attr(test, assert_instr(vpopcntw))]
131pub fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
132 unsafe {
133 transmute(simd_select_bitmask(
134 k,
135 simd_ctpop(a.as_i16x16()),
136 src.as_i16x16(),
137 ))
138 }
139}
140
141#[inline]
145#[target_feature(enable = "avx512bitalg,avx512vl")]
146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
147#[cfg_attr(test, assert_instr(vpopcntw))]
148pub fn _mm_popcnt_epi16(a: __m128i) -> __m128i {
149 unsafe { transmute(simd_ctpop(a.as_i16x8())) }
150}
151
152#[inline]
159#[target_feature(enable = "avx512bitalg,avx512vl")]
160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
161#[cfg_attr(test, assert_instr(vpopcntw))]
162pub fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i {
163 unsafe {
164 transmute(simd_select_bitmask(
165 k,
166 simd_ctpop(a.as_i16x8()),
167 i16x8::ZERO,
168 ))
169 }
170}
171
172#[inline]
179#[target_feature(enable = "avx512bitalg,avx512vl")]
180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
181#[cfg_attr(test, assert_instr(vpopcntw))]
182pub fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
183 unsafe {
184 transmute(simd_select_bitmask(
185 k,
186 simd_ctpop(a.as_i16x8()),
187 src.as_i16x8(),
188 ))
189 }
190}
191
192#[inline]
196#[target_feature(enable = "avx512bitalg")]
197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
198#[cfg_attr(test, assert_instr(vpopcntb))]
199pub fn _mm512_popcnt_epi8(a: __m512i) -> __m512i {
200 unsafe { transmute(simd_ctpop(a.as_i8x64())) }
201}
202
203#[inline]
210#[target_feature(enable = "avx512bitalg")]
211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
212#[cfg_attr(test, assert_instr(vpopcntb))]
213pub fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i {
214 unsafe {
215 transmute(simd_select_bitmask(
216 k,
217 simd_ctpop(a.as_i8x64()),
218 i8x64::ZERO,
219 ))
220 }
221}
222
223#[inline]
230#[target_feature(enable = "avx512bitalg")]
231#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
232#[cfg_attr(test, assert_instr(vpopcntb))]
233pub fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
234 unsafe {
235 transmute(simd_select_bitmask(
236 k,
237 simd_ctpop(a.as_i8x64()),
238 src.as_i8x64(),
239 ))
240 }
241}
242
243#[inline]
247#[target_feature(enable = "avx512bitalg,avx512vl")]
248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
249#[cfg_attr(test, assert_instr(vpopcntb))]
250pub fn _mm256_popcnt_epi8(a: __m256i) -> __m256i {
251 unsafe { transmute(simd_ctpop(a.as_i8x32())) }
252}
253
254#[inline]
261#[target_feature(enable = "avx512bitalg,avx512vl")]
262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
263#[cfg_attr(test, assert_instr(vpopcntb))]
264pub fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i {
265 unsafe {
266 transmute(simd_select_bitmask(
267 k,
268 simd_ctpop(a.as_i8x32()),
269 i8x32::ZERO,
270 ))
271 }
272}
273
274#[inline]
281#[target_feature(enable = "avx512bitalg,avx512vl")]
282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
283#[cfg_attr(test, assert_instr(vpopcntb))]
284pub fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
285 unsafe {
286 transmute(simd_select_bitmask(
287 k,
288 simd_ctpop(a.as_i8x32()),
289 src.as_i8x32(),
290 ))
291 }
292}
293
294#[inline]
298#[target_feature(enable = "avx512bitalg,avx512vl")]
299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
300#[cfg_attr(test, assert_instr(vpopcntb))]
301pub fn _mm_popcnt_epi8(a: __m128i) -> __m128i {
302 unsafe { transmute(simd_ctpop(a.as_i8x16())) }
303}
304
305#[inline]
312#[target_feature(enable = "avx512bitalg,avx512vl")]
313#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
314#[cfg_attr(test, assert_instr(vpopcntb))]
315pub fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i {
316 unsafe {
317 transmute(simd_select_bitmask(
318 k,
319 simd_ctpop(a.as_i8x16()),
320 i8x16::ZERO,
321 ))
322 }
323}
324
325#[inline]
332#[target_feature(enable = "avx512bitalg,avx512vl")]
333#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
334#[cfg_attr(test, assert_instr(vpopcntb))]
335pub fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
336 unsafe {
337 transmute(simd_select_bitmask(
338 k,
339 simd_ctpop(a.as_i8x16()),
340 src.as_i8x16(),
341 ))
342 }
343}
344
345#[inline]
351#[target_feature(enable = "avx512bitalg")]
352#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
353#[cfg_attr(test, assert_instr(vpshufbitqmb))]
354pub fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 {
355 unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64(), !0) }
356}
357
358#[inline]
367#[target_feature(enable = "avx512bitalg")]
368#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
369#[cfg_attr(test, assert_instr(vpshufbitqmb))]
370pub fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -> __mmask64 {
371 unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64(), k) }
372}
373
374#[inline]
380#[target_feature(enable = "avx512bitalg,avx512vl")]
381#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
382#[cfg_attr(test, assert_instr(vpshufbitqmb))]
383pub fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 {
384 unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32(), !0) }
385}
386
387#[inline]
396#[target_feature(enable = "avx512bitalg,avx512vl")]
397#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
398#[cfg_attr(test, assert_instr(vpshufbitqmb))]
399pub fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -> __mmask32 {
400 unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32(), k) }
401}
402
403#[inline]
409#[target_feature(enable = "avx512bitalg,avx512vl")]
410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
411#[cfg_attr(test, assert_instr(vpshufbitqmb))]
412pub fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
413 unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16(), !0) }
414}
415
416#[inline]
425#[target_feature(enable = "avx512bitalg,avx512vl")]
426#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
427#[cfg_attr(test, assert_instr(vpshufbitqmb))]
428pub fn _mm_mask_bitshuffle_epi64_mask(k: __mmask16, b: __m128i, c: __m128i) -> __mmask16 {
429 unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16(), k) }
430}
431
#[cfg(test)]
mod tests {
    // The literals in these tests are bit patterns rather than numeric quantities, so values
    // such as `0xFF` passed where a lane is presumably a signed 8-bit integer are intentional.
    #![allow(overflowing_literals)]

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    // Unmasked popcount over 32 x 16-bit lanes.
    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_popcnt_epi16() {
        let input = _mm512_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
            1024, 2048,
        );
        let got = _mm512_popcnt_epi16(input);
        let expected = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 12, 8, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1,
        );
        assert_eq_m512i(got, expected);
    }

    // Zero-masked popcount: only the upper 16 lanes are selected, the rest must be zero.
    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_maskz_popcnt_epi16() {
        let input = _mm512_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
            1024, 2048,
        );
        let k = 0xFF_FF_00_00;
        let got = _mm512_maskz_popcnt_epi16(k, input);
        let expected = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0,
        );
        assert_eq_m512i(got, expected);
    }

    // Merge-masked popcount: unselected lanes must keep the value from `src` (the input itself).
    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_mask_popcnt_epi16() {
        let input = _mm512_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
            1024, 2048,
        );
        let k = 0xFF_FF_00_00;
        let got = _mm512_mask_popcnt_epi16(input, k, input);
        let expected = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF_FF, -1, -100, 255, 256, 2,
            4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048,
        );
        assert_eq_m512i(got, expected);
    }

    // Unmasked popcount over 16 x 16-bit lanes.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_popcnt_epi16() {
        let input = _mm256_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF,
        );
        let got = _mm256_popcnt_epi16(input);
        let expected = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(got, expected);
    }

    // Zero-masked popcount: only the upper 8 lanes are selected.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_popcnt_epi16() {
        let input = _mm256_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF,
        );
        let k = 0xFF_00;
        let got = _mm256_maskz_popcnt_epi16(k, input);
        let expected = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(got, expected);
    }

    // Merge-masked popcount: the lower 8 lanes must be passed through from `src`.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_mask_popcnt_epi16() {
        let input = _mm256_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF,
        );
        let k = 0xFF_00;
        let got = _mm256_mask_popcnt_epi16(input, k, input);
        let expected = _mm256_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, 0x3F_FF, 0x7F_FF,
        );
        assert_eq_m256i(got, expected);
    }

    // Unmasked popcount over 8 x 16-bit lanes.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_popcnt_epi16() {
        let input = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
        let got = _mm_popcnt_epi16(input);
        let expected = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(got, expected);
    }

    // Zero-masked popcount: only the upper 4 lanes are selected.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_maskz_popcnt_epi16() {
        let input = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
        let k = 0xF0;
        let got = _mm_maskz_popcnt_epi16(k, input);
        let expected = _mm_set_epi16(0, 1, 2, 3, 0, 0, 0, 0);
        assert_eq_m128i(got, expected);
    }

    // Merge-masked popcount: the lower 4 lanes must be passed through from `src`.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_mask_popcnt_epi16() {
        let input = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
        let k = 0xF0;
        let got = _mm_mask_popcnt_epi16(input, k, input);
        let expected = _mm_set_epi16(0, 1, 2, 3, 0xF, 0x1F, 0x3F, 0x7F);
        assert_eq_m128i(got, expected);
    }

    // Unmasked popcount over 64 x 8-bit lanes.
    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_popcnt_epi8() {
        let input = _mm512_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
            225, 21, 249, 211, 155, 228, 70,
        );
        let got = _mm512_popcnt_epi8(input);
        let expected = _mm512_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
            2, 4, 4, 6, 4, 3, 3, 5, 6, 3, 3, 5, 6, 4, 4, 4, 3, 3, 6, 7, 3, 5, 5, 3, 4, 5, 3, 4, 4,
            3, 6, 5, 5, 4, 3,
        );
        assert_eq_m512i(got, expected);
    }

    // Zero-masked popcount: only the upper 32 lanes are selected.
    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_maskz_popcnt_epi8() {
        let input = _mm512_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
            225, 21, 249, 211, 155, 228, 70,
        );
        let k = 0xFF_FF_FF_FF_00_00_00_00;
        let got = _mm512_maskz_popcnt_epi8(k, input);
        let expected = _mm512_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
            2, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0,
        );
        assert_eq_m512i(got, expected);
    }

    // Merge-masked popcount: the lower 32 lanes must be passed through from `src`.
    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_mask_popcnt_epi8() {
        let input = _mm512_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
            225, 21, 249, 211, 155, 228, 70,
        );
        let k = 0xFF_FF_FF_FF_00_00_00_00;
        let got = _mm512_mask_popcnt_epi8(input, k, input);
        let expected = _mm512_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
            2, 4, 4, 183, 154, 84, 56, 227, 189, 140, 35, 117, 219, 169, 226, 170, 13, 22, 159,
            251, 73, 121, 143, 145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        assert_eq_m512i(got, expected);
    }

    // Unmasked popcount over 32 x 8-bit lanes.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_popcnt_epi8() {
        let input = _mm256_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172,
        );
        let got = _mm256_popcnt_epi8(input);
        let expected = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
            2, 4, 4,
        );
        assert_eq_m256i(got, expected);
    }

    // Zero-masked popcount: only the upper 16 lanes are selected.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_popcnt_epi8() {
        let input = _mm256_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143,
            145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        let k = 0xFF_FF_00_00;
        let got = _mm256_maskz_popcnt_epi8(k, input);
        let expected = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0,
        );
        assert_eq_m256i(got, expected);
    }

    // Merge-masked popcount: the lower 16 lanes must be passed through from `src`.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_mask_popcnt_epi8() {
        let input = _mm256_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143,
            145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        let k = 0xFF_FF_00_00;
        let got = _mm256_mask_popcnt_epi8(input, k, input);
        let expected = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 251, 73, 121, 143, 145, 85, 91, 137,
            90, 225, 21, 249, 211, 155, 228, 70,
        );
        assert_eq_m256i(got, expected);
    }

    // Unmasked popcount over 16 x 8-bit lanes.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_popcnt_epi8() {
        let input = _mm_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64,
        );
        let got = _mm_popcnt_epi8(input);
        let expected = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(got, expected);
    }

    // Zero-masked popcount: only the upper 8 lanes are selected.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_maskz_popcnt_epi8() {
        let input = _mm_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        let k = 0xFF_00;
        let got = _mm_maskz_popcnt_epi8(k, input);
        let expected = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(got, expected);
    }

    // Merge-masked popcount: the lower 8 lanes must be passed through from `src`.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_mask_popcnt_epi8() {
        let input = _mm_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        let k = 0xFF_00;
        let got = _mm_mask_popcnt_epi8(input, k, input);
        let expected = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 90, 225, 21, 249, 211, 155, 228, 70);
        assert_eq_m128i(got, expected);
    }

    // Unmasked bit shuffle on eight 64-bit elements; the expected mask is built byte by byte.
    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_bitshuffle_epi64_mask() {
        let indices = _mm512_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
            8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59,
            58, 57, 56, 32, 32, 16, 16, 0, 0, 8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
        );
        let input = _mm512_setr_epi64(
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
        );
        let got = _mm512_bitshuffle_epi64_mask(input, indices);
        let expected = 0xF0 << 0
            | 0x03 << 8
            | 0xFF << 16
            | 0xAC << 24
            | 0xF0 << 32
            | 0x03 << 40
            | 0xFF << 48
            | 0xAC << 56;

        assert_eq!(got, expected);
    }

    // Masked bit shuffle: the lower 32 result bits are masked off and must be zero.
    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_mask_bitshuffle_epi64_mask() {
        let indices = _mm512_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
            8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59,
            58, 57, 56, 32, 32, 16, 16, 0, 0, 8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
        );
        let input = _mm512_setr_epi64(
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
        );
        let k = 0xFF_FF_FF_FF_00_00_00_00;
        let got = _mm512_mask_bitshuffle_epi64_mask(k, input, indices);
        let expected = 0x00 << 0
            | 0x00 << 8
            | 0x00 << 16
            | 0x00 << 24
            | 0xF0 << 32
            | 0x03 << 40
            | 0xFF << 48
            | 0xAC << 56;

        assert_eq!(got, expected);
    }

    // Unmasked bit shuffle on four 64-bit elements.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_bitshuffle_epi64_mask() {
        let indices = _mm256_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
            8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
        );
        let input = _mm256_setr_epi64x(
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
        );
        let got = _mm256_bitshuffle_epi64_mask(input, indices);
        let expected = 0xF0 << 0 | 0x03 << 8 | 0xFF << 16 | 0xAC << 24;

        assert_eq!(got, expected);
    }

    // Masked bit shuffle: the lower 16 result bits are masked off and must be zero.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_mask_bitshuffle_epi64_mask() {
        let indices = _mm256_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
            8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
        );
        let input = _mm256_setr_epi64x(
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
        );
        let k = 0xFF_FF_00_00;
        let got = _mm256_mask_bitshuffle_epi64_mask(k, input, indices);
        let expected = 0x00 << 0 | 0x00 << 8 | 0xFF << 16 | 0xAC << 24;

        assert_eq!(got, expected);
    }

    // Unmasked bit shuffle on two 64-bit elements.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_bitshuffle_epi64_mask() {
        let indices = _mm_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56,
        );
        let input = _mm_setr_epi64x(0xFF_00_00_00_00_00_00_00, 0xAC_00_00_00_00_00_00_00);
        let got = _mm_bitshuffle_epi64_mask(input, indices);
        let expected = 0xFF << 0 | 0xAC << 8;

        assert_eq!(got, expected);
    }

    // Masked bit shuffle: the lower 8 result bits are masked off and must be zero.
    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_mask_bitshuffle_epi64_mask() {
        let indices = _mm_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56,
        );
        let input = _mm_setr_epi64x(0xFF_00_00_00_00_00_00_00, 0xAC_00_00_00_00_00_00_00);
        let k = 0xFF_00;
        let got = _mm_mask_bitshuffle_epi64_mask(k, input, indices);
        let expected = 0x00 << 0 | 0xAC << 8;

        assert_eq!(got, expected);
    }
}