1use crate::core_arch::x86::*;
2use crate::intrinsics::simd::simd_select_bitmask;
3
4#[cfg(test)]
5use stdarch_test::assert_instr;
6
7#[inline]
15#[target_feature(enable = "avx512ifma")]
16#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17#[cfg_attr(test, assert_instr(vpmadd52huq))]
18pub fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
19 unsafe { vpmadd52huq_512(a, b, c) }
20}
21
22#[inline]
31#[target_feature(enable = "avx512ifma")]
32#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33#[cfg_attr(test, assert_instr(vpmadd52huq))]
34pub fn _mm512_mask_madd52hi_epu64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
35 unsafe { simd_select_bitmask(k, vpmadd52huq_512(a, b, c), a) }
36}
37
38#[inline]
47#[target_feature(enable = "avx512ifma")]
48#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
49#[cfg_attr(test, assert_instr(vpmadd52huq))]
50pub fn _mm512_maskz_madd52hi_epu64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
51 unsafe { simd_select_bitmask(k, vpmadd52huq_512(a, b, c), _mm512_setzero_si512()) }
52}
53
54#[inline]
62#[target_feature(enable = "avx512ifma")]
63#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
64#[cfg_attr(test, assert_instr(vpmadd52luq))]
65pub fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
66 unsafe { vpmadd52luq_512(a, b, c) }
67}
68
69#[inline]
78#[target_feature(enable = "avx512ifma")]
79#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
80#[cfg_attr(test, assert_instr(vpmadd52luq))]
81pub fn _mm512_mask_madd52lo_epu64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
82 unsafe { simd_select_bitmask(k, vpmadd52luq_512(a, b, c), a) }
83}
84
85#[inline]
94#[target_feature(enable = "avx512ifma")]
95#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
96#[cfg_attr(test, assert_instr(vpmadd52luq))]
97pub fn _mm512_maskz_madd52lo_epu64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
98 unsafe { simd_select_bitmask(k, vpmadd52luq_512(a, b, c), _mm512_setzero_si512()) }
99}
100
101#[inline]
109#[target_feature(enable = "avxifma")]
110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
111#[cfg_attr(test, assert_instr(vpmadd52huq))]
112pub fn _mm256_madd52hi_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
113 unsafe { vpmadd52huq_256(a, b, c) }
114}
115
116#[inline]
124#[target_feature(enable = "avx512ifma,avx512vl")]
125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
126#[cfg_attr(test, assert_instr(vpmadd52huq))]
127pub fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
128 unsafe { vpmadd52huq_256(a, b, c) }
129}
130
131#[inline]
140#[target_feature(enable = "avx512ifma,avx512vl")]
141#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
142#[cfg_attr(test, assert_instr(vpmadd52huq))]
143pub fn _mm256_mask_madd52hi_epu64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
144 unsafe { simd_select_bitmask(k, vpmadd52huq_256(a, b, c), a) }
145}
146
147#[inline]
156#[target_feature(enable = "avx512ifma,avx512vl")]
157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
158#[cfg_attr(test, assert_instr(vpmadd52huq))]
159pub fn _mm256_maskz_madd52hi_epu64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
160 unsafe { simd_select_bitmask(k, vpmadd52huq_256(a, b, c), _mm256_setzero_si256()) }
161}
162
163#[inline]
171#[target_feature(enable = "avxifma")]
172#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
173#[cfg_attr(test, assert_instr(vpmadd52luq))]
174pub fn _mm256_madd52lo_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
175 unsafe { vpmadd52luq_256(a, b, c) }
176}
177
178#[inline]
186#[target_feature(enable = "avx512ifma,avx512vl")]
187#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
188#[cfg_attr(test, assert_instr(vpmadd52luq))]
189pub fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
190 unsafe { vpmadd52luq_256(a, b, c) }
191}
192
193#[inline]
202#[target_feature(enable = "avx512ifma,avx512vl")]
203#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
204#[cfg_attr(test, assert_instr(vpmadd52luq))]
205pub fn _mm256_mask_madd52lo_epu64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
206 unsafe { simd_select_bitmask(k, vpmadd52luq_256(a, b, c), a) }
207}
208
209#[inline]
218#[target_feature(enable = "avx512ifma,avx512vl")]
219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
220#[cfg_attr(test, assert_instr(vpmadd52luq))]
221pub fn _mm256_maskz_madd52lo_epu64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
222 unsafe { simd_select_bitmask(k, vpmadd52luq_256(a, b, c), _mm256_setzero_si256()) }
223}
224
225#[inline]
233#[target_feature(enable = "avxifma")]
234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
235#[cfg_attr(test, assert_instr(vpmadd52huq))]
236pub fn _mm_madd52hi_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
237 unsafe { vpmadd52huq_128(a, b, c) }
238}
239
240#[inline]
248#[target_feature(enable = "avx512ifma,avx512vl")]
249#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
250#[cfg_attr(test, assert_instr(vpmadd52huq))]
251pub fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
252 unsafe { vpmadd52huq_128(a, b, c) }
253}
254
255#[inline]
264#[target_feature(enable = "avx512ifma,avx512vl")]
265#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
266#[cfg_attr(test, assert_instr(vpmadd52huq))]
267pub fn _mm_mask_madd52hi_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
268 unsafe { simd_select_bitmask(k, vpmadd52huq_128(a, b, c), a) }
269}
270
271#[inline]
280#[target_feature(enable = "avx512ifma,avx512vl")]
281#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
282#[cfg_attr(test, assert_instr(vpmadd52huq))]
283pub fn _mm_maskz_madd52hi_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
284 unsafe { simd_select_bitmask(k, vpmadd52huq_128(a, b, c), _mm_setzero_si128()) }
285}
286
287#[inline]
295#[target_feature(enable = "avxifma")]
296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
297#[cfg_attr(test, assert_instr(vpmadd52luq))]
298pub fn _mm_madd52lo_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
299 unsafe { vpmadd52luq_128(a, b, c) }
300}
301
302#[inline]
310#[target_feature(enable = "avx512ifma,avx512vl")]
311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
312#[cfg_attr(test, assert_instr(vpmadd52luq))]
313pub fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
314 unsafe { vpmadd52luq_128(a, b, c) }
315}
316
317#[inline]
326#[target_feature(enable = "avx512ifma,avx512vl")]
327#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
328#[cfg_attr(test, assert_instr(vpmadd52luq))]
329pub fn _mm_mask_madd52lo_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
330 unsafe { simd_select_bitmask(k, vpmadd52luq_128(a, b, c), a) }
331}
332
333#[inline]
342#[target_feature(enable = "avx512ifma,avx512vl")]
343#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
344#[cfg_attr(test, assert_instr(vpmadd52luq))]
345pub fn _mm_maskz_madd52lo_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
346 unsafe { simd_select_bitmask(k, vpmadd52luq_128(a, b, c), _mm_setzero_si128()) }
347}
348
349#[allow(improper_ctypes)]
350unsafe extern "C" {
351 #[link_name = "llvm.x86.avx512.vpmadd52l.uq.128"]
352 fn vpmadd52luq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
353 #[link_name = "llvm.x86.avx512.vpmadd52h.uq.128"]
354 fn vpmadd52huq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
355 #[link_name = "llvm.x86.avx512.vpmadd52l.uq.256"]
356 fn vpmadd52luq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
357 #[link_name = "llvm.x86.avx512.vpmadd52h.uq.256"]
358 fn vpmadd52huq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
359 #[link_name = "llvm.x86.avx512.vpmadd52l.uq.512"]
360 fn vpmadd52luq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
361 #[link_name = "llvm.x86.avx512.vpmadd52h.uq.512"]
362 fn vpmadd52huq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
363}
364
#[cfg(test)]
mod tests {

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    /// Mask shared by every masked test; bits 0, 2, 3, 5 and 6 are set.
    const K: __mmask8 = 0b01101101;

    /// Value broadcast into every lane of the accumulator operand `a`.
    const A: i64 = 10 << 40;
    /// Value broadcast into every lane of the first multiplicand `b`.
    const B: i64 = (11 << 40) + 4;
    /// Value broadcast into every lane of the second multiplicand `c`.
    const C: i64 = (12 << 40) + 3;

    /// Expected per-lane result of the `madd52hi` family:
    /// `A + ((B * C) >> 52)`, with the product taken at full 104-bit width.
    const MADD52HI: i64 = 11030549757952;
    /// Expected per-lane result of the `madd52lo` family:
    /// `A + ((B * C) % (1 << 52))`, with the product taken at full 104-bit width.
    const MADD52LO: i64 = 100055558127628;

    // ---- 512-bit, AVX-512 IFMA ----

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52hi_epu64() {
        let a = _mm512_set1_epi64(A);
        let b = _mm512_set1_epi64(B);
        let c = _mm512_set1_epi64(C);
        let expected = _mm512_set1_epi64(MADD52HI);

        assert_eq_m512i(expected, _mm512_madd52hi_epu64(a, b, c));
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_mask_madd52hi_epu64() {
        let a = _mm512_set1_epi64(A);
        let b = _mm512_set1_epi64(B);
        let c = _mm512_set1_epi64(C);
        // Lanes not selected by `K` must keep the accumulator value.
        let expected = _mm512_mask_blend_epi64(K, a, _mm512_set1_epi64(MADD52HI));

        assert_eq_m512i(expected, _mm512_mask_madd52hi_epu64(a, K, b, c));
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_maskz_madd52hi_epu64() {
        let a = _mm512_set1_epi64(A);
        let b = _mm512_set1_epi64(B);
        let c = _mm512_set1_epi64(C);
        // Lanes not selected by `K` must be zero.
        let zero = _mm512_setzero_si512();
        let expected = _mm512_mask_blend_epi64(K, zero, _mm512_set1_epi64(MADD52HI));

        assert_eq_m512i(expected, _mm512_maskz_madd52hi_epu64(K, a, b, c));
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52lo_epu64() {
        let a = _mm512_set1_epi64(A);
        let b = _mm512_set1_epi64(B);
        let c = _mm512_set1_epi64(C);
        let expected = _mm512_set1_epi64(MADD52LO);

        assert_eq_m512i(expected, _mm512_madd52lo_epu64(a, b, c));
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_mask_madd52lo_epu64() {
        let a = _mm512_set1_epi64(A);
        let b = _mm512_set1_epi64(B);
        let c = _mm512_set1_epi64(C);
        // Lanes not selected by `K` must keep the accumulator value.
        let expected = _mm512_mask_blend_epi64(K, a, _mm512_set1_epi64(MADD52LO));

        assert_eq_m512i(expected, _mm512_mask_madd52lo_epu64(a, K, b, c));
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_maskz_madd52lo_epu64() {
        let a = _mm512_set1_epi64(A);
        let b = _mm512_set1_epi64(B);
        let c = _mm512_set1_epi64(C);
        // Lanes not selected by `K` must be zero.
        let zero = _mm512_setzero_si512();
        let expected = _mm512_mask_blend_epi64(K, zero, _mm512_set1_epi64(MADD52LO));

        assert_eq_m512i(expected, _mm512_maskz_madd52lo_epu64(K, a, b, c));
    }

    // ---- 256-bit, AVX-IFMA and AVX-512 IFMA + VL ----

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm256_madd52hi_avx_epu64() {
        let a = _mm256_set1_epi64x(A);
        let b = _mm256_set1_epi64x(B);
        let c = _mm256_set1_epi64x(C);
        let expected = _mm256_set1_epi64x(MADD52HI);

        assert_eq_m256i(expected, _mm256_madd52hi_avx_epu64(a, b, c));
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(A);
        let b = _mm256_set1_epi64x(B);
        let c = _mm256_set1_epi64x(C);
        let expected = _mm256_set1_epi64x(MADD52HI);

        assert_eq_m256i(expected, _mm256_madd52hi_epu64(a, b, c));
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_mask_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(A);
        let b = _mm256_set1_epi64x(B);
        let c = _mm256_set1_epi64x(C);
        // Lanes not selected by `K` must keep the accumulator value.
        let expected = _mm256_mask_blend_epi64(K, a, _mm256_set1_epi64x(MADD52HI));

        assert_eq_m256i(expected, _mm256_mask_madd52hi_epu64(a, K, b, c));
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_maskz_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(A);
        let b = _mm256_set1_epi64x(B);
        let c = _mm256_set1_epi64x(C);
        // Lanes not selected by `K` must be zero.
        let zero = _mm256_setzero_si256();
        let expected = _mm256_mask_blend_epi64(K, zero, _mm256_set1_epi64x(MADD52HI));

        assert_eq_m256i(expected, _mm256_maskz_madd52hi_epu64(K, a, b, c));
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm256_madd52lo_avx_epu64() {
        let a = _mm256_set1_epi64x(A);
        let b = _mm256_set1_epi64x(B);
        let c = _mm256_set1_epi64x(C);
        let expected = _mm256_set1_epi64x(MADD52LO);

        assert_eq_m256i(expected, _mm256_madd52lo_avx_epu64(a, b, c));
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(A);
        let b = _mm256_set1_epi64x(B);
        let c = _mm256_set1_epi64x(C);
        let expected = _mm256_set1_epi64x(MADD52LO);

        assert_eq_m256i(expected, _mm256_madd52lo_epu64(a, b, c));
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_mask_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(A);
        let b = _mm256_set1_epi64x(B);
        let c = _mm256_set1_epi64x(C);
        // Lanes not selected by `K` must keep the accumulator value.
        let expected = _mm256_mask_blend_epi64(K, a, _mm256_set1_epi64x(MADD52LO));

        assert_eq_m256i(expected, _mm256_mask_madd52lo_epu64(a, K, b, c));
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_maskz_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(A);
        let b = _mm256_set1_epi64x(B);
        let c = _mm256_set1_epi64x(C);
        // Lanes not selected by `K` must be zero.
        let zero = _mm256_setzero_si256();
        let expected = _mm256_mask_blend_epi64(K, zero, _mm256_set1_epi64x(MADD52LO));

        assert_eq_m256i(expected, _mm256_maskz_madd52lo_epu64(K, a, b, c));
    }

    // ---- 128-bit, AVX-IFMA and AVX-512 IFMA + VL ----

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm_madd52hi_avx_epu64() {
        let a = _mm_set1_epi64x(A);
        let b = _mm_set1_epi64x(B);
        let c = _mm_set1_epi64x(C);
        let expected = _mm_set1_epi64x(MADD52HI);

        assert_eq_m128i(expected, _mm_madd52hi_avx_epu64(a, b, c));
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52hi_epu64() {
        let a = _mm_set1_epi64x(A);
        let b = _mm_set1_epi64x(B);
        let c = _mm_set1_epi64x(C);
        let expected = _mm_set1_epi64x(MADD52HI);

        assert_eq_m128i(expected, _mm_madd52hi_epu64(a, b, c));
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_mask_madd52hi_epu64() {
        let a = _mm_set1_epi64x(A);
        let b = _mm_set1_epi64x(B);
        let c = _mm_set1_epi64x(C);
        // Lanes not selected by `K` must keep the accumulator value.
        let expected = _mm_mask_blend_epi64(K, a, _mm_set1_epi64x(MADD52HI));

        assert_eq_m128i(expected, _mm_mask_madd52hi_epu64(a, K, b, c));
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_maskz_madd52hi_epu64() {
        let a = _mm_set1_epi64x(A);
        let b = _mm_set1_epi64x(B);
        let c = _mm_set1_epi64x(C);
        // Lanes not selected by `K` must be zero.
        let zero = _mm_setzero_si128();
        let expected = _mm_mask_blend_epi64(K, zero, _mm_set1_epi64x(MADD52HI));

        assert_eq_m128i(expected, _mm_maskz_madd52hi_epu64(K, a, b, c));
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm_madd52lo_avx_epu64() {
        let a = _mm_set1_epi64x(A);
        let b = _mm_set1_epi64x(B);
        let c = _mm_set1_epi64x(C);
        let expected = _mm_set1_epi64x(MADD52LO);

        assert_eq_m128i(expected, _mm_madd52lo_avx_epu64(a, b, c));
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52lo_epu64() {
        let a = _mm_set1_epi64x(A);
        let b = _mm_set1_epi64x(B);
        let c = _mm_set1_epi64x(C);
        let expected = _mm_set1_epi64x(MADD52LO);

        assert_eq_m128i(expected, _mm_madd52lo_epu64(a, b, c));
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_mask_madd52lo_epu64() {
        let a = _mm_set1_epi64x(A);
        let b = _mm_set1_epi64x(B);
        let c = _mm_set1_epi64x(C);
        // Lanes not selected by `K` must keep the accumulator value.
        let expected = _mm_mask_blend_epi64(K, a, _mm_set1_epi64x(MADD52LO));

        assert_eq_m128i(expected, _mm_mask_madd52lo_epu64(a, K, b, c));
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_maskz_madd52lo_epu64() {
        let a = _mm_set1_epi64x(A);
        let b = _mm_set1_epi64x(B);
        let c = _mm_set1_epi64x(C);
        // Lanes not selected by `K` must be zero.
        let zero = _mm_setzero_si128();
        let expected = _mm_mask_blend_epi64(K, zero, _mm_set1_epi64x(MADD52LO));

        assert_eq_m128i(expected, _mm_maskz_madd52lo_epu64(K, a, b, c));
    }
}