1use crate::arch::asm;
6use crate::core_arch::{simd::*, x86::*};
7use crate::intrinsics::simd::*;
8
9#[cfg(test)]
10use stdarch_test::assert_instr;
11
// Declarations of the LLVM intrinsics that back the AVX512-BF16 wrappers in this file.
// The numeric suffix of each link name (`128`/`256`/`512`) matches the bit width of the
// f32 vector operand(s). No 128-bit `cvtneps2bf16` intrinsic is declared here; the
// 128-bit wrappers further down emit that instruction through inline assembly instead.
// NOTE(review): `improper_ctypes` is presumably silenced because the SIMD vector types
// are not regarded as FFI-safe by the lint — confirm.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // Two f32 source vectors -> one vector of 16-bit lanes (twice as many lanes as
    // either input).
    #[link_name = "llvm.x86.avx512bf16.cvtne2ps2bf16.128"]
    fn cvtne2ps2bf16(a: f32x4, b: f32x4) -> i16x8;
    #[link_name = "llvm.x86.avx512bf16.cvtne2ps2bf16.256"]
    fn cvtne2ps2bf16_256(a: f32x8, b: f32x8) -> i16x16;
    #[link_name = "llvm.x86.avx512bf16.cvtne2ps2bf16.512"]
    fn cvtne2ps2bf16_512(a: f32x16, b: f32x16) -> i16x32;
    // One f32 source vector -> a half-width vector with the same number of 16-bit lanes.
    #[link_name = "llvm.x86.avx512bf16.cvtneps2bf16.256"]
    fn cvtneps2bf16_256(a: f32x8) -> i16x8;
    #[link_name = "llvm.x86.avx512bf16.cvtneps2bf16.512"]
    fn cvtneps2bf16_512(a: f32x16) -> i16x16;
    // f32 vector `a` plus two vectors of 32-bit lanes `b` and `c` -> f32 vector.
    // The callers pass their BF16 vectors reinterpreted as 32-bit lanes for `b` and `c`.
    #[link_name = "llvm.x86.avx512bf16.dpbf16ps.128"]
    fn dpbf16ps(a: f32x4, b: i32x4, c: i32x4) -> f32x4;
    #[link_name = "llvm.x86.avx512bf16.dpbf16ps.256"]
    fn dpbf16ps_256(a: f32x8, b: i32x8, c: i32x8) -> f32x8;
    #[link_name = "llvm.x86.avx512bf16.dpbf16ps.512"]
    fn dpbf16ps_512(a: f32x16, b: i32x16, c: i32x16) -> f32x16;
}
31
32#[inline]
37#[target_feature(enable = "avx512bf16,avx512vl")]
38#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
40pub fn _mm_cvtne2ps_pbh(a: __m128, b: __m128) -> __m128bh {
41 unsafe { transmute(cvtne2ps2bf16(a.as_f32x4(), b.as_f32x4())) }
42}
43
44#[inline]
50#[target_feature(enable = "avx512bf16,avx512vl")]
51#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
52#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
53pub fn _mm_mask_cvtne2ps_pbh(src: __m128bh, k: __mmask8, a: __m128, b: __m128) -> __m128bh {
54 unsafe {
55 let cvt = _mm_cvtne2ps_pbh(a, b).as_u16x8();
56 transmute(simd_select_bitmask(k, cvt, src.as_u16x8()))
57 }
58}
59
60#[inline]
66#[target_feature(enable = "avx512bf16,avx512vl")]
67#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
68#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
69pub fn _mm_maskz_cvtne2ps_pbh(k: __mmask8, a: __m128, b: __m128) -> __m128bh {
70 unsafe {
71 let cvt = _mm_cvtne2ps_pbh(a, b).as_u16x8();
72 transmute(simd_select_bitmask(k, cvt, u16x8::ZERO))
73 }
74}
75
76#[inline]
81#[target_feature(enable = "avx512bf16,avx512vl")]
82#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
83#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
84pub fn _mm256_cvtne2ps_pbh(a: __m256, b: __m256) -> __m256bh {
85 unsafe { transmute(cvtne2ps2bf16_256(a.as_f32x8(), b.as_f32x8())) }
86}
87
88#[inline]
93#[target_feature(enable = "avx512bf16,avx512vl")]
94#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
95#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
96pub fn _mm256_mask_cvtne2ps_pbh(src: __m256bh, k: __mmask16, a: __m256, b: __m256) -> __m256bh {
97 unsafe {
98 let cvt = _mm256_cvtne2ps_pbh(a, b).as_u16x16();
99 transmute(simd_select_bitmask(k, cvt, src.as_u16x16()))
100 }
101}
102
103#[inline]
108#[target_feature(enable = "avx512bf16,avx512vl")]
109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
110#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
111pub fn _mm256_maskz_cvtne2ps_pbh(k: __mmask16, a: __m256, b: __m256) -> __m256bh {
112 unsafe {
113 let cvt = _mm256_cvtne2ps_pbh(a, b).as_u16x16();
114 transmute(simd_select_bitmask(k, cvt, u16x16::ZERO))
115 }
116}
117
118#[inline]
123#[target_feature(enable = "avx512bf16,avx512f")]
124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
125#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
126pub fn _mm512_cvtne2ps_pbh(a: __m512, b: __m512) -> __m512bh {
127 unsafe { transmute(cvtne2ps2bf16_512(a.as_f32x16(), b.as_f32x16())) }
128}
129
130#[inline]
136#[target_feature(enable = "avx512bf16,avx512f")]
137#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
138#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
139pub fn _mm512_mask_cvtne2ps_pbh(src: __m512bh, k: __mmask32, a: __m512, b: __m512) -> __m512bh {
140 unsafe {
141 let cvt = _mm512_cvtne2ps_pbh(a, b).as_u16x32();
142 transmute(simd_select_bitmask(k, cvt, src.as_u16x32()))
143 }
144}
145
146#[inline]
152#[target_feature(enable = "avx512bf16,avx512f")]
153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
154#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
155pub fn _mm512_maskz_cvtne2ps_pbh(k: __mmask32, a: __m512, b: __m512) -> __m512bh {
156 unsafe {
157 let cvt = _mm512_cvtne2ps_pbh(a, b).as_u16x32();
158 transmute(simd_select_bitmask(k, cvt, u16x32::ZERO))
159 }
160}
161
162#[inline]
166#[target_feature(enable = "avx512bf16,avx512vl")]
167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
168#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
169pub fn _mm256_cvtneps_pbh(a: __m256) -> __m128bh {
170 unsafe { transmute(cvtneps2bf16_256(a.as_f32x8())) }
171}
172
173#[inline]
178#[target_feature(enable = "avx512bf16,avx512vl")]
179#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
180#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
181pub fn _mm256_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m256) -> __m128bh {
182 unsafe {
183 let cvt = _mm256_cvtneps_pbh(a).as_u16x8();
184 transmute(simd_select_bitmask(k, cvt, src.as_u16x8()))
185 }
186}
187
188#[inline]
193#[target_feature(enable = "avx512bf16,avx512vl")]
194#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
195#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
196pub fn _mm256_maskz_cvtneps_pbh(k: __mmask8, a: __m256) -> __m128bh {
197 unsafe {
198 let cvt = _mm256_cvtneps_pbh(a).as_u16x8();
199 transmute(simd_select_bitmask(k, cvt, u16x8::ZERO))
200 }
201}
202
203#[inline]
207#[target_feature(enable = "avx512bf16,avx512f")]
208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
209#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
210pub fn _mm512_cvtneps_pbh(a: __m512) -> __m256bh {
211 unsafe { transmute(cvtneps2bf16_512(a.as_f32x16())) }
212}
213
214#[inline]
219#[target_feature(enable = "avx512bf16,avx512f")]
220#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
221#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
222pub fn _mm512_mask_cvtneps_pbh(src: __m256bh, k: __mmask16, a: __m512) -> __m256bh {
223 unsafe {
224 let cvt = _mm512_cvtneps_pbh(a).as_u16x16();
225 transmute(simd_select_bitmask(k, cvt, src.as_u16x16()))
226 }
227}
228
229#[inline]
234#[target_feature(enable = "avx512bf16,avx512f")]
235#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
236#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
237pub fn _mm512_maskz_cvtneps_pbh(k: __mmask16, a: __m512) -> __m256bh {
238 unsafe {
239 let cvt = _mm512_cvtneps_pbh(a).as_u16x16();
240 transmute(simd_select_bitmask(k, cvt, u16x16::ZERO))
241 }
242}
243
244#[inline]
249#[target_feature(enable = "avx512bf16,avx512vl")]
250#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
251#[cfg_attr(test, assert_instr("vdpbf16ps"))]
252pub fn _mm_dpbf16_ps(src: __m128, a: __m128bh, b: __m128bh) -> __m128 {
253 unsafe { transmute(dpbf16ps(src.as_f32x4(), a.as_i32x4(), b.as_i32x4())) }
254}
255
256#[inline]
262#[target_feature(enable = "avx512bf16,avx512vl")]
263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
264#[cfg_attr(test, assert_instr("vdpbf16ps"))]
265pub fn _mm_mask_dpbf16_ps(src: __m128, k: __mmask8, a: __m128bh, b: __m128bh) -> __m128 {
266 unsafe {
267 let rst = _mm_dpbf16_ps(src, a, b).as_f32x4();
268 transmute(simd_select_bitmask(k, rst, src.as_f32x4()))
269 }
270}
271
272#[inline]
278#[target_feature(enable = "avx512bf16,avx512vl")]
279#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
280#[cfg_attr(test, assert_instr("vdpbf16ps"))]
281pub fn _mm_maskz_dpbf16_ps(k: __mmask8, src: __m128, a: __m128bh, b: __m128bh) -> __m128 {
282 unsafe {
283 let rst = _mm_dpbf16_ps(src, a, b).as_f32x4();
284 let zero = _mm_set1_ps(0.0_f32).as_f32x4();
285 transmute(simd_select_bitmask(k, rst, zero))
286 }
287}
288
289#[inline]
294#[target_feature(enable = "avx512bf16,avx512vl")]
295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
296#[cfg_attr(test, assert_instr("vdpbf16ps"))]
297pub fn _mm256_dpbf16_ps(src: __m256, a: __m256bh, b: __m256bh) -> __m256 {
298 unsafe { transmute(dpbf16ps_256(src.as_f32x8(), a.as_i32x8(), b.as_i32x8())) }
299}
300
301#[inline]
307#[target_feature(enable = "avx512bf16,avx512vl")]
308#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
309#[cfg_attr(test, assert_instr("vdpbf16ps"))]
310pub fn _mm256_mask_dpbf16_ps(src: __m256, k: __mmask8, a: __m256bh, b: __m256bh) -> __m256 {
311 unsafe {
312 let rst = _mm256_dpbf16_ps(src, a, b).as_f32x8();
313 transmute(simd_select_bitmask(k, rst, src.as_f32x8()))
314 }
315}
316
317#[inline]
323#[target_feature(enable = "avx512bf16,avx512vl")]
324#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
325#[cfg_attr(test, assert_instr("vdpbf16ps"))]
326pub fn _mm256_maskz_dpbf16_ps(k: __mmask8, src: __m256, a: __m256bh, b: __m256bh) -> __m256 {
327 unsafe {
328 let rst = _mm256_dpbf16_ps(src, a, b).as_f32x8();
329 transmute(simd_select_bitmask(k, rst, f32x8::ZERO))
330 }
331}
332
333#[inline]
340#[target_feature(enable = "avx512bf16,avx512f")]
341#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
342#[cfg_attr(test, assert_instr("vdpbf16ps"))]
343pub fn _mm512_dpbf16_ps(src: __m512, a: __m512bh, b: __m512bh) -> __m512 {
344 unsafe { transmute(dpbf16ps_512(src.as_f32x16(), a.as_i32x16(), b.as_i32x16())) }
345}
346
347#[inline]
353#[target_feature(enable = "avx512bf16,avx512f")]
354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
355#[cfg_attr(test, assert_instr("vdpbf16ps"))]
356pub fn _mm512_mask_dpbf16_ps(src: __m512, k: __mmask16, a: __m512bh, b: __m512bh) -> __m512 {
357 unsafe {
358 let rst = _mm512_dpbf16_ps(src, a, b).as_f32x16();
359 transmute(simd_select_bitmask(k, rst, src.as_f32x16()))
360 }
361}
362
363#[inline]
369#[target_feature(enable = "avx512bf16,avx512f")]
370#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
371#[cfg_attr(test, assert_instr("vdpbf16ps"))]
372pub fn _mm512_maskz_dpbf16_ps(k: __mmask16, src: __m512, a: __m512bh, b: __m512bh) -> __m512 {
373 unsafe {
374 let rst = _mm512_dpbf16_ps(src, a, b).as_f32x16();
375 transmute(simd_select_bitmask(k, rst, f32x16::ZERO))
376 }
377}
378
379#[inline]
384#[target_feature(enable = "avx512bf16,avx512f")]
385#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
386pub fn _mm512_cvtpbh_ps(a: __m256bh) -> __m512 {
387 unsafe { _mm512_castsi512_ps(_mm512_slli_epi32::<16>(_mm512_cvtepi16_epi32(transmute(a)))) }
388}
389
390#[inline]
396#[target_feature(enable = "avx512bf16,avx512f")]
397#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
398pub fn _mm512_mask_cvtpbh_ps(src: __m512, k: __mmask16, a: __m256bh) -> __m512 {
399 unsafe {
400 let cvt = _mm512_cvtpbh_ps(a);
401 transmute(simd_select_bitmask(k, cvt.as_f32x16(), src.as_f32x16()))
402 }
403}
404
405#[inline]
411#[target_feature(enable = "avx512bf16,avx512f")]
412#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
413pub fn _mm512_maskz_cvtpbh_ps(k: __mmask16, a: __m256bh) -> __m512 {
414 unsafe {
415 let cvt = _mm512_cvtpbh_ps(a);
416 transmute(simd_select_bitmask(k, cvt.as_f32x16(), f32x16::ZERO))
417 }
418}
419
420#[inline]
425#[target_feature(enable = "avx512bf16,avx512vl")]
426#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
427pub fn _mm256_cvtpbh_ps(a: __m128bh) -> __m256 {
428 unsafe { _mm256_castsi256_ps(_mm256_slli_epi32::<16>(_mm256_cvtepi16_epi32(transmute(a)))) }
429}
430
431#[inline]
437#[target_feature(enable = "avx512bf16,avx512vl")]
438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
439pub fn _mm256_mask_cvtpbh_ps(src: __m256, k: __mmask8, a: __m128bh) -> __m256 {
440 unsafe {
441 let cvt = _mm256_cvtpbh_ps(a);
442 transmute(simd_select_bitmask(k, cvt.as_f32x8(), src.as_f32x8()))
443 }
444}
445
446#[inline]
452#[target_feature(enable = "avx512bf16,avx512vl")]
453#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
454pub fn _mm256_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m256 {
455 unsafe {
456 let cvt = _mm256_cvtpbh_ps(a);
457 transmute(simd_select_bitmask(k, cvt.as_f32x8(), f32x8::ZERO))
458 }
459}
460
461#[inline]
466#[target_feature(enable = "avx512bf16,avx512vl")]
467#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
468pub fn _mm_cvtpbh_ps(a: __m128bh) -> __m128 {
469 unsafe { _mm_castsi128_ps(_mm_slli_epi32::<16>(_mm_cvtepi16_epi32(transmute(a)))) }
470}
471
472#[inline]
478#[target_feature(enable = "avx512bf16,avx512vl")]
479#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
480pub fn _mm_mask_cvtpbh_ps(src: __m128, k: __mmask8, a: __m128bh) -> __m128 {
481 unsafe {
482 let cvt = _mm_cvtpbh_ps(a);
483 transmute(simd_select_bitmask(k, cvt.as_f32x4(), src.as_f32x4()))
484 }
485}
486
487#[inline]
493#[target_feature(enable = "avx512bf16,avx512vl")]
494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
495pub fn _mm_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m128 {
496 unsafe {
497 let cvt = _mm_cvtpbh_ps(a);
498 transmute(simd_select_bitmask(k, cvt.as_f32x4(), f32x4::ZERO))
499 }
500}
501
502#[inline]
507#[target_feature(enable = "avx512bf16,avx512f")]
508#[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")]
509pub fn _mm_cvtsbh_ss(a: bf16) -> f32 {
510 f32::from_bits((a.to_bits() as u32) << 16)
511}
512
/// Converts the packed `f32` lanes of `a` to BF16 by emitting `vcvtneps2bf16` on XMM
/// registers.
///
/// Unlike the 256- and 512-bit variants, this one is written with inline assembly
/// rather than an LLVM intrinsic call.
#[inline]
#[target_feature(enable = "avx512bf16,avx512vl")]
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_cvtneps_pbh(a: __m128) -> __m128bh {
    unsafe {
        let mut dst: __m128bh;
        asm!(
            "vcvtneps2bf16 {dst}, {src}",
            // `lateout`: `dst` is written only after the input has been read, so it may
            // be allocated to the same register as `src`.
            dst = lateout(xmm_reg) dst,
            src = in(xmm_reg) a,
            // The asm is declared side-effect free: no memory access, no stack use, and
            // flags left untouched.
            options(pure, nomem, nostack, preserves_flags)
        );
        dst
    }
}
533
/// Masked form of [`_mm_cvtneps_pbh`]: emits `vcvtneps2bf16` with write mask `k` and a
/// destination register pre-loaded with `src`.
///
/// NOTE(review): with AVX-512 merge masking, lanes whose mask bit is clear should keep
/// the value from `src` — confirm against the instruction reference.
#[inline]
#[target_feature(enable = "avx512bf16,avx512vl")]
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m128) -> __m128bh {
    unsafe {
        // Seed the destination with `src`; it is passed to the asm as an in/out operand.
        let mut dst = src;
        asm!(
            // `{{{k}}}` renders as `{<kreg>}`, i.e. the mask-register decoration on `dst`.
            "vcvtneps2bf16 {dst}{{{k}}},{src}",
            dst = inlateout(xmm_reg) dst,
            src = in(xmm_reg) a,
            k = in(kreg) k,
            // The asm is declared side-effect free: no memory access, no stack use, and
            // flags left untouched.
            options(pure, nomem, nostack, preserves_flags)
        );
        dst
    }
}
556
/// Zero-masked form of [`_mm_cvtneps_pbh`]: emits `vcvtneps2bf16` with write mask `k`
/// and the `{z}` decoration.
///
/// NOTE(review): with AVX-512 zero masking, lanes whose mask bit is clear should be
/// zeroed — confirm against the instruction reference.
#[inline]
#[target_feature(enable = "avx512bf16,avx512vl")]
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_maskz_cvtneps_pbh(k: __mmask8, a: __m128) -> __m128bh {
    unsafe {
        let mut dst: __m128bh;
        asm!(
            // `{{{k}}}{{z}}` renders as `{<kreg>}{z}`: mask register plus zeroing.
            "vcvtneps2bf16 {dst}{{{k}}}{{z}},{src}",
            // Pure output: no prior value of `dst` is fed into the instruction.
            dst = lateout(xmm_reg) dst,
            src = in(xmm_reg) a,
            k = in(kreg) k,
            // The asm is declared side-effect free: no memory access, no stack use, and
            // flags left untouched.
            options(pure, nomem, nostack, preserves_flags)
        );
        dst
    }
}
579
580#[inline]
585#[target_feature(enable = "avx512bf16,avx512vl")]
586#[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")]
587pub fn _mm_cvtness_sbh(a: f32) -> bf16 {
588 unsafe {
589 let value: u16 = simd_extract!(_mm_cvtneps_pbh(_mm_set_ss(a)), 0);
590 bf16::from_bits(value)
591 }
592}
593
594#[cfg(test)]
595mod tests {
596 use crate::core_arch::simd::u16x4;
597 use crate::{
598 core_arch::x86::*,
599 mem::{transmute, transmute_copy},
600 };
601 use stdarch_test::simd_test;
602
603 #[simd_test(enable = "avx512bf16,avx512vl")]
604 unsafe fn test_mm_cvtne2ps_pbh() {
605 let a_array = [178.125_f32, 10.5_f32, 3.75_f32, 50.25_f32];
606 let b_array = [-178.125_f32, -10.5_f32, -3.75_f32, -50.25_f32];
607 let a: __m128 = transmute(a_array);
608 let b: __m128 = transmute(b_array);
609 let c: __m128bh = _mm_cvtne2ps_pbh(a, b);
610 let result: [u16; 8] = transmute(c.as_u16x8());
611 #[rustfmt::skip]
612 let expected_result: [u16; 8] = [
613 0b1_10000110_0110010,
614 0b1_10000010_0101000,
615 0b1_10000000_1110000,
616 0b1_10000100_1001001,
617 0b0_10000110_0110010,
618 0b0_10000010_0101000,
619 0b0_10000000_1110000,
620 0b0_10000100_1001001,
621 ];
622 assert_eq!(result, expected_result);
623 }
624
625 #[simd_test(enable = "avx512bf16,avx512vl")]
626 unsafe fn test_mm_mask_cvtne2ps_pbh() {
627 let a_array = [178.125_f32, 10.5_f32, 3.75_f32, 50.25_f32];
628 let b_array = [-178.125_f32, -10.5_f32, -3.75_f32, -50.25_f32];
629 #[rustfmt::skip]
630 let src_array: [u16; 8] = [
631 0b0_10000110_0110010,
632 0b0_10000010_0101000,
633 0b0_10000000_1110000,
634 0b0_10000100_1001001,
635 0b0_10000110_0110010,
636 0b0_10000010_0101000,
637 0b0_10000000_1110000,
638 0b0_10000100_1001001,
639 ];
640 let src: __m128bh = transmute(src_array);
641 let a: __m128 = transmute(a_array);
642 let b: __m128 = transmute(b_array);
643 let k: __mmask8 = 0b1111_1111;
644 let c: __m128bh = _mm_mask_cvtne2ps_pbh(src, k, a, b);
645 let result: [u16; 8] = transmute(c.as_u16x8());
646 #[rustfmt::skip]
647 let expected_result: [u16; 8] = [
648 0b1_10000110_0110010,
649 0b1_10000010_0101000,
650 0b1_10000000_1110000,
651 0b1_10000100_1001001,
652 0b0_10000110_0110010,
653 0b0_10000010_0101000,
654 0b0_10000000_1110000,
655 0b0_10000100_1001001,
656 ];
657 assert_eq!(result, expected_result);
658 let k = 0b0000_0000;
659 let c = _mm_mask_cvtne2ps_pbh(src, k, a, b);
660 let result: [u16; 8] = transmute(c.as_u16x8());
661 let expected_result = src_array;
662 assert_eq!(result, expected_result);
663 }
664
665 #[simd_test(enable = "avx512bf16,avx512vl")]
666 unsafe fn test_mm_maskz_cvtne2ps_pbh() {
667 let a_array = [178.125_f32, 10.5_f32, 3.75_f32, 50.25_f32];
668 let b_array = [-178.125_f32, -10.5_f32, -3.75_f32, -50.25_f32];
669 let a: __m128 = transmute(a_array);
670 let b: __m128 = transmute(b_array);
671 let k: __mmask8 = 0b1111_1111;
672 let c: __m128bh = _mm_maskz_cvtne2ps_pbh(k, a, b);
673 let result: [u16; 8] = transmute(c.as_u16x8());
674 #[rustfmt::skip]
675 let expected_result: [u16; 8] = [
676 0b1_10000110_0110010,
677 0b1_10000010_0101000,
678 0b1_10000000_1110000,
679 0b1_10000100_1001001,
680 0b0_10000110_0110010,
681 0b0_10000010_0101000,
682 0b0_10000000_1110000,
683 0b0_10000100_1001001,
684 ];
685 assert_eq!(result, expected_result);
686 let k = 0b0011_1100;
687 let c = _mm_maskz_cvtne2ps_pbh(k, a, b);
688 let result: [u16; 8] = transmute(c.as_u16x8());
689 #[rustfmt::skip]
690 let expected_result: [u16; 8] = [
691 0,
692 0,
693 0b1_10000000_1110000,
694 0b1_10000100_1001001,
695 0b0_10000110_0110010,
696 0b0_10000010_0101000,
697 0,
698 0,
699 ];
700 assert_eq!(result, expected_result);
701 }
702
703 #[simd_test(enable = "avx512bf16,avx512vl")]
704 unsafe fn test_mm256_cvtne2ps_pbh() {
705 #[rustfmt::skip]
706 let a_array = [
707 178.125_f32,
708 10.5_f32,
709 3.75_f32,
710 50.25_f32,
711 16.5_f32,
712 255.11_f32,
713 1000.158_f32,
714 575.575_f32,
715 ];
716 let b_array = [
717 -178.125_f32,
718 -10.5_f32,
719 -3.75_f32,
720 -50.25_f32,
721 -16.5_f32,
722 -255.11_f32,
723 -1000.158_f32,
724 -575.575_f32,
725 ];
726 let a: __m256 = transmute(a_array);
727 let b: __m256 = transmute(b_array);
728 let c: __m256bh = _mm256_cvtne2ps_pbh(a, b);
729 let result: [u16; 16] = transmute(c.as_u16x16());
730 #[rustfmt::skip]
731 let expected_result: [u16; 16] = [
732 0b1_10000110_0110010,
733 0b1_10000010_0101000,
734 0b1_10000000_1110000,
735 0b1_10000100_1001001,
736 0b1_10000011_0000100,
737 0b1_10000110_1111111,
738 0b1_10001000_1111010,
739 0b1_10001000_0010000,
740 0b0_10000110_0110010,
741 0b0_10000010_0101000,
742 0b0_10000000_1110000,
743 0b0_10000100_1001001,
744 0b0_10000011_0000100,
745 0b0_10000110_1111111,
746 0b0_10001000_1111010,
747 0b0_10001000_0010000,
748 ];
749 assert_eq!(result, expected_result);
750 }
751
752 #[simd_test(enable = "avx512bf16,avx512vl")]
753 unsafe fn test_mm256_mask_cvtne2ps_pbh() {
754 #[rustfmt::skip]
755 let a_array = [
756 178.125_f32,
757 10.5_f32,
758 3.75_f32,
759 50.25_f32,
760 16.5_f32,
761 255.11_f32,
762 1000.158_f32,
763 575.575_f32,
764 ];
765 let b_array = [
766 -178.125_f32,
767 -10.5_f32,
768 -3.75_f32,
769 -50.25_f32,
770 -16.5_f32,
771 -255.11_f32,
772 -1000.158_f32,
773 -575.575_f32,
774 ];
775 let src_array: [u16; 16] = [
776 0b0_10000110_0110010,
777 0b0_10000010_0101000,
778 0b0_10000000_1110000,
779 0b0_10000100_1001001,
780 0b0_10000110_0110010,
781 0b0_10000010_0101000,
782 0b0_10000000_1110000,
783 0b0_10000100_1001001,
784 0b0_10000110_0110010,
785 0b0_10000010_0101000,
786 0b0_10000000_1110000,
787 0b0_10000100_1001001,
788 0b0_10000110_0110010,
789 0b0_10000010_0101000,
790 0b0_10000000_1110000,
791 0b0_10000100_1001001,
792 ];
793 let src: __m256bh = transmute(src_array);
794 let a: __m256 = transmute(a_array);
795 let b: __m256 = transmute(b_array);
796 let k: __mmask16 = 0xffff;
797 let c: __m256bh = _mm256_mask_cvtne2ps_pbh(src, k, a, b);
798 let result: [u16; 16] = transmute(c.as_u16x16());
799 #[rustfmt::skip]
800 let expected_result: [u16; 16] = [
801 0b1_10000110_0110010,
802 0b1_10000010_0101000,
803 0b1_10000000_1110000,
804 0b1_10000100_1001001,
805 0b1_10000011_0000100,
806 0b1_10000110_1111111,
807 0b1_10001000_1111010,
808 0b1_10001000_0010000,
809 0b0_10000110_0110010,
810 0b0_10000010_0101000,
811 0b0_10000000_1110000,
812 0b0_10000100_1001001,
813 0b0_10000011_0000100,
814 0b0_10000110_1111111,
815 0b0_10001000_1111010,
816 0b0_10001000_0010000,
817 ];
818 assert_eq!(result, expected_result);
819 let k: __mmask16 = 0;
820 let c: __m256bh = _mm256_mask_cvtne2ps_pbh(src, k, a, b);
821 let result: [u16; 16] = transmute(c.as_u16x16());
822 let expected_result = src_array;
823 assert_eq!(result, expected_result);
824 }
825
826 #[simd_test(enable = "avx512bf16,avx512vl")]
827 unsafe fn test_mm256_maskz_cvtne2ps_pbh() {
828 #[rustfmt::skip]
829 let a_array = [
830 178.125_f32,
831 10.5_f32,
832 3.75_f32,
833 50.25_f32,
834 16.5_f32,
835 255.11_f32,
836 1000.158_f32,
837 575.575_f32,
838 ];
839 let b_array = [
840 -178.125_f32,
841 -10.5_f32,
842 -3.75_f32,
843 -50.25_f32,
844 -16.5_f32,
845 -255.11_f32,
846 -1000.158_f32,
847 -575.575_f32,
848 ];
849 let a: __m256 = transmute(a_array);
850 let b: __m256 = transmute(b_array);
851 let k: __mmask16 = 0xffff;
852 let c: __m256bh = _mm256_maskz_cvtne2ps_pbh(k, a, b);
853 let result: [u16; 16] = transmute(c.as_u16x16());
854 #[rustfmt::skip]
855 let expected_result: [u16; 16] = [
856 0b1_10000110_0110010,
857 0b1_10000010_0101000,
858 0b1_10000000_1110000,
859 0b1_10000100_1001001,
860 0b1_10000011_0000100,
861 0b1_10000110_1111111,
862 0b1_10001000_1111010,
863 0b1_10001000_0010000,
864 0b0_10000110_0110010,
865 0b0_10000010_0101000,
866 0b0_10000000_1110000,
867 0b0_10000100_1001001,
868 0b0_10000011_0000100,
869 0b0_10000110_1111111,
870 0b0_10001000_1111010,
871 0b0_10001000_0010000,
872 ];
873 assert_eq!(result, expected_result);
874 let k: __mmask16 = 0b0110_1100_0011_0110;
875 let c: __m256bh = _mm256_maskz_cvtne2ps_pbh(k, a, b);
876 let result: [u16; 16] = transmute(c.as_u16x16());
877 #[rustfmt::skip]
878 let expected_result: [u16; 16] = [
879 0,
880 0b1_10000010_0101000,
881 0b1_10000000_1110000,
882 0,
883 0b1_10000011_0000100,
884 0b1_10000110_1111111,
885 0,
886 0,
887 0,
888 0,
889 0b0_10000000_1110000,
890 0b0_10000100_1001001,
891 0,
892 0b0_10000110_1111111,
893 0b0_10001000_1111010,
894 0,
895 ];
896 assert_eq!(result, expected_result);
897 }
898
899 #[simd_test(enable = "avx512bf16,avx512f")]
900 unsafe fn test_mm512_cvtne2ps_pbh() {
901 #[rustfmt::skip]
902 let a_array = [
903 178.125_f32,
904 10.5_f32,
905 3.75_f32,
906 50.25_f32,
907 16.5_f32,
908 255.11_f32,
909 1000.158_f32,
910 575.575_f32,
911 178.125_f32,
912 10.5_f32,
913 3.75_f32,
914 50.25_f32,
915 16.5_f32,
916 255.11_f32,
917 1000.158_f32,
918 575.575_f32,
919 ];
920 let b_array = [
921 -178.125_f32,
922 -10.5_f32,
923 -3.75_f32,
924 -50.25_f32,
925 -16.5_f32,
926 -255.11_f32,
927 -1000.158_f32,
928 -575.575_f32,
929 -178.125_f32,
930 -10.5_f32,
931 -3.75_f32,
932 -50.25_f32,
933 -16.5_f32,
934 -255.11_f32,
935 -1000.158_f32,
936 -575.575_f32,
937 ];
938 let a: __m512 = transmute(a_array);
939 let b: __m512 = transmute(b_array);
940 let c: __m512bh = _mm512_cvtne2ps_pbh(a, b);
941 let result: [u16; 32] = transmute(c.as_u16x32());
942 #[rustfmt::skip]
943 let expected_result: [u16; 32] = [
944 0b1_10000110_0110010,
945 0b1_10000010_0101000,
946 0b1_10000000_1110000,
947 0b1_10000100_1001001,
948 0b1_10000011_0000100,
949 0b1_10000110_1111111,
950 0b1_10001000_1111010,
951 0b1_10001000_0010000,
952 0b1_10000110_0110010,
953 0b1_10000010_0101000,
954 0b1_10000000_1110000,
955 0b1_10000100_1001001,
956 0b1_10000011_0000100,
957 0b1_10000110_1111111,
958 0b1_10001000_1111010,
959 0b1_10001000_0010000,
960 0b0_10000110_0110010,
961 0b0_10000010_0101000,
962 0b0_10000000_1110000,
963 0b0_10000100_1001001,
964 0b0_10000011_0000100,
965 0b0_10000110_1111111,
966 0b0_10001000_1111010,
967 0b0_10001000_0010000,
968 0b0_10000110_0110010,
969 0b0_10000010_0101000,
970 0b0_10000000_1110000,
971 0b0_10000100_1001001,
972 0b0_10000011_0000100,
973 0b0_10000110_1111111,
974 0b0_10001000_1111010,
975 0b0_10001000_0010000,
976 ];
977 assert_eq!(result, expected_result);
978 }
979
980 #[simd_test(enable = "avx512bf16,avx512f")]
981 unsafe fn test_mm512_mask_cvtne2ps_pbh() {
982 #[rustfmt::skip]
983 let a_array = [
984 178.125_f32,
985 10.5_f32,
986 3.75_f32,
987 50.25_f32,
988 16.5_f32,
989 255.11_f32,
990 1000.158_f32,
991 575.575_f32,
992 178.125_f32,
993 10.5_f32,
994 3.75_f32,
995 50.25_f32,
996 16.5_f32,
997 255.11_f32,
998 1000.158_f32,
999 575.575_f32,
1000 ];
1001 let b_array = [
1002 -178.125_f32,
1003 -10.5_f32,
1004 -3.75_f32,
1005 -50.25_f32,
1006 -16.5_f32,
1007 -255.11_f32,
1008 -1000.158_f32,
1009 -575.575_f32,
1010 -178.125_f32,
1011 -10.5_f32,
1012 -3.75_f32,
1013 -50.25_f32,
1014 -16.5_f32,
1015 -255.11_f32,
1016 -1000.158_f32,
1017 -575.575_f32,
1018 ];
1019 let src_array: [u16; 32] = [
1020 0b0_10000110_0110010,
1021 0b0_10000010_0101000,
1022 0b0_10000000_1110000,
1023 0b0_10000100_1001001,
1024 0b0_10000110_0110010,
1025 0b0_10000010_0101000,
1026 0b0_10000000_1110000,
1027 0b0_10000100_1001001,
1028 0b0_10000110_0110010,
1029 0b0_10000010_0101000,
1030 0b0_10000000_1110000,
1031 0b0_10000100_1001001,
1032 0b0_10000110_0110010,
1033 0b0_10000010_0101000,
1034 0b0_10000000_1110000,
1035 0b0_10000100_1001001,
1036 0b0_10000110_0110010,
1037 0b0_10000010_0101000,
1038 0b0_10000000_1110000,
1039 0b0_10000100_1001001,
1040 0b0_10000110_0110010,
1041 0b0_10000010_0101000,
1042 0b0_10000000_1110000,
1043 0b0_10000100_1001001,
1044 0b0_10000110_0110010,
1045 0b0_10000010_0101000,
1046 0b0_10000000_1110000,
1047 0b0_10000100_1001001,
1048 0b0_10000110_0110010,
1049 0b0_10000010_0101000,
1050 0b0_10000000_1110000,
1051 0b0_10000100_1001001,
1052 ];
1053 let src: __m512bh = transmute(src_array);
1054 let a: __m512 = transmute(a_array);
1055 let b: __m512 = transmute(b_array);
1056 let k: __mmask32 = 0xffffffff;
1057 let c: __m512bh = _mm512_mask_cvtne2ps_pbh(src, k, a, b);
1058 let result: [u16; 32] = transmute(c.as_u16x32());
1059 #[rustfmt::skip]
1060 let expected_result: [u16; 32] = [
1061 0b1_10000110_0110010,
1062 0b1_10000010_0101000,
1063 0b1_10000000_1110000,
1064 0b1_10000100_1001001,
1065 0b1_10000011_0000100,
1066 0b1_10000110_1111111,
1067 0b1_10001000_1111010,
1068 0b1_10001000_0010000,
1069 0b1_10000110_0110010,
1070 0b1_10000010_0101000,
1071 0b1_10000000_1110000,
1072 0b1_10000100_1001001,
1073 0b1_10000011_0000100,
1074 0b1_10000110_1111111,
1075 0b1_10001000_1111010,
1076 0b1_10001000_0010000,
1077 0b0_10000110_0110010,
1078 0b0_10000010_0101000,
1079 0b0_10000000_1110000,
1080 0b0_10000100_1001001,
1081 0b0_10000011_0000100,
1082 0b0_10000110_1111111,
1083 0b0_10001000_1111010,
1084 0b0_10001000_0010000,
1085 0b0_10000110_0110010,
1086 0b0_10000010_0101000,
1087 0b0_10000000_1110000,
1088 0b0_10000100_1001001,
1089 0b0_10000011_0000100,
1090 0b0_10000110_1111111,
1091 0b0_10001000_1111010,
1092 0b0_10001000_0010000,
1093 ];
1094 assert_eq!(result, expected_result);
1095 let k: __mmask32 = 0;
1096 let c: __m512bh = _mm512_mask_cvtne2ps_pbh(src, k, a, b);
1097 let result: [u16; 32] = transmute(c.as_u16x32());
1098 let expected_result = src_array;
1099 assert_eq!(result, expected_result);
1100 }
1101
1102 #[simd_test(enable = "avx512bf16,avx512f")]
1103 unsafe fn test_mm512_maskz_cvtne2ps_pbh() {
1104 #[rustfmt::skip]
1105 let a_array = [
1106 178.125_f32,
1107 10.5_f32,
1108 3.75_f32,
1109 50.25_f32,
1110 16.5_f32,
1111 255.11_f32,
1112 1000.158_f32,
1113 575.575_f32,
1114 178.125_f32,
1115 10.5_f32,
1116 3.75_f32,
1117 50.25_f32,
1118 16.5_f32,
1119 255.11_f32,
1120 1000.158_f32,
1121 575.575_f32,
1122 ];
1123 let b_array = [
1124 -178.125_f32,
1125 -10.5_f32,
1126 -3.75_f32,
1127 -50.25_f32,
1128 -16.5_f32,
1129 -255.11_f32,
1130 -1000.158_f32,
1131 -575.575_f32,
1132 -178.125_f32,
1133 -10.5_f32,
1134 -3.75_f32,
1135 -50.25_f32,
1136 -16.5_f32,
1137 -255.11_f32,
1138 -1000.158_f32,
1139 -575.575_f32,
1140 ];
1141 let a: __m512 = transmute(a_array);
1142 let b: __m512 = transmute(b_array);
1143 let k: __mmask32 = 0xffffffff;
1144 let c: __m512bh = _mm512_maskz_cvtne2ps_pbh(k, a, b);
1145 let result: [u16; 32] = transmute(c.as_u16x32());
1146 #[rustfmt::skip]
1147 let expected_result: [u16; 32] = [
1148 0b1_10000110_0110010,
1149 0b1_10000010_0101000,
1150 0b1_10000000_1110000,
1151 0b1_10000100_1001001,
1152 0b1_10000011_0000100,
1153 0b1_10000110_1111111,
1154 0b1_10001000_1111010,
1155 0b1_10001000_0010000,
1156 0b1_10000110_0110010,
1157 0b1_10000010_0101000,
1158 0b1_10000000_1110000,
1159 0b1_10000100_1001001,
1160 0b1_10000011_0000100,
1161 0b1_10000110_1111111,
1162 0b1_10001000_1111010,
1163 0b1_10001000_0010000,
1164 0b0_10000110_0110010,
1165 0b0_10000010_0101000,
1166 0b0_10000000_1110000,
1167 0b0_10000100_1001001,
1168 0b0_10000011_0000100,
1169 0b0_10000110_1111111,
1170 0b0_10001000_1111010,
1171 0b0_10001000_0010000,
1172 0b0_10000110_0110010,
1173 0b0_10000010_0101000,
1174 0b0_10000000_1110000,
1175 0b0_10000100_1001001,
1176 0b0_10000011_0000100,
1177 0b0_10000110_1111111,
1178 0b0_10001000_1111010,
1179 0b0_10001000_0010000,
1180 ];
1181 assert_eq!(result, expected_result);
1182 let k: __mmask32 = 0b1100_1010_1001_0110_1010_0011_0101_0110;
1183 let c: __m512bh = _mm512_maskz_cvtne2ps_pbh(k, a, b);
1184 let result: [u16; 32] = transmute(c.as_u16x32());
1185 #[rustfmt::skip]
1186 let expected_result: [u16; 32] = [
1187 0,
1188 0b1_10000010_0101000,
1189 0b1_10000000_1110000,
1190 0,
1191 0b1_10000011_0000100,
1192 0,
1193 0b1_10001000_1111010,
1194 0,
1195 0b1_10000110_0110010,
1196 0b1_10000010_0101000,
1197 0,
1198 0,
1199 0,
1200 0b1_10000110_1111111,
1201 0,
1202 0b1_10001000_0010000,
1203 0,
1204 0b0_10000010_0101000,
1205 0b0_10000000_1110000,
1206 0,
1207 0b0_10000011_0000100,
1208 0,
1209 0,
1210 0b0_10001000_0010000,
1211 0,
1212 0b0_10000010_0101000,
1213 0,
1214 0b0_10000100_1001001,
1215 0,
1216 0,
1217 0b0_10001000_1111010,
1218 0b0_10001000_0010000,
1219 ];
1220 assert_eq!(result, expected_result);
1221 }
1222
1223 #[simd_test(enable = "avx512bf16,avx512vl")]
1224 unsafe fn test_mm256_cvtneps_pbh() {
1225 #[rustfmt::skip]
1226 let a_array = [
1227 178.125_f32,
1228 10.5_f32,
1229 3.75_f32,
1230 50.25_f32,
1231 16.5_f32,
1232 255.11_f32,
1233 1000.158_f32,
1234 575.575_f32,
1235 ];
1236 let a: __m256 = transmute(a_array);
1237 let c: __m128bh = _mm256_cvtneps_pbh(a);
1238 let result: [u16; 8] = transmute(c.as_u16x8());
1239 #[rustfmt::skip]
1240 let expected_result: [u16; 8] = [
1241 0b0_10000110_0110010,
1242 0b0_10000010_0101000,
1243 0b0_10000000_1110000,
1244 0b0_10000100_1001001,
1245 0b0_10000011_0000100,
1246 0b0_10000110_1111111,
1247 0b0_10001000_1111010,
1248 0b0_10001000_0010000,
1249 ];
1250 assert_eq!(result, expected_result);
1251 }
1252
1253 #[simd_test(enable = "avx512bf16,avx512vl")]
1254 unsafe fn test_mm256_mask_cvtneps_pbh() {
1255 #[rustfmt::skip]
1256 let a_array = [
1257 178.125_f32,
1258 10.5_f32,
1259 3.75_f32,
1260 50.25_f32,
1261 16.5_f32,
1262 255.11_f32,
1263 1000.158_f32,
1264 575.575_f32,
1265 ];
1266 let src_array: [u16; 8] = [
1267 0b1_10000110_0110010,
1268 0b1_10000010_0101000,
1269 0b1_10000000_1110000,
1270 0b1_10000100_1001001,
1271 0b1_10000011_0000100,
1272 0b1_10000110_1111111,
1273 0b1_10001000_1111010,
1274 0b1_10001000_0010000,
1275 ];
1276 let src: __m128bh = transmute(src_array);
1277 let a: __m256 = transmute(a_array);
1278 let k: __mmask8 = 0xff;
1279 let b = _mm256_mask_cvtneps_pbh(src, k, a);
1280 let result: [u16; 8] = transmute(b.as_u16x8());
1281 #[rustfmt::skip]
1282 let expected_result: [u16; 8] = [
1283 0b0_10000110_0110010,
1284 0b0_10000010_0101000,
1285 0b0_10000000_1110000,
1286 0b0_10000100_1001001,
1287 0b0_10000011_0000100,
1288 0b0_10000110_1111111,
1289 0b0_10001000_1111010,
1290 0b0_10001000_0010000,
1291 ];
1292 assert_eq!(result, expected_result);
1293 let k: __mmask8 = 0x0;
1294 let b: __m128bh = _mm256_mask_cvtneps_pbh(src, k, a);
1295 let result: [u16; 8] = transmute(b.as_u16x8());
1296 let expected_result: [u16; 8] = src_array;
1297 assert_eq!(result, expected_result);
1298 }
1299
1300 #[simd_test(enable = "avx512bf16,avx512vl")]
1301 unsafe fn test_mm256_maskz_cvtneps_pbh() {
1302 #[rustfmt::skip]
1303 let a_array = [
1304 178.125_f32,
1305 10.5_f32,
1306 3.75_f32,
1307 50.25_f32,
1308 16.5_f32,
1309 255.11_f32,
1310 1000.158_f32,
1311 575.575_f32,
1312 ];
1313 let a: __m256 = transmute(a_array);
1314 let k: __mmask8 = 0xff;
1315 let b = _mm256_maskz_cvtneps_pbh(k, a);
1316 let result: [u16; 8] = transmute(b.as_u16x8());
1317 #[rustfmt::skip]
1318 let expected_result: [u16; 8] = [
1319 0b0_10000110_0110010,
1320 0b0_10000010_0101000,
1321 0b0_10000000_1110000,
1322 0b0_10000100_1001001,
1323 0b0_10000011_0000100,
1324 0b0_10000110_1111111,
1325 0b0_10001000_1111010,
1326 0b0_10001000_0010000,
1327 ];
1328 assert_eq!(result, expected_result);
1329 let k: __mmask8 = 0x6;
1330 let b: __m128bh = _mm256_maskz_cvtneps_pbh(k, a);
1331 let result: [u16; 8] = transmute(b.as_u16x8());
1332 let expected_result: [u16; 8] =
1333 [0, 0b0_10000010_0101000, 0b0_10000000_1110000, 0, 0, 0, 0, 0];
1334 assert_eq!(result, expected_result);
1335 }
1336
1337 #[simd_test(enable = "avx512bf16,avx512f")]
1338 unsafe fn test_mm512_cvtneps_pbh() {
1339 #[rustfmt::skip]
1340 let a_array = [
1341 178.125_f32,
1342 10.5_f32,
1343 3.75_f32,
1344 50.25_f32,
1345 16.5_f32,
1346 255.11_f32,
1347 1000.158_f32,
1348 575.575_f32,
1349 178.125_f32,
1350 10.5_f32,
1351 3.75_f32,
1352 50.25_f32,
1353 16.5_f32,
1354 255.11_f32,
1355 1000.158_f32,
1356 575.575_f32,
1357 ];
1358 let a: __m512 = transmute(a_array);
1359 let c: __m256bh = _mm512_cvtneps_pbh(a);
1360 let result: [u16; 16] = transmute(c.as_u16x16());
1361 #[rustfmt::skip]
1362 let expected_result: [u16; 16] = [
1363 0b0_10000110_0110010,
1364 0b0_10000010_0101000,
1365 0b0_10000000_1110000,
1366 0b0_10000100_1001001,
1367 0b0_10000011_0000100,
1368 0b0_10000110_1111111,
1369 0b0_10001000_1111010,
1370 0b0_10001000_0010000,
1371 0b0_10000110_0110010,
1372 0b0_10000010_0101000,
1373 0b0_10000000_1110000,
1374 0b0_10000100_1001001,
1375 0b0_10000011_0000100,
1376 0b0_10000110_1111111,
1377 0b0_10001000_1111010,
1378 0b0_10001000_0010000,
1379 ];
1380 assert_eq!(result, expected_result);
1381 }
1382
1383 #[simd_test(enable = "avx512bf16,avx512f")]
1384 unsafe fn test_mm512_mask_cvtneps_pbh() {
1385 #[rustfmt::skip]
1386 let a_array = [
1387 178.125_f32,
1388 10.5_f32,
1389 3.75_f32,
1390 50.25_f32,
1391 16.5_f32,
1392 255.11_f32,
1393 1000.158_f32,
1394 575.575_f32,
1395 178.125_f32,
1396 10.5_f32,
1397 3.75_f32,
1398 50.25_f32,
1399 16.5_f32,
1400 255.11_f32,
1401 1000.158_f32,
1402 575.575_f32,
1403 ];
1404 let src_array: [u16; 16] = [
1405 0b1_10000110_0110010,
1406 0b1_10000010_0101000,
1407 0b1_10000000_1110000,
1408 0b1_10000100_1001001,
1409 0b1_10000011_0000100,
1410 0b1_10000110_1111111,
1411 0b1_10001000_1111010,
1412 0b1_10001000_0010000,
1413 0b1_10000110_0110010,
1414 0b1_10000010_0101000,
1415 0b1_10000000_1110000,
1416 0b1_10000100_1001001,
1417 0b1_10000011_0000100,
1418 0b1_10000110_1111111,
1419 0b1_10001000_1111010,
1420 0b1_10001000_0010000,
1421 ];
1422 let src: __m256bh = transmute(src_array);
1423 let a: __m512 = transmute(a_array);
1424 let k: __mmask16 = 0xffff;
1425 let c: __m256bh = _mm512_mask_cvtneps_pbh(src, k, a);
1426 let result: [u16; 16] = transmute(c.as_u16x16());
1427 #[rustfmt::skip]
1428 let expected_result: [u16; 16] = [
1429 0b0_10000110_0110010,
1430 0b0_10000010_0101000,
1431 0b0_10000000_1110000,
1432 0b0_10000100_1001001,
1433 0b0_10000011_0000100,
1434 0b0_10000110_1111111,
1435 0b0_10001000_1111010,
1436 0b0_10001000_0010000,
1437 0b0_10000110_0110010,
1438 0b0_10000010_0101000,
1439 0b0_10000000_1110000,
1440 0b0_10000100_1001001,
1441 0b0_10000011_0000100,
1442 0b0_10000110_1111111,
1443 0b0_10001000_1111010,
1444 0b0_10001000_0010000,
1445 ];
1446 assert_eq!(result, expected_result);
1447 let k: __mmask16 = 0;
1448 let c: __m256bh = _mm512_mask_cvtneps_pbh(src, k, a);
1449 let result: [u16; 16] = transmute(c.as_u16x16());
1450 let expected_result = src_array;
1451 assert_eq!(result, expected_result);
1452 }
1453
1454 #[simd_test(enable = "avx512bf16,avx512f")]
1455 unsafe fn test_mm512_maskz_cvtneps_pbh() {
1456 #[rustfmt::skip]
1457 let a_array = [
1458 178.125_f32,
1459 10.5_f32,
1460 3.75_f32,
1461 50.25_f32,
1462 16.5_f32,
1463 255.11_f32,
1464 1000.158_f32,
1465 575.575_f32,
1466 178.125_f32,
1467 10.5_f32,
1468 3.75_f32,
1469 50.25_f32,
1470 16.5_f32,
1471 255.11_f32,
1472 1000.158_f32,
1473 575.575_f32,
1474 ];
1475 let a: __m512 = transmute(a_array);
1476 let k: __mmask16 = 0xffff;
1477 let c: __m256bh = _mm512_maskz_cvtneps_pbh(k, a);
1478 let result: [u16; 16] = transmute(c.as_u16x16());
1479 #[rustfmt::skip]
1480 let expected_result: [u16; 16] = [
1481 0b0_10000110_0110010,
1482 0b0_10000010_0101000,
1483 0b0_10000000_1110000,
1484 0b0_10000100_1001001,
1485 0b0_10000011_0000100,
1486 0b0_10000110_1111111,
1487 0b0_10001000_1111010,
1488 0b0_10001000_0010000,
1489 0b0_10000110_0110010,
1490 0b0_10000010_0101000,
1491 0b0_10000000_1110000,
1492 0b0_10000100_1001001,
1493 0b0_10000011_0000100,
1494 0b0_10000110_1111111,
1495 0b0_10001000_1111010,
1496 0b0_10001000_0010000,
1497 ];
1498 assert_eq!(result, expected_result);
1499 let k: __mmask16 = 0x653a;
1500 let c: __m256bh = _mm512_maskz_cvtneps_pbh(k, a);
1501 let result: [u16; 16] = transmute(c.as_u16x16());
1502 #[rustfmt::skip]
1503 let expected_result: [u16; 16] = [
1504 0,
1505 0b0_10000010_0101000,
1506 0,
1507 0b0_10000100_1001001,
1508 0b0_10000011_0000100,
1509 0b0_10000110_1111111,
1510 0,
1511 0,
1512 0b0_10000110_0110010,
1513 0,
1514 0b0_10000000_1110000,
1515 0,
1516 0,
1517 0b0_10000110_1111111,
1518 0b0_10001000_1111010,
1519 0,
1520 ];
1521 assert_eq!(result, expected_result);
1522 }
1523
1524 #[simd_test(enable = "avx512bf16,avx512vl")]
1525 unsafe fn test_mm_dpbf16_ps() {
1526 let a_array = [8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32];
1527 let b_array = [-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32];
1528 let a1: __m128 = transmute(a_array);
1529 let b1: __m128 = transmute(b_array);
1530 let src: __m128 = transmute([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]);
1531 let a: __m128bh = _mm_cvtne2ps_pbh(a1, a1);
1532 let b: __m128bh = _mm_cvtne2ps_pbh(b1, b1);
1533 let c: __m128 = _mm_dpbf16_ps(src, a, b);
1534 let result: [f32; 4] = transmute(c.as_f32x4());
1535 let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32];
1536 assert_eq!(result, expected_result);
1537 }
1538
1539 #[simd_test(enable = "avx512bf16,avx512vl")]
1540 unsafe fn test_mm_mask_dpbf16_ps() {
1541 let a_array = [8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32];
1542 let b_array = [-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32];
1543 let a1: __m128 = transmute(a_array);
1544 let b1: __m128 = transmute(b_array);
1545 let k: __mmask8 = 0xf3;
1546 let src: __m128 = transmute([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]);
1547 let a: __m128bh = _mm_cvtne2ps_pbh(a1, a1);
1548 let b: __m128bh = _mm_cvtne2ps_pbh(b1, b1);
1549 let c: __m128 = _mm_mask_dpbf16_ps(src, k, a, b);
1550 let result: [f32; 4] = transmute(c.as_f32x4());
1551 let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32];
1552 assert_eq!(result, expected_result);
1553 let k: __mmask8 = 0xff;
1554 let c: __m128 = _mm_mask_dpbf16_ps(src, k, a, b);
1555 let result: [f32; 4] = transmute(c.as_f32x4());
1556 let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32];
1557 assert_eq!(result, expected_result);
1558 let k: __mmask8 = 0;
1559 let c: __m128 = _mm_mask_dpbf16_ps(src, k, a, b);
1560 let result: [f32; 4] = transmute(c.as_f32x4());
1561 let expected_result: [f32; 4] = [1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32];
1562 assert_eq!(result, expected_result);
1563 }
1564
1565 #[simd_test(enable = "avx512bf16,avx512vl")]
1566 unsafe fn test_mm_maskz_dpbf16_ps() {
1567 let a_array = [8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32];
1568 let b_array = [-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32];
1569 let a1: __m128 = transmute(a_array);
1570 let b1: __m128 = transmute(b_array);
1571 let k: __mmask8 = 0xf3;
1572 let src: __m128 = transmute([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]);
1573 let a: __m128bh = _mm_cvtne2ps_pbh(a1, a1);
1574 let b: __m128bh = _mm_cvtne2ps_pbh(b1, b1);
1575 let c: __m128 = _mm_maskz_dpbf16_ps(k, src, a, b);
1576 let result: [f32; 4] = transmute(c.as_f32x4());
1577 let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, 0.0, 0.0];
1578 assert_eq!(result, expected_result);
1579 let k: __mmask8 = 0xff;
1580 let c: __m128 = _mm_maskz_dpbf16_ps(k, src, a, b);
1581 let result: [f32; 4] = transmute(c.as_f32x4());
1582 let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32];
1583 assert_eq!(result, expected_result);
1584 let k: __mmask8 = 0;
1585 let c: __m128 = _mm_maskz_dpbf16_ps(k, src, a, b);
1586 let result: [f32; 4] = transmute(c.as_f32x4());
1587 let expected_result: [f32; 4] = [0.0, 0.0, 0.0, 0.0];
1588 assert_eq!(result, expected_result);
1589 }
1590
1591 #[simd_test(enable = "avx512bf16,avx512vl")]
1592 unsafe fn test_mm256_dpbf16_ps() {
1593 #[rustfmt::skip]
1594 let a_array = [
1595 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1596 ];
1597 let b_array = [
1598 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1599 ];
1600 let a1: __m256 = transmute(a_array);
1601 let b1: __m256 = transmute(b_array);
1602 #[rustfmt::skip]
1603 let src: __m256 = transmute([
1604 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
1605 ]);
1606 let a: __m256bh = _mm256_cvtne2ps_pbh(a1, a1);
1607 let b: __m256bh = _mm256_cvtne2ps_pbh(b1, b1);
1608 let c: __m256 = _mm256_dpbf16_ps(src, a, b);
1609 let result: [f32; 8] = transmute(c.as_f32x8());
1610 #[rustfmt::skip]
1611 let expected_result: [f32; 8] = [
1612 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1613 ];
1614 assert_eq!(result, expected_result);
1615 }
1616
1617 #[simd_test(enable = "avx512bf16,avx512vl")]
1618 unsafe fn test_mm256_mask_dpbf16_ps() {
1619 #[rustfmt::skip]
1620 let a_array = [
1621 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1622 ];
1623 let b_array = [
1624 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1625 ];
1626 let a1: __m256 = transmute(a_array);
1627 let b1: __m256 = transmute(b_array);
1628 let k: __mmask8 = 0x33;
1629 #[rustfmt::skip]
1630 let src: __m256 = transmute([
1631 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
1632 ]);
1633 let a: __m256bh = _mm256_cvtne2ps_pbh(a1, a1);
1634 let b: __m256bh = _mm256_cvtne2ps_pbh(b1, b1);
1635 let c: __m256 = _mm256_mask_dpbf16_ps(src, k, a, b);
1636 let result: [f32; 8] = transmute(c.as_f32x8());
1637 #[rustfmt::skip]
1638 let expected_result: [f32; 8] = [
1639 -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32, -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32,
1640 ];
1641 assert_eq!(result, expected_result);
1642 let k: __mmask8 = 0xff;
1643 let c: __m256 = _mm256_mask_dpbf16_ps(src, k, a, b);
1644 let result: [f32; 8] = transmute(c.as_f32x8());
1645 #[rustfmt::skip]
1646 let expected_result: [f32; 8] = [
1647 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1648 ];
1649 assert_eq!(result, expected_result);
1650 let k: __mmask8 = 0;
1651 let c: __m256 = _mm256_mask_dpbf16_ps(src, k, a, b);
1652 let result: [f32; 8] = transmute(c.as_f32x8());
1653 #[rustfmt::skip]
1654 let expected_result: [f32; 8] = [
1655 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
1656 ];
1657 assert_eq!(result, expected_result);
1658 }
1659
1660 #[simd_test(enable = "avx512bf16,avx512vl")]
1661 unsafe fn test_mm256_maskz_dpbf16_ps() {
1662 #[rustfmt::skip]
1663 let a_array = [
1664 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1665 ];
1666 let b_array = [
1667 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1668 ];
1669 let a1: __m256 = transmute(a_array);
1670 let b1: __m256 = transmute(b_array);
1671 let k: __mmask8 = 0x33;
1672 #[rustfmt::skip]
1673 let src: __m256 = transmute([
1674 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
1675 ]);
1676 let a: __m256bh = _mm256_cvtne2ps_pbh(a1, a1);
1677 let b: __m256bh = _mm256_cvtne2ps_pbh(b1, b1);
1678 let c: __m256 = _mm256_maskz_dpbf16_ps(k, src, a, b);
1679 let result: [f32; 8] = transmute(c.as_f32x8());
1680 #[rustfmt::skip]
1681 let expected_result: [f32; 8] = [
1682 -18.0_f32, -52.0_f32, 0.0, 0.0, -18.0_f32, -52.0_f32, 0.0, 0.0,
1683 ];
1684 assert_eq!(result, expected_result);
1685 let k: __mmask8 = 0xff;
1686 let c: __m256 = _mm256_maskz_dpbf16_ps(k, src, a, b);
1687 let result: [f32; 8] = transmute(c.as_f32x8());
1688 #[rustfmt::skip]
1689 let expected_result: [f32; 8] = [
1690 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1691 ];
1692 assert_eq!(result, expected_result);
1693 let k: __mmask8 = 0;
1694 let c: __m256 = _mm256_maskz_dpbf16_ps(k, src, a, b);
1695 let result: [f32; 8] = transmute(c.as_f32x8());
1696 let expected_result: [f32; 8] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0];
1697 assert_eq!(result, expected_result);
1698 }
1699
1700 #[simd_test(enable = "avx512bf16,avx512f")]
1701 unsafe fn test_mm512_dpbf16_ps() {
1702 #[rustfmt::skip]
1703 let a_array = [
1704 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1705 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1706 ];
1707 let b_array = [
1708 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1709 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1710 ];
1711 let a1: __m512 = transmute(a_array);
1712 let b1: __m512 = transmute(b_array);
1713 let src: __m512 = transmute([
1714 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32,
1715 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
1716 ]);
1717 let a: __m512bh = _mm512_cvtne2ps_pbh(a1, a1);
1718 let b: __m512bh = _mm512_cvtne2ps_pbh(b1, b1);
1719 let c: __m512 = _mm512_dpbf16_ps(src, a, b);
1720 let result: [f32; 16] = transmute(c.as_f32x16());
1721 #[rustfmt::skip]
1722 let expected_result: [f32; 16] = [
1723 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1724 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1725 ];
1726 assert_eq!(result, expected_result);
1727 }
1728
1729 #[simd_test(enable = "avx512bf16,avx512f")]
1730 unsafe fn test_mm512_mask_dpbf16_ps() {
1731 #[rustfmt::skip]
1732 let a_array = [
1733 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1734 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1735 ];
1736 let b_array = [
1737 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1738 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1739 ];
1740 let a1: __m512 = transmute(a_array);
1741 let b1: __m512 = transmute(b_array);
1742 let k: __mmask16 = 0x3333;
1743 #[rustfmt::skip]
1744 let src: __m512 = transmute([
1745 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32,
1746 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
1747 ]);
1748 let a: __m512bh = _mm512_cvtne2ps_pbh(a1, a1);
1749 let b: __m512bh = _mm512_cvtne2ps_pbh(b1, b1);
1750 let c: __m512 = _mm512_mask_dpbf16_ps(src, k, a, b);
1751 let result: [f32; 16] = transmute(c.as_f32x16());
1752 #[rustfmt::skip]
1753 let expected_result: [f32; 16] = [
1754 -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32, -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32,
1755 -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32, -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32,
1756 ];
1757 assert_eq!(result, expected_result);
1758 let k: __mmask16 = 0xffff;
1759 let c: __m512 = _mm512_mask_dpbf16_ps(src, k, a, b);
1760 let result: [f32; 16] = transmute(c.as_f32x16());
1761 #[rustfmt::skip]
1762 let expected_result: [f32; 16] = [
1763 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1764 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1765 ];
1766 assert_eq!(result, expected_result);
1767 let k: __mmask16 = 0;
1768 let c: __m512 = _mm512_mask_dpbf16_ps(src, k, a, b);
1769 let result: [f32; 16] = transmute(c.as_f32x16());
1770 #[rustfmt::skip]
1771 let expected_result: [f32; 16] = [
1772 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32,
1773 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
1774 ];
1775 assert_eq!(result, expected_result);
1776 }
1777
1778 #[simd_test(enable = "avx512bf16,avx512f")]
1779 unsafe fn test_mm512_maskz_dpbf16_ps() {
1780 #[rustfmt::skip]
1781 let a_array = [
1782 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1783 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1784 ];
1785 let b_array = [
1786 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1787 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1788 ];
1789 let a1: __m512 = transmute(a_array);
1790 let b1: __m512 = transmute(b_array);
1791 let k: __mmask16 = 0x3333;
1792 #[rustfmt::skip]
1793 let src: __m512 = transmute([
1794 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32,
1795 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
1796 ]);
1797 let a: __m512bh = _mm512_cvtne2ps_pbh(a1, a1);
1798 let b: __m512bh = _mm512_cvtne2ps_pbh(b1, b1);
1799 let c: __m512 = _mm512_maskz_dpbf16_ps(k, src, a, b);
1800 let result: [f32; 16] = transmute(c.as_f32x16());
1801 #[rustfmt::skip]
1802 let expected_result: [f32; 16] = [
1803 -18.0_f32, -52.0_f32, 0.0, 0.0, -18.0_f32, -52.0_f32, 0.0, 0.0, -18.0_f32, -52.0_f32,
1804 0.0, 0.0, -18.0_f32, -52.0_f32, 0.0, 0.0,
1805 ];
1806 assert_eq!(result, expected_result);
1807 let k: __mmask16 = 0xffff;
1808 let c: __m512 = _mm512_maskz_dpbf16_ps(k, src, a, b);
1809 let result: [f32; 16] = transmute(c.as_f32x16());
1810 #[rustfmt::skip]
1811 let expected_result: [f32; 16] = [
1812 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1813 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1814 ];
1815 assert_eq!(result, expected_result);
1816 let k: __mmask16 = 0;
1817 let c: __m512 = _mm512_maskz_dpbf16_ps(k, src, a, b);
1818 let result: [f32; 16] = transmute(c.as_f32x16());
1819 #[rustfmt::skip]
1820 let expected_result: [f32; 16] = [
1821 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
1822 ];
1823 assert_eq!(result, expected_result);
1824 }
1825
// bf16 bit patterns of the values 1.0 through 8.0, shared by the tests below.
// The layout is 1 sign bit, 8 exponent bits (bias 127) and 7 mantissa bits;
// each trailing comment shows the hex value split into those three fields.
const BF16_ONE: u16 = 0x3f80; // 0 01111111 0000000
const BF16_TWO: u16 = 0x4000; // 0 10000000 0000000
const BF16_THREE: u16 = 0x4040; // 0 10000000 1000000
const BF16_FOUR: u16 = 0x4080; // 0 10000001 0000000
const BF16_FIVE: u16 = 0x40a0; // 0 10000001 0100000
const BF16_SIX: u16 = 0x40c0; // 0 10000001 1000000
const BF16_SEVEN: u16 = 0x40e0; // 0 10000001 1100000
const BF16_EIGHT: u16 = 0x4100; // 0 10000010 0000000
1834
1835 #[simd_test(enable = "avx512bf16")]
1836 unsafe fn test_mm512_cvtpbh_ps() {
1837 let a = __m256bh([
1838 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1839 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1840 ]);
1841 let r = _mm512_cvtpbh_ps(a);
1842 let e = _mm512_setr_ps(
1843 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
1844 );
1845 assert_eq_m512(r, e);
1846 }
1847
1848 #[simd_test(enable = "avx512bf16")]
1849 unsafe fn test_mm512_mask_cvtpbh_ps() {
1850 let a = __m256bh([
1851 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1852 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1853 ]);
1854 let src = _mm512_setr_ps(
1855 9., 10., 11., 12., 13., 14., 15., 16., 9., 10., 11., 12., 13., 14., 15., 16.,
1856 );
1857 let k = 0b1010_1010_1010_1010;
1858 let r = _mm512_mask_cvtpbh_ps(src, k, a);
1859 let e = _mm512_setr_ps(
1860 9., 2., 11., 4., 13., 6., 15., 8., 9., 2., 11., 4., 13., 6., 15., 8.,
1861 );
1862 assert_eq_m512(r, e);
1863 }
1864
1865 #[simd_test(enable = "avx512bf16")]
1866 unsafe fn test_mm512_maskz_cvtpbh_ps() {
1867 let a = __m256bh([
1868 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1869 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1870 ]);
1871 let k = 0b1010_1010_1010_1010;
1872 let r = _mm512_maskz_cvtpbh_ps(k, a);
1873 let e = _mm512_setr_ps(
1874 0., 2., 0., 4., 0., 6., 0., 8., 0., 2., 0., 4., 0., 6., 0., 8.,
1875 );
1876 assert_eq_m512(r, e);
1877 }
1878
1879 #[simd_test(enable = "avx512bf16,avx512vl")]
1880 unsafe fn test_mm256_cvtpbh_ps() {
1881 let a = __m128bh([
1882 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1883 ]);
1884 let r = _mm256_cvtpbh_ps(a);
1885 let e = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
1886 assert_eq_m256(r, e);
1887 }
1888
1889 #[simd_test(enable = "avx512bf16,avx512vl")]
1890 unsafe fn test_mm256_mask_cvtpbh_ps() {
1891 let a = __m128bh([
1892 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1893 ]);
1894 let src = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
1895 let k = 0b1010_1010;
1896 let r = _mm256_mask_cvtpbh_ps(src, k, a);
1897 let e = _mm256_setr_ps(9., 2., 11., 4., 13., 6., 15., 8.);
1898 assert_eq_m256(r, e);
1899 }
1900
1901 #[simd_test(enable = "avx512bf16,avx512vl")]
1902 unsafe fn test_mm256_maskz_cvtpbh_ps() {
1903 let a = __m128bh([
1904 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1905 ]);
1906 let k = 0b1010_1010;
1907 let r = _mm256_maskz_cvtpbh_ps(k, a);
1908 let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
1909 assert_eq_m256(r, e);
1910 }
1911
1912 #[simd_test(enable = "avx512bf16,avx512vl")]
1913 unsafe fn test_mm_cvtpbh_ps() {
1914 let a = __m128bh([BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, 0, 0, 0, 0]);
1915 let r = _mm_cvtpbh_ps(a);
1916 let e = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
1917 assert_eq_m128(r, e);
1918 }
1919
1920 #[simd_test(enable = "avx512bf16,avx512vl")]
1921 unsafe fn test_mm_mask_cvtpbh_ps() {
1922 let a = __m128bh([BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, 0, 0, 0, 0]);
1923 let src = _mm_setr_ps(9., 10., 11., 12.);
1924 let k = 0b1010;
1925 let r = _mm_mask_cvtpbh_ps(src, k, a);
1926 let e = _mm_setr_ps(9., 2., 11., 4.);
1927 assert_eq_m128(r, e);
1928 }
1929
1930 #[simd_test(enable = "avx512bf16,avx512vl")]
1931 unsafe fn test_mm_maskz_cvtpbh_ps() {
1932 let a = __m128bh([BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, 0, 0, 0, 0]);
1933 let k = 0b1010;
1934 let r = _mm_maskz_cvtpbh_ps(k, a);
1935 let e = _mm_setr_ps(0., 2., 0., 4.);
1936 assert_eq_m128(r, e);
1937 }
1938
1939 #[simd_test(enable = "avx512bf16")]
1940 unsafe fn test_mm_cvtsbh_ss() {
1941 let r = _mm_cvtsbh_ss(bf16::from_bits(BF16_ONE));
1942 assert_eq!(r, 1.);
1943 }
1944
1945 #[simd_test(enable = "avx512bf16,avx512vl")]
1946 unsafe fn test_mm_cvtneps_pbh() {
1947 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
1948 let r: u16x4 = transmute_copy(&_mm_cvtneps_pbh(a));
1949 let e = u16x4::new(BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR);
1950 assert_eq!(r, e);
1951 }
1952
1953 #[simd_test(enable = "avx512bf16,avx512vl")]
1954 unsafe fn test_mm_mask_cvtneps_pbh() {
1955 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
1956 let src = __m128bh([5, 6, 7, 8, !0, !0, !0, !0]);
1957 let k = 0b1010;
1958 let r: u16x4 = transmute_copy(&_mm_mask_cvtneps_pbh(src, k, a));
1959 let e = u16x4::new(5, BF16_TWO, 7, BF16_FOUR);
1960 assert_eq!(r, e);
1961 }
1962
1963 #[simd_test(enable = "avx512bf16,avx512vl")]
1964 unsafe fn test_mm_maskz_cvtneps_pbh() {
1965 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
1966 let k = 0b1010;
1967 let r: u16x4 = transmute_copy(&_mm_maskz_cvtneps_pbh(k, a));
1968 let e = u16x4::new(0, BF16_TWO, 0, BF16_FOUR);
1969 assert_eq!(r, e);
1970 }
1971
1972 #[simd_test(enable = "avx512bf16,avx512vl")]
1973 unsafe fn test_mm_cvtness_sbh() {
1974 let r = _mm_cvtness_sbh(1.);
1975 assert_eq!(r.to_bits(), BF16_ONE);
1976 }
1977}