1use crate::core_arch::{simd::*, x86::*};
6use crate::intrinsics::simd::*;
7
8#[cfg(test)]
9use stdarch_test::assert_instr;
10
11#[allow(improper_ctypes)]
12unsafe extern "C" {
13 #[link_name = "llvm.x86.avx512bf16.cvtne2ps2bf16.128"]
14 fn cvtne2ps2bf16(a: f32x4, b: f32x4) -> i16x8;
15 #[link_name = "llvm.x86.avx512bf16.cvtne2ps2bf16.256"]
16 fn cvtne2ps2bf16_256(a: f32x8, b: f32x8) -> i16x16;
17 #[link_name = "llvm.x86.avx512bf16.cvtne2ps2bf16.512"]
18 fn cvtne2ps2bf16_512(a: f32x16, b: f32x16) -> i16x32;
19 #[link_name = "llvm.x86.avx512bf16.mask.cvtneps2bf16.128"]
20 fn cvtneps2bf16_128(a: f32x4, src: i16x8, k: __mmask8) -> i16x8;
21 #[link_name = "llvm.x86.avx512bf16.cvtneps2bf16.256"]
22 fn cvtneps2bf16_256(a: f32x8) -> i16x8;
23 #[link_name = "llvm.x86.avx512bf16.cvtneps2bf16.512"]
24 fn cvtneps2bf16_512(a: f32x16) -> i16x16;
25 #[link_name = "llvm.x86.avx512bf16.dpbf16ps.128"]
26 fn dpbf16ps(a: f32x4, b: i16x8, c: i16x8) -> f32x4;
27 #[link_name = "llvm.x86.avx512bf16.dpbf16ps.256"]
28 fn dpbf16ps_256(a: f32x8, b: i16x16, c: i16x16) -> f32x8;
29 #[link_name = "llvm.x86.avx512bf16.dpbf16ps.512"]
30 fn dpbf16ps_512(a: f32x16, b: i16x32, c: i16x32) -> f32x16;
31}
32
33#[inline]
38#[target_feature(enable = "avx512bf16,avx512vl")]
39#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
41pub fn _mm_cvtne2ps_pbh(a: __m128, b: __m128) -> __m128bh {
42 unsafe { transmute(cvtne2ps2bf16(a.as_f32x4(), b.as_f32x4())) }
43}
44
45#[inline]
51#[target_feature(enable = "avx512bf16,avx512vl")]
52#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
53#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
54pub fn _mm_mask_cvtne2ps_pbh(src: __m128bh, k: __mmask8, a: __m128, b: __m128) -> __m128bh {
55 unsafe {
56 let cvt = _mm_cvtne2ps_pbh(a, b).as_u16x8();
57 transmute(simd_select_bitmask(k, cvt, src.as_u16x8()))
58 }
59}
60
61#[inline]
67#[target_feature(enable = "avx512bf16,avx512vl")]
68#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
69#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
70pub fn _mm_maskz_cvtne2ps_pbh(k: __mmask8, a: __m128, b: __m128) -> __m128bh {
71 unsafe {
72 let cvt = _mm_cvtne2ps_pbh(a, b).as_u16x8();
73 transmute(simd_select_bitmask(k, cvt, u16x8::ZERO))
74 }
75}
76
77#[inline]
82#[target_feature(enable = "avx512bf16,avx512vl")]
83#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
84#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
85pub fn _mm256_cvtne2ps_pbh(a: __m256, b: __m256) -> __m256bh {
86 unsafe { transmute(cvtne2ps2bf16_256(a.as_f32x8(), b.as_f32x8())) }
87}
88
89#[inline]
94#[target_feature(enable = "avx512bf16,avx512vl")]
95#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
96#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
97pub fn _mm256_mask_cvtne2ps_pbh(src: __m256bh, k: __mmask16, a: __m256, b: __m256) -> __m256bh {
98 unsafe {
99 let cvt = _mm256_cvtne2ps_pbh(a, b).as_u16x16();
100 transmute(simd_select_bitmask(k, cvt, src.as_u16x16()))
101 }
102}
103
104#[inline]
109#[target_feature(enable = "avx512bf16,avx512vl")]
110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
111#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
112pub fn _mm256_maskz_cvtne2ps_pbh(k: __mmask16, a: __m256, b: __m256) -> __m256bh {
113 unsafe {
114 let cvt = _mm256_cvtne2ps_pbh(a, b).as_u16x16();
115 transmute(simd_select_bitmask(k, cvt, u16x16::ZERO))
116 }
117}
118
119#[inline]
124#[target_feature(enable = "avx512bf16,avx512f")]
125#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
126#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
127pub fn _mm512_cvtne2ps_pbh(a: __m512, b: __m512) -> __m512bh {
128 unsafe { transmute(cvtne2ps2bf16_512(a.as_f32x16(), b.as_f32x16())) }
129}
130
131#[inline]
137#[target_feature(enable = "avx512bf16,avx512f")]
138#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
139#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
140pub fn _mm512_mask_cvtne2ps_pbh(src: __m512bh, k: __mmask32, a: __m512, b: __m512) -> __m512bh {
141 unsafe {
142 let cvt = _mm512_cvtne2ps_pbh(a, b).as_u16x32();
143 transmute(simd_select_bitmask(k, cvt, src.as_u16x32()))
144 }
145}
146
147#[inline]
153#[target_feature(enable = "avx512bf16,avx512f")]
154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
155#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
156pub fn _mm512_maskz_cvtne2ps_pbh(k: __mmask32, a: __m512, b: __m512) -> __m512bh {
157 unsafe {
158 let cvt = _mm512_cvtne2ps_pbh(a, b).as_u16x32();
159 transmute(simd_select_bitmask(k, cvt, u16x32::ZERO))
160 }
161}
162
163#[inline]
167#[target_feature(enable = "avx512bf16,avx512vl")]
168#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
169#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
170pub fn _mm256_cvtneps_pbh(a: __m256) -> __m128bh {
171 unsafe { transmute(cvtneps2bf16_256(a.as_f32x8())) }
172}
173
174#[inline]
179#[target_feature(enable = "avx512bf16,avx512vl")]
180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
181#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
182pub fn _mm256_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m256) -> __m128bh {
183 unsafe {
184 let cvt = _mm256_cvtneps_pbh(a).as_u16x8();
185 transmute(simd_select_bitmask(k, cvt, src.as_u16x8()))
186 }
187}
188
189#[inline]
194#[target_feature(enable = "avx512bf16,avx512vl")]
195#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
196#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
197pub fn _mm256_maskz_cvtneps_pbh(k: __mmask8, a: __m256) -> __m128bh {
198 unsafe {
199 let cvt = _mm256_cvtneps_pbh(a).as_u16x8();
200 transmute(simd_select_bitmask(k, cvt, u16x8::ZERO))
201 }
202}
203
204#[inline]
208#[target_feature(enable = "avx512bf16,avx512f")]
209#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
210#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
211pub fn _mm512_cvtneps_pbh(a: __m512) -> __m256bh {
212 unsafe { transmute(cvtneps2bf16_512(a.as_f32x16())) }
213}
214
215#[inline]
220#[target_feature(enable = "avx512bf16,avx512f")]
221#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
222#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
223pub fn _mm512_mask_cvtneps_pbh(src: __m256bh, k: __mmask16, a: __m512) -> __m256bh {
224 unsafe {
225 let cvt = _mm512_cvtneps_pbh(a).as_u16x16();
226 transmute(simd_select_bitmask(k, cvt, src.as_u16x16()))
227 }
228}
229
230#[inline]
235#[target_feature(enable = "avx512bf16,avx512f")]
236#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
237#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
238pub fn _mm512_maskz_cvtneps_pbh(k: __mmask16, a: __m512) -> __m256bh {
239 unsafe {
240 let cvt = _mm512_cvtneps_pbh(a).as_u16x16();
241 transmute(simd_select_bitmask(k, cvt, u16x16::ZERO))
242 }
243}
244
245#[inline]
250#[target_feature(enable = "avx512bf16,avx512vl")]
251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
252#[cfg_attr(test, assert_instr("vdpbf16ps"))]
253pub fn _mm_dpbf16_ps(src: __m128, a: __m128bh, b: __m128bh) -> __m128 {
254 unsafe { transmute(dpbf16ps(src.as_f32x4(), a.as_i16x8(), b.as_i16x8())) }
255}
256
257#[inline]
263#[target_feature(enable = "avx512bf16,avx512vl")]
264#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
265#[cfg_attr(test, assert_instr("vdpbf16ps"))]
266pub fn _mm_mask_dpbf16_ps(src: __m128, k: __mmask8, a: __m128bh, b: __m128bh) -> __m128 {
267 unsafe {
268 let rst = _mm_dpbf16_ps(src, a, b).as_f32x4();
269 transmute(simd_select_bitmask(k, rst, src.as_f32x4()))
270 }
271}
272
273#[inline]
279#[target_feature(enable = "avx512bf16,avx512vl")]
280#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
281#[cfg_attr(test, assert_instr("vdpbf16ps"))]
282pub fn _mm_maskz_dpbf16_ps(k: __mmask8, src: __m128, a: __m128bh, b: __m128bh) -> __m128 {
283 unsafe {
284 let rst = _mm_dpbf16_ps(src, a, b).as_f32x4();
285 let zero = _mm_set1_ps(0.0_f32).as_f32x4();
286 transmute(simd_select_bitmask(k, rst, zero))
287 }
288}
289
290#[inline]
295#[target_feature(enable = "avx512bf16,avx512vl")]
296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
297#[cfg_attr(test, assert_instr("vdpbf16ps"))]
298pub fn _mm256_dpbf16_ps(src: __m256, a: __m256bh, b: __m256bh) -> __m256 {
299 unsafe { transmute(dpbf16ps_256(src.as_f32x8(), a.as_i16x16(), b.as_i16x16())) }
300}
301
302#[inline]
308#[target_feature(enable = "avx512bf16,avx512vl")]
309#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
310#[cfg_attr(test, assert_instr("vdpbf16ps"))]
311pub fn _mm256_mask_dpbf16_ps(src: __m256, k: __mmask8, a: __m256bh, b: __m256bh) -> __m256 {
312 unsafe {
313 let rst = _mm256_dpbf16_ps(src, a, b).as_f32x8();
314 transmute(simd_select_bitmask(k, rst, src.as_f32x8()))
315 }
316}
317
318#[inline]
324#[target_feature(enable = "avx512bf16,avx512vl")]
325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
326#[cfg_attr(test, assert_instr("vdpbf16ps"))]
327pub fn _mm256_maskz_dpbf16_ps(k: __mmask8, src: __m256, a: __m256bh, b: __m256bh) -> __m256 {
328 unsafe {
329 let rst = _mm256_dpbf16_ps(src, a, b).as_f32x8();
330 transmute(simd_select_bitmask(k, rst, f32x8::ZERO))
331 }
332}
333
334#[inline]
341#[target_feature(enable = "avx512bf16,avx512f")]
342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
343#[cfg_attr(test, assert_instr("vdpbf16ps"))]
344pub fn _mm512_dpbf16_ps(src: __m512, a: __m512bh, b: __m512bh) -> __m512 {
345 unsafe { transmute(dpbf16ps_512(src.as_f32x16(), a.as_i16x32(), b.as_i16x32())) }
346}
347
348#[inline]
354#[target_feature(enable = "avx512bf16,avx512f")]
355#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
356#[cfg_attr(test, assert_instr("vdpbf16ps"))]
357pub fn _mm512_mask_dpbf16_ps(src: __m512, k: __mmask16, a: __m512bh, b: __m512bh) -> __m512 {
358 unsafe {
359 let rst = _mm512_dpbf16_ps(src, a, b).as_f32x16();
360 transmute(simd_select_bitmask(k, rst, src.as_f32x16()))
361 }
362}
363
364#[inline]
370#[target_feature(enable = "avx512bf16,avx512f")]
371#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
372#[cfg_attr(test, assert_instr("vdpbf16ps"))]
373pub fn _mm512_maskz_dpbf16_ps(k: __mmask16, src: __m512, a: __m512bh, b: __m512bh) -> __m512 {
374 unsafe {
375 let rst = _mm512_dpbf16_ps(src, a, b).as_f32x16();
376 transmute(simd_select_bitmask(k, rst, f32x16::ZERO))
377 }
378}
379
380#[inline]
385#[target_feature(enable = "avx512bf16,avx512f")]
386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
387pub fn _mm512_cvtpbh_ps(a: __m256bh) -> __m512 {
388 unsafe { _mm512_castsi512_ps(_mm512_slli_epi32::<16>(_mm512_cvtepi16_epi32(transmute(a)))) }
389}
390
391#[inline]
397#[target_feature(enable = "avx512bf16,avx512f")]
398#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
399pub fn _mm512_mask_cvtpbh_ps(src: __m512, k: __mmask16, a: __m256bh) -> __m512 {
400 unsafe {
401 let cvt = _mm512_cvtpbh_ps(a);
402 transmute(simd_select_bitmask(k, cvt.as_f32x16(), src.as_f32x16()))
403 }
404}
405
406#[inline]
412#[target_feature(enable = "avx512bf16,avx512f")]
413#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
414pub fn _mm512_maskz_cvtpbh_ps(k: __mmask16, a: __m256bh) -> __m512 {
415 unsafe {
416 let cvt = _mm512_cvtpbh_ps(a);
417 transmute(simd_select_bitmask(k, cvt.as_f32x16(), f32x16::ZERO))
418 }
419}
420
421#[inline]
426#[target_feature(enable = "avx512bf16,avx512vl")]
427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
428pub fn _mm256_cvtpbh_ps(a: __m128bh) -> __m256 {
429 unsafe { _mm256_castsi256_ps(_mm256_slli_epi32::<16>(_mm256_cvtepi16_epi32(transmute(a)))) }
430}
431
432#[inline]
438#[target_feature(enable = "avx512bf16,avx512vl")]
439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
440pub fn _mm256_mask_cvtpbh_ps(src: __m256, k: __mmask8, a: __m128bh) -> __m256 {
441 unsafe {
442 let cvt = _mm256_cvtpbh_ps(a);
443 transmute(simd_select_bitmask(k, cvt.as_f32x8(), src.as_f32x8()))
444 }
445}
446
447#[inline]
453#[target_feature(enable = "avx512bf16,avx512vl")]
454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
455pub fn _mm256_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m256 {
456 unsafe {
457 let cvt = _mm256_cvtpbh_ps(a);
458 transmute(simd_select_bitmask(k, cvt.as_f32x8(), f32x8::ZERO))
459 }
460}
461
462#[inline]
467#[target_feature(enable = "avx512bf16,avx512vl")]
468#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
469pub fn _mm_cvtpbh_ps(a: __m128bh) -> __m128 {
470 unsafe { _mm_castsi128_ps(_mm_slli_epi32::<16>(_mm_cvtepi16_epi32(transmute(a)))) }
471}
472
473#[inline]
479#[target_feature(enable = "avx512bf16,avx512vl")]
480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
481pub fn _mm_mask_cvtpbh_ps(src: __m128, k: __mmask8, a: __m128bh) -> __m128 {
482 unsafe {
483 let cvt = _mm_cvtpbh_ps(a);
484 transmute(simd_select_bitmask(k, cvt.as_f32x4(), src.as_f32x4()))
485 }
486}
487
488#[inline]
494#[target_feature(enable = "avx512bf16,avx512vl")]
495#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
496pub fn _mm_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m128 {
497 unsafe {
498 let cvt = _mm_cvtpbh_ps(a);
499 transmute(simd_select_bitmask(k, cvt.as_f32x4(), f32x4::ZERO))
500 }
501}
502
503#[inline]
508#[target_feature(enable = "avx512bf16,avx512f")]
509#[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")]
510pub fn _mm_cvtsbh_ss(a: bf16) -> f32 {
511 f32::from_bits((a.to_bits() as u32) << 16)
512}
513
514#[inline]
519#[target_feature(enable = "avx512bf16,avx512vl")]
520#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
521#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
522pub fn _mm_cvtneps_pbh(a: __m128) -> __m128bh {
523 _mm_mask_cvtneps_pbh(__m128bh::splat(0), !0, a)
524}
525
526#[inline]
532#[target_feature(enable = "avx512bf16,avx512vl")]
533#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
534#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
535pub fn _mm_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m128) -> __m128bh {
536 unsafe { cvtneps2bf16_128(a.as_f32x4(), src.as_i16x8(), k).as_m128bh() }
537}
538
539#[inline]
545#[target_feature(enable = "avx512bf16,avx512vl")]
546#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
547#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
548pub fn _mm_maskz_cvtneps_pbh(k: __mmask8, a: __m128) -> __m128bh {
549 _mm_mask_cvtneps_pbh(__m128bh::splat(0), k, a)
550}
551
552#[inline]
557#[target_feature(enable = "avx512bf16,avx512vl")]
558#[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")]
559pub fn _mm_cvtness_sbh(a: f32) -> bf16 {
560 unsafe {
561 let value: u16 = simd_extract!(_mm_cvtneps_pbh(_mm_set_ss(a)), 0);
562 bf16::from_bits(value)
563 }
564}
565
566#[cfg(test)]
567mod tests {
568 use crate::core_arch::simd::{f32x4, f32x8, f32x16, u16x4, u16x8, u16x16, u16x32};
569 use crate::{
570 core_arch::x86::*,
571 mem::{transmute, transmute_copy},
572 };
573 use stdarch_test::simd_test;
574
575 #[simd_test(enable = "avx512bf16,avx512vl")]
576 fn test_mm_cvtne2ps_pbh() {
577 let a_array = [178.125_f32, 10.5_f32, 3.75_f32, 50.25_f32];
578 let b_array = [-178.125_f32, -10.5_f32, -3.75_f32, -50.25_f32];
579 let a = f32x4::from_array(a_array).as_m128();
580 let b = f32x4::from_array(b_array).as_m128();
581 let c: __m128bh = _mm_cvtne2ps_pbh(a, b);
582 let result = *c.as_u16x8().as_array();
583 #[rustfmt::skip]
584 let expected_result: [u16; 8] = [
585 0b1_10000110_0110010,
586 0b1_10000010_0101000,
587 0b1_10000000_1110000,
588 0b1_10000100_1001001,
589 0b0_10000110_0110010,
590 0b0_10000010_0101000,
591 0b0_10000000_1110000,
592 0b0_10000100_1001001,
593 ];
594 assert_eq!(result, expected_result);
595 }
596
597 #[simd_test(enable = "avx512bf16,avx512vl")]
598 fn test_mm_mask_cvtne2ps_pbh() {
599 let a_array = [178.125_f32, 10.5_f32, 3.75_f32, 50.25_f32];
600 let b_array = [-178.125_f32, -10.5_f32, -3.75_f32, -50.25_f32];
601 #[rustfmt::skip]
602 let src_array: [u16; 8] = [
603 0b0_10000110_0110010,
604 0b0_10000010_0101000,
605 0b0_10000000_1110000,
606 0b0_10000100_1001001,
607 0b0_10000110_0110010,
608 0b0_10000010_0101000,
609 0b0_10000000_1110000,
610 0b0_10000100_1001001,
611 ];
612 let src = u16x8::from_array(src_array).as_m128bh();
613 let a = f32x4::from_array(a_array).as_m128();
614 let b = f32x4::from_array(b_array).as_m128();
615 let k: __mmask8 = 0b1111_1111;
616 let c: __m128bh = _mm_mask_cvtne2ps_pbh(src, k, a, b);
617 let result = *c.as_u16x8().as_array();
618 #[rustfmt::skip]
619 let expected_result: [u16; 8] = [
620 0b1_10000110_0110010,
621 0b1_10000010_0101000,
622 0b1_10000000_1110000,
623 0b1_10000100_1001001,
624 0b0_10000110_0110010,
625 0b0_10000010_0101000,
626 0b0_10000000_1110000,
627 0b0_10000100_1001001,
628 ];
629 assert_eq!(result, expected_result);
630 let k = 0b0000_0000;
631 let c = _mm_mask_cvtne2ps_pbh(src, k, a, b);
632 let result = *c.as_u16x8().as_array();
633 let expected_result = src_array;
634 assert_eq!(result, expected_result);
635 }
636
637 #[simd_test(enable = "avx512bf16,avx512vl")]
638 fn test_mm_maskz_cvtne2ps_pbh() {
639 let a_array = [178.125_f32, 10.5_f32, 3.75_f32, 50.25_f32];
640 let b_array = [-178.125_f32, -10.5_f32, -3.75_f32, -50.25_f32];
641 let a = f32x4::from_array(a_array).as_m128();
642 let b = f32x4::from_array(b_array).as_m128();
643 let k: __mmask8 = 0b1111_1111;
644 let c: __m128bh = _mm_maskz_cvtne2ps_pbh(k, a, b);
645 let result = *c.as_u16x8().as_array();
646 #[rustfmt::skip]
647 let expected_result: [u16; 8] = [
648 0b1_10000110_0110010,
649 0b1_10000010_0101000,
650 0b1_10000000_1110000,
651 0b1_10000100_1001001,
652 0b0_10000110_0110010,
653 0b0_10000010_0101000,
654 0b0_10000000_1110000,
655 0b0_10000100_1001001,
656 ];
657 assert_eq!(result, expected_result);
658 let k = 0b0011_1100;
659 let c = _mm_maskz_cvtne2ps_pbh(k, a, b);
660 let result = *c.as_u16x8().as_array();
661 #[rustfmt::skip]
662 let expected_result: [u16; 8] = [
663 0,
664 0,
665 0b1_10000000_1110000,
666 0b1_10000100_1001001,
667 0b0_10000110_0110010,
668 0b0_10000010_0101000,
669 0,
670 0,
671 ];
672 assert_eq!(result, expected_result);
673 }
674
675 #[simd_test(enable = "avx512bf16,avx512vl")]
676 fn test_mm256_cvtne2ps_pbh() {
677 #[rustfmt::skip]
678 let a_array = [
679 178.125_f32,
680 10.5_f32,
681 3.75_f32,
682 50.25_f32,
683 16.5_f32,
684 255.11_f32,
685 1000.158_f32,
686 575.575_f32,
687 ];
688 let b_array = [
689 -178.125_f32,
690 -10.5_f32,
691 -3.75_f32,
692 -50.25_f32,
693 -16.5_f32,
694 -255.11_f32,
695 -1000.158_f32,
696 -575.575_f32,
697 ];
698 let a = f32x8::from_array(a_array).as_m256();
699 let b = f32x8::from_array(b_array).as_m256();
700 let c: __m256bh = _mm256_cvtne2ps_pbh(a, b);
701 let result = *c.as_u16x16().as_array();
702 #[rustfmt::skip]
703 let expected_result: [u16; 16] = [
704 0b1_10000110_0110010,
705 0b1_10000010_0101000,
706 0b1_10000000_1110000,
707 0b1_10000100_1001001,
708 0b1_10000011_0000100,
709 0b1_10000110_1111111,
710 0b1_10001000_1111010,
711 0b1_10001000_0010000,
712 0b0_10000110_0110010,
713 0b0_10000010_0101000,
714 0b0_10000000_1110000,
715 0b0_10000100_1001001,
716 0b0_10000011_0000100,
717 0b0_10000110_1111111,
718 0b0_10001000_1111010,
719 0b0_10001000_0010000,
720 ];
721 assert_eq!(result, expected_result);
722 }
723
724 #[simd_test(enable = "avx512bf16,avx512vl")]
725 fn test_mm256_mask_cvtne2ps_pbh() {
726 #[rustfmt::skip]
727 let a_array = [
728 178.125_f32,
729 10.5_f32,
730 3.75_f32,
731 50.25_f32,
732 16.5_f32,
733 255.11_f32,
734 1000.158_f32,
735 575.575_f32,
736 ];
737 let b_array = [
738 -178.125_f32,
739 -10.5_f32,
740 -3.75_f32,
741 -50.25_f32,
742 -16.5_f32,
743 -255.11_f32,
744 -1000.158_f32,
745 -575.575_f32,
746 ];
747 let src_array: [u16; 16] = [
748 0b0_10000110_0110010,
749 0b0_10000010_0101000,
750 0b0_10000000_1110000,
751 0b0_10000100_1001001,
752 0b0_10000110_0110010,
753 0b0_10000010_0101000,
754 0b0_10000000_1110000,
755 0b0_10000100_1001001,
756 0b0_10000110_0110010,
757 0b0_10000010_0101000,
758 0b0_10000000_1110000,
759 0b0_10000100_1001001,
760 0b0_10000110_0110010,
761 0b0_10000010_0101000,
762 0b0_10000000_1110000,
763 0b0_10000100_1001001,
764 ];
765 let src = u16x16::from_array(src_array).as_m256bh();
766 let a = f32x8::from_array(a_array).as_m256();
767 let b = f32x8::from_array(b_array).as_m256();
768 let k: __mmask16 = 0xffff;
769 let c: __m256bh = _mm256_mask_cvtne2ps_pbh(src, k, a, b);
770 let result = *c.as_u16x16().as_array();
771 #[rustfmt::skip]
772 let expected_result: [u16; 16] = [
773 0b1_10000110_0110010,
774 0b1_10000010_0101000,
775 0b1_10000000_1110000,
776 0b1_10000100_1001001,
777 0b1_10000011_0000100,
778 0b1_10000110_1111111,
779 0b1_10001000_1111010,
780 0b1_10001000_0010000,
781 0b0_10000110_0110010,
782 0b0_10000010_0101000,
783 0b0_10000000_1110000,
784 0b0_10000100_1001001,
785 0b0_10000011_0000100,
786 0b0_10000110_1111111,
787 0b0_10001000_1111010,
788 0b0_10001000_0010000,
789 ];
790 assert_eq!(result, expected_result);
791 let k: __mmask16 = 0;
792 let c: __m256bh = _mm256_mask_cvtne2ps_pbh(src, k, a, b);
793 let result = *c.as_u16x16().as_array();
794 let expected_result = src_array;
795 assert_eq!(result, expected_result);
796 }
797
798 #[simd_test(enable = "avx512bf16,avx512vl")]
799 fn test_mm256_maskz_cvtne2ps_pbh() {
800 #[rustfmt::skip]
801 let a_array = [
802 178.125_f32,
803 10.5_f32,
804 3.75_f32,
805 50.25_f32,
806 16.5_f32,
807 255.11_f32,
808 1000.158_f32,
809 575.575_f32,
810 ];
811 let b_array = [
812 -178.125_f32,
813 -10.5_f32,
814 -3.75_f32,
815 -50.25_f32,
816 -16.5_f32,
817 -255.11_f32,
818 -1000.158_f32,
819 -575.575_f32,
820 ];
821 let a = f32x8::from_array(a_array).as_m256();
822 let b = f32x8::from_array(b_array).as_m256();
823 let k: __mmask16 = 0xffff;
824 let c: __m256bh = _mm256_maskz_cvtne2ps_pbh(k, a, b);
825 let result = *c.as_u16x16().as_array();
826 #[rustfmt::skip]
827 let expected_result: [u16; 16] = [
828 0b1_10000110_0110010,
829 0b1_10000010_0101000,
830 0b1_10000000_1110000,
831 0b1_10000100_1001001,
832 0b1_10000011_0000100,
833 0b1_10000110_1111111,
834 0b1_10001000_1111010,
835 0b1_10001000_0010000,
836 0b0_10000110_0110010,
837 0b0_10000010_0101000,
838 0b0_10000000_1110000,
839 0b0_10000100_1001001,
840 0b0_10000011_0000100,
841 0b0_10000110_1111111,
842 0b0_10001000_1111010,
843 0b0_10001000_0010000,
844 ];
845 assert_eq!(result, expected_result);
846 let k: __mmask16 = 0b0110_1100_0011_0110;
847 let c: __m256bh = _mm256_maskz_cvtne2ps_pbh(k, a, b);
848 let result = *c.as_u16x16().as_array();
849 #[rustfmt::skip]
850 let expected_result: [u16; 16] = [
851 0,
852 0b1_10000010_0101000,
853 0b1_10000000_1110000,
854 0,
855 0b1_10000011_0000100,
856 0b1_10000110_1111111,
857 0,
858 0,
859 0,
860 0,
861 0b0_10000000_1110000,
862 0b0_10000100_1001001,
863 0,
864 0b0_10000110_1111111,
865 0b0_10001000_1111010,
866 0,
867 ];
868 assert_eq!(result, expected_result);
869 }
870
871 #[simd_test(enable = "avx512bf16,avx512f")]
872 fn test_mm512_cvtne2ps_pbh() {
873 #[rustfmt::skip]
874 let a_array = [
875 178.125_f32,
876 10.5_f32,
877 3.75_f32,
878 50.25_f32,
879 16.5_f32,
880 255.11_f32,
881 1000.158_f32,
882 575.575_f32,
883 178.125_f32,
884 10.5_f32,
885 3.75_f32,
886 50.25_f32,
887 16.5_f32,
888 255.11_f32,
889 1000.158_f32,
890 575.575_f32,
891 ];
892 let b_array = [
893 -178.125_f32,
894 -10.5_f32,
895 -3.75_f32,
896 -50.25_f32,
897 -16.5_f32,
898 -255.11_f32,
899 -1000.158_f32,
900 -575.575_f32,
901 -178.125_f32,
902 -10.5_f32,
903 -3.75_f32,
904 -50.25_f32,
905 -16.5_f32,
906 -255.11_f32,
907 -1000.158_f32,
908 -575.575_f32,
909 ];
910 let a = f32x16::from_array(a_array).as_m512();
911 let b = f32x16::from_array(b_array).as_m512();
912 let c: __m512bh = _mm512_cvtne2ps_pbh(a, b);
913 let result = *c.as_u16x32().as_array();
914 #[rustfmt::skip]
915 let expected_result: [u16; 32] = [
916 0b1_10000110_0110010,
917 0b1_10000010_0101000,
918 0b1_10000000_1110000,
919 0b1_10000100_1001001,
920 0b1_10000011_0000100,
921 0b1_10000110_1111111,
922 0b1_10001000_1111010,
923 0b1_10001000_0010000,
924 0b1_10000110_0110010,
925 0b1_10000010_0101000,
926 0b1_10000000_1110000,
927 0b1_10000100_1001001,
928 0b1_10000011_0000100,
929 0b1_10000110_1111111,
930 0b1_10001000_1111010,
931 0b1_10001000_0010000,
932 0b0_10000110_0110010,
933 0b0_10000010_0101000,
934 0b0_10000000_1110000,
935 0b0_10000100_1001001,
936 0b0_10000011_0000100,
937 0b0_10000110_1111111,
938 0b0_10001000_1111010,
939 0b0_10001000_0010000,
940 0b0_10000110_0110010,
941 0b0_10000010_0101000,
942 0b0_10000000_1110000,
943 0b0_10000100_1001001,
944 0b0_10000011_0000100,
945 0b0_10000110_1111111,
946 0b0_10001000_1111010,
947 0b0_10001000_0010000,
948 ];
949 assert_eq!(result, expected_result);
950 }
951
952 #[simd_test(enable = "avx512bf16,avx512f")]
953 fn test_mm512_mask_cvtne2ps_pbh() {
954 #[rustfmt::skip]
955 let a_array = [
956 178.125_f32,
957 10.5_f32,
958 3.75_f32,
959 50.25_f32,
960 16.5_f32,
961 255.11_f32,
962 1000.158_f32,
963 575.575_f32,
964 178.125_f32,
965 10.5_f32,
966 3.75_f32,
967 50.25_f32,
968 16.5_f32,
969 255.11_f32,
970 1000.158_f32,
971 575.575_f32,
972 ];
973 let b_array = [
974 -178.125_f32,
975 -10.5_f32,
976 -3.75_f32,
977 -50.25_f32,
978 -16.5_f32,
979 -255.11_f32,
980 -1000.158_f32,
981 -575.575_f32,
982 -178.125_f32,
983 -10.5_f32,
984 -3.75_f32,
985 -50.25_f32,
986 -16.5_f32,
987 -255.11_f32,
988 -1000.158_f32,
989 -575.575_f32,
990 ];
991 let src_array: [u16; 32] = [
992 0b0_10000110_0110010,
993 0b0_10000010_0101000,
994 0b0_10000000_1110000,
995 0b0_10000100_1001001,
996 0b0_10000110_0110010,
997 0b0_10000010_0101000,
998 0b0_10000000_1110000,
999 0b0_10000100_1001001,
1000 0b0_10000110_0110010,
1001 0b0_10000010_0101000,
1002 0b0_10000000_1110000,
1003 0b0_10000100_1001001,
1004 0b0_10000110_0110010,
1005 0b0_10000010_0101000,
1006 0b0_10000000_1110000,
1007 0b0_10000100_1001001,
1008 0b0_10000110_0110010,
1009 0b0_10000010_0101000,
1010 0b0_10000000_1110000,
1011 0b0_10000100_1001001,
1012 0b0_10000110_0110010,
1013 0b0_10000010_0101000,
1014 0b0_10000000_1110000,
1015 0b0_10000100_1001001,
1016 0b0_10000110_0110010,
1017 0b0_10000010_0101000,
1018 0b0_10000000_1110000,
1019 0b0_10000100_1001001,
1020 0b0_10000110_0110010,
1021 0b0_10000010_0101000,
1022 0b0_10000000_1110000,
1023 0b0_10000100_1001001,
1024 ];
1025 let src = u16x32::from_array(src_array).as_m512bh();
1026 let a = f32x16::from_array(a_array).as_m512();
1027 let b = f32x16::from_array(b_array).as_m512();
1028 let k: __mmask32 = 0xffffffff;
1029 let c: __m512bh = _mm512_mask_cvtne2ps_pbh(src, k, a, b);
1030 let result = *c.as_u16x32().as_array();
1031 #[rustfmt::skip]
1032 let expected_result: [u16; 32] = [
1033 0b1_10000110_0110010,
1034 0b1_10000010_0101000,
1035 0b1_10000000_1110000,
1036 0b1_10000100_1001001,
1037 0b1_10000011_0000100,
1038 0b1_10000110_1111111,
1039 0b1_10001000_1111010,
1040 0b1_10001000_0010000,
1041 0b1_10000110_0110010,
1042 0b1_10000010_0101000,
1043 0b1_10000000_1110000,
1044 0b1_10000100_1001001,
1045 0b1_10000011_0000100,
1046 0b1_10000110_1111111,
1047 0b1_10001000_1111010,
1048 0b1_10001000_0010000,
1049 0b0_10000110_0110010,
1050 0b0_10000010_0101000,
1051 0b0_10000000_1110000,
1052 0b0_10000100_1001001,
1053 0b0_10000011_0000100,
1054 0b0_10000110_1111111,
1055 0b0_10001000_1111010,
1056 0b0_10001000_0010000,
1057 0b0_10000110_0110010,
1058 0b0_10000010_0101000,
1059 0b0_10000000_1110000,
1060 0b0_10000100_1001001,
1061 0b0_10000011_0000100,
1062 0b0_10000110_1111111,
1063 0b0_10001000_1111010,
1064 0b0_10001000_0010000,
1065 ];
1066 assert_eq!(result, expected_result);
1067 let k: __mmask32 = 0;
1068 let c: __m512bh = _mm512_mask_cvtne2ps_pbh(src, k, a, b);
1069 let result = *c.as_u16x32().as_array();
1070 let expected_result = src_array;
1071 assert_eq!(result, expected_result);
1072 }
1073
1074 #[simd_test(enable = "avx512bf16,avx512f")]
1075 fn test_mm512_maskz_cvtne2ps_pbh() {
1076 #[rustfmt::skip]
1077 let a_array = [
1078 178.125_f32,
1079 10.5_f32,
1080 3.75_f32,
1081 50.25_f32,
1082 16.5_f32,
1083 255.11_f32,
1084 1000.158_f32,
1085 575.575_f32,
1086 178.125_f32,
1087 10.5_f32,
1088 3.75_f32,
1089 50.25_f32,
1090 16.5_f32,
1091 255.11_f32,
1092 1000.158_f32,
1093 575.575_f32,
1094 ];
1095 let b_array = [
1096 -178.125_f32,
1097 -10.5_f32,
1098 -3.75_f32,
1099 -50.25_f32,
1100 -16.5_f32,
1101 -255.11_f32,
1102 -1000.158_f32,
1103 -575.575_f32,
1104 -178.125_f32,
1105 -10.5_f32,
1106 -3.75_f32,
1107 -50.25_f32,
1108 -16.5_f32,
1109 -255.11_f32,
1110 -1000.158_f32,
1111 -575.575_f32,
1112 ];
1113 let a = f32x16::from_array(a_array).as_m512();
1114 let b = f32x16::from_array(b_array).as_m512();
1115 let k: __mmask32 = 0xffffffff;
1116 let c: __m512bh = _mm512_maskz_cvtne2ps_pbh(k, a, b);
1117 let result = *c.as_u16x32().as_array();
1118 #[rustfmt::skip]
1119 let expected_result: [u16; 32] = [
1120 0b1_10000110_0110010,
1121 0b1_10000010_0101000,
1122 0b1_10000000_1110000,
1123 0b1_10000100_1001001,
1124 0b1_10000011_0000100,
1125 0b1_10000110_1111111,
1126 0b1_10001000_1111010,
1127 0b1_10001000_0010000,
1128 0b1_10000110_0110010,
1129 0b1_10000010_0101000,
1130 0b1_10000000_1110000,
1131 0b1_10000100_1001001,
1132 0b1_10000011_0000100,
1133 0b1_10000110_1111111,
1134 0b1_10001000_1111010,
1135 0b1_10001000_0010000,
1136 0b0_10000110_0110010,
1137 0b0_10000010_0101000,
1138 0b0_10000000_1110000,
1139 0b0_10000100_1001001,
1140 0b0_10000011_0000100,
1141 0b0_10000110_1111111,
1142 0b0_10001000_1111010,
1143 0b0_10001000_0010000,
1144 0b0_10000110_0110010,
1145 0b0_10000010_0101000,
1146 0b0_10000000_1110000,
1147 0b0_10000100_1001001,
1148 0b0_10000011_0000100,
1149 0b0_10000110_1111111,
1150 0b0_10001000_1111010,
1151 0b0_10001000_0010000,
1152 ];
1153 assert_eq!(result, expected_result);
1154 let k: __mmask32 = 0b1100_1010_1001_0110_1010_0011_0101_0110;
1155 let c: __m512bh = _mm512_maskz_cvtne2ps_pbh(k, a, b);
1156 let result = *c.as_u16x32().as_array();
1157 #[rustfmt::skip]
1158 let expected_result: [u16; 32] = [
1159 0,
1160 0b1_10000010_0101000,
1161 0b1_10000000_1110000,
1162 0,
1163 0b1_10000011_0000100,
1164 0,
1165 0b1_10001000_1111010,
1166 0,
1167 0b1_10000110_0110010,
1168 0b1_10000010_0101000,
1169 0,
1170 0,
1171 0,
1172 0b1_10000110_1111111,
1173 0,
1174 0b1_10001000_0010000,
1175 0,
1176 0b0_10000010_0101000,
1177 0b0_10000000_1110000,
1178 0,
1179 0b0_10000011_0000100,
1180 0,
1181 0,
1182 0b0_10001000_0010000,
1183 0,
1184 0b0_10000010_0101000,
1185 0,
1186 0b0_10000100_1001001,
1187 0,
1188 0,
1189 0b0_10001000_1111010,
1190 0b0_10001000_0010000,
1191 ];
1192 assert_eq!(result, expected_result);
1193 }
1194
1195 #[simd_test(enable = "avx512bf16,avx512vl")]
1196 fn test_mm256_cvtneps_pbh() {
1197 #[rustfmt::skip]
1198 let a_array = [
1199 178.125_f32,
1200 10.5_f32,
1201 3.75_f32,
1202 50.25_f32,
1203 16.5_f32,
1204 255.11_f32,
1205 1000.158_f32,
1206 575.575_f32,
1207 ];
1208 let a = f32x8::from_array(a_array).as_m256();
1209 let c: __m128bh = _mm256_cvtneps_pbh(a);
1210 let result = *c.as_u16x8().as_array();
1211 #[rustfmt::skip]
1212 let expected_result: [u16; 8] = [
1213 0b0_10000110_0110010,
1214 0b0_10000010_0101000,
1215 0b0_10000000_1110000,
1216 0b0_10000100_1001001,
1217 0b0_10000011_0000100,
1218 0b0_10000110_1111111,
1219 0b0_10001000_1111010,
1220 0b0_10001000_0010000,
1221 ];
1222 assert_eq!(result, expected_result);
1223 }
1224
1225 #[simd_test(enable = "avx512bf16,avx512vl")]
1226 fn test_mm256_mask_cvtneps_pbh() {
1227 #[rustfmt::skip]
1228 let a_array = [
1229 178.125_f32,
1230 10.5_f32,
1231 3.75_f32,
1232 50.25_f32,
1233 16.5_f32,
1234 255.11_f32,
1235 1000.158_f32,
1236 575.575_f32,
1237 ];
1238 let src_array: [u16; 8] = [
1239 0b1_10000110_0110010,
1240 0b1_10000010_0101000,
1241 0b1_10000000_1110000,
1242 0b1_10000100_1001001,
1243 0b1_10000011_0000100,
1244 0b1_10000110_1111111,
1245 0b1_10001000_1111010,
1246 0b1_10001000_0010000,
1247 ];
1248 let src = u16x8::from_array(src_array).as_m128bh();
1249 let a = f32x8::from_array(a_array).as_m256();
1250 let k: __mmask8 = 0xff;
1251 let b = _mm256_mask_cvtneps_pbh(src, k, a);
1252 let result = *b.as_u16x8().as_array();
1253 #[rustfmt::skip]
1254 let expected_result: [u16; 8] = [
1255 0b0_10000110_0110010,
1256 0b0_10000010_0101000,
1257 0b0_10000000_1110000,
1258 0b0_10000100_1001001,
1259 0b0_10000011_0000100,
1260 0b0_10000110_1111111,
1261 0b0_10001000_1111010,
1262 0b0_10001000_0010000,
1263 ];
1264 assert_eq!(result, expected_result);
1265 let k: __mmask8 = 0x0;
1266 let b: __m128bh = _mm256_mask_cvtneps_pbh(src, k, a);
1267 let result = *b.as_u16x8().as_array();
1268 let expected_result: [u16; 8] = src_array;
1269 assert_eq!(result, expected_result);
1270 }
1271
1272 #[simd_test(enable = "avx512bf16,avx512vl")]
1273 fn test_mm256_maskz_cvtneps_pbh() {
1274 #[rustfmt::skip]
1275 let a_array = [
1276 178.125_f32,
1277 10.5_f32,
1278 3.75_f32,
1279 50.25_f32,
1280 16.5_f32,
1281 255.11_f32,
1282 1000.158_f32,
1283 575.575_f32,
1284 ];
1285 let a = f32x8::from_array(a_array).as_m256();
1286 let k: __mmask8 = 0xff;
1287 let b = _mm256_maskz_cvtneps_pbh(k, a);
1288 let result = *b.as_u16x8().as_array();
1289 #[rustfmt::skip]
1290 let expected_result: [u16; 8] = [
1291 0b0_10000110_0110010,
1292 0b0_10000010_0101000,
1293 0b0_10000000_1110000,
1294 0b0_10000100_1001001,
1295 0b0_10000011_0000100,
1296 0b0_10000110_1111111,
1297 0b0_10001000_1111010,
1298 0b0_10001000_0010000,
1299 ];
1300 assert_eq!(result, expected_result);
1301 let k: __mmask8 = 0x6;
1302 let b: __m128bh = _mm256_maskz_cvtneps_pbh(k, a);
1303 let result = *b.as_u16x8().as_array();
1304 let expected_result: [u16; 8] =
1305 [0, 0b0_10000010_0101000, 0b0_10000000_1110000, 0, 0, 0, 0, 0];
1306 assert_eq!(result, expected_result);
1307 }
1308
1309 #[simd_test(enable = "avx512bf16,avx512f")]
1310 fn test_mm512_cvtneps_pbh() {
1311 #[rustfmt::skip]
1312 let a_array = [
1313 178.125_f32,
1314 10.5_f32,
1315 3.75_f32,
1316 50.25_f32,
1317 16.5_f32,
1318 255.11_f32,
1319 1000.158_f32,
1320 575.575_f32,
1321 178.125_f32,
1322 10.5_f32,
1323 3.75_f32,
1324 50.25_f32,
1325 16.5_f32,
1326 255.11_f32,
1327 1000.158_f32,
1328 575.575_f32,
1329 ];
1330 let a = f32x16::from_array(a_array).as_m512();
1331 let c: __m256bh = _mm512_cvtneps_pbh(a);
1332 let result = *c.as_u16x16().as_array();
1333 #[rustfmt::skip]
1334 let expected_result: [u16; 16] = [
1335 0b0_10000110_0110010,
1336 0b0_10000010_0101000,
1337 0b0_10000000_1110000,
1338 0b0_10000100_1001001,
1339 0b0_10000011_0000100,
1340 0b0_10000110_1111111,
1341 0b0_10001000_1111010,
1342 0b0_10001000_0010000,
1343 0b0_10000110_0110010,
1344 0b0_10000010_0101000,
1345 0b0_10000000_1110000,
1346 0b0_10000100_1001001,
1347 0b0_10000011_0000100,
1348 0b0_10000110_1111111,
1349 0b0_10001000_1111010,
1350 0b0_10001000_0010000,
1351 ];
1352 assert_eq!(result, expected_result);
1353 }
1354
1355 #[simd_test(enable = "avx512bf16,avx512f")]
1356 fn test_mm512_mask_cvtneps_pbh() {
1357 #[rustfmt::skip]
1358 let a_array = [
1359 178.125_f32,
1360 10.5_f32,
1361 3.75_f32,
1362 50.25_f32,
1363 16.5_f32,
1364 255.11_f32,
1365 1000.158_f32,
1366 575.575_f32,
1367 178.125_f32,
1368 10.5_f32,
1369 3.75_f32,
1370 50.25_f32,
1371 16.5_f32,
1372 255.11_f32,
1373 1000.158_f32,
1374 575.575_f32,
1375 ];
1376 let src_array: [u16; 16] = [
1377 0b1_10000110_0110010,
1378 0b1_10000010_0101000,
1379 0b1_10000000_1110000,
1380 0b1_10000100_1001001,
1381 0b1_10000011_0000100,
1382 0b1_10000110_1111111,
1383 0b1_10001000_1111010,
1384 0b1_10001000_0010000,
1385 0b1_10000110_0110010,
1386 0b1_10000010_0101000,
1387 0b1_10000000_1110000,
1388 0b1_10000100_1001001,
1389 0b1_10000011_0000100,
1390 0b1_10000110_1111111,
1391 0b1_10001000_1111010,
1392 0b1_10001000_0010000,
1393 ];
1394 let src = u16x16::from_array(src_array).as_m256bh();
1395 let a = f32x16::from_array(a_array).as_m512();
1396 let k: __mmask16 = 0xffff;
1397 let c: __m256bh = _mm512_mask_cvtneps_pbh(src, k, a);
1398 let result = *c.as_u16x16().as_array();
1399 #[rustfmt::skip]
1400 let expected_result: [u16; 16] = [
1401 0b0_10000110_0110010,
1402 0b0_10000010_0101000,
1403 0b0_10000000_1110000,
1404 0b0_10000100_1001001,
1405 0b0_10000011_0000100,
1406 0b0_10000110_1111111,
1407 0b0_10001000_1111010,
1408 0b0_10001000_0010000,
1409 0b0_10000110_0110010,
1410 0b0_10000010_0101000,
1411 0b0_10000000_1110000,
1412 0b0_10000100_1001001,
1413 0b0_10000011_0000100,
1414 0b0_10000110_1111111,
1415 0b0_10001000_1111010,
1416 0b0_10001000_0010000,
1417 ];
1418 assert_eq!(result, expected_result);
1419 let k: __mmask16 = 0;
1420 let c: __m256bh = _mm512_mask_cvtneps_pbh(src, k, a);
1421 let result = *c.as_u16x16().as_array();
1422 let expected_result = src_array;
1423 assert_eq!(result, expected_result);
1424 }
1425
1426 #[simd_test(enable = "avx512bf16,avx512f")]
1427 fn test_mm512_maskz_cvtneps_pbh() {
1428 #[rustfmt::skip]
1429 let a_array = [
1430 178.125_f32,
1431 10.5_f32,
1432 3.75_f32,
1433 50.25_f32,
1434 16.5_f32,
1435 255.11_f32,
1436 1000.158_f32,
1437 575.575_f32,
1438 178.125_f32,
1439 10.5_f32,
1440 3.75_f32,
1441 50.25_f32,
1442 16.5_f32,
1443 255.11_f32,
1444 1000.158_f32,
1445 575.575_f32,
1446 ];
1447 let a = f32x16::from_array(a_array).as_m512();
1448 let k: __mmask16 = 0xffff;
1449 let c: __m256bh = _mm512_maskz_cvtneps_pbh(k, a);
1450 let result = *c.as_u16x16().as_array();
1451 #[rustfmt::skip]
1452 let expected_result: [u16; 16] = [
1453 0b0_10000110_0110010,
1454 0b0_10000010_0101000,
1455 0b0_10000000_1110000,
1456 0b0_10000100_1001001,
1457 0b0_10000011_0000100,
1458 0b0_10000110_1111111,
1459 0b0_10001000_1111010,
1460 0b0_10001000_0010000,
1461 0b0_10000110_0110010,
1462 0b0_10000010_0101000,
1463 0b0_10000000_1110000,
1464 0b0_10000100_1001001,
1465 0b0_10000011_0000100,
1466 0b0_10000110_1111111,
1467 0b0_10001000_1111010,
1468 0b0_10001000_0010000,
1469 ];
1470 assert_eq!(result, expected_result);
1471 let k: __mmask16 = 0x653a;
1472 let c: __m256bh = _mm512_maskz_cvtneps_pbh(k, a);
1473 let result = *c.as_u16x16().as_array();
1474 #[rustfmt::skip]
1475 let expected_result: [u16; 16] = [
1476 0,
1477 0b0_10000010_0101000,
1478 0,
1479 0b0_10000100_1001001,
1480 0b0_10000011_0000100,
1481 0b0_10000110_1111111,
1482 0,
1483 0,
1484 0b0_10000110_0110010,
1485 0,
1486 0b0_10000000_1110000,
1487 0,
1488 0,
1489 0b0_10000110_1111111,
1490 0b0_10001000_1111010,
1491 0,
1492 ];
1493 assert_eq!(result, expected_result);
1494 }
1495
1496 #[simd_test(enable = "avx512bf16,avx512vl")]
1497 fn test_mm_dpbf16_ps() {
1498 let a_array = [8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32];
1499 let b_array = [-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32];
1500 let a1 = f32x4::from_array(a_array).as_m128();
1501 let b1 = f32x4::from_array(b_array).as_m128();
1502 let src = f32x4::from_array([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]).as_m128();
1503 let a: __m128bh = _mm_cvtne2ps_pbh(a1, a1);
1504 let b: __m128bh = _mm_cvtne2ps_pbh(b1, b1);
1505 let c: __m128 = _mm_dpbf16_ps(src, a, b);
1506 let result = *c.as_f32x4().as_array();
1507 let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32];
1508 assert_eq!(result, expected_result);
1509 }
1510
1511 #[simd_test(enable = "avx512bf16,avx512vl")]
1512 fn test_mm_mask_dpbf16_ps() {
1513 let a_array = [8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32];
1514 let b_array = [-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32];
1515 let a1 = f32x4::from_array(a_array).as_m128();
1516 let b1 = f32x4::from_array(b_array).as_m128();
1517 let k: __mmask8 = 0xf3;
1518 let src = f32x4::from_array([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]).as_m128();
1519 let a: __m128bh = _mm_cvtne2ps_pbh(a1, a1);
1520 let b: __m128bh = _mm_cvtne2ps_pbh(b1, b1);
1521 let c: __m128 = _mm_mask_dpbf16_ps(src, k, a, b);
1522 let result = *c.as_f32x4().as_array();
1523 let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32];
1524 assert_eq!(result, expected_result);
1525 let k: __mmask8 = 0xff;
1526 let c: __m128 = _mm_mask_dpbf16_ps(src, k, a, b);
1527 let result = *c.as_f32x4().as_array();
1528 let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32];
1529 assert_eq!(result, expected_result);
1530 let k: __mmask8 = 0;
1531 let c: __m128 = _mm_mask_dpbf16_ps(src, k, a, b);
1532 let result = *c.as_f32x4().as_array();
1533 let expected_result: [f32; 4] = [1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32];
1534 assert_eq!(result, expected_result);
1535 }
1536
1537 #[simd_test(enable = "avx512bf16,avx512vl")]
1538 fn test_mm_maskz_dpbf16_ps() {
1539 let a_array = [8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32];
1540 let b_array = [-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32];
1541 let a1 = f32x4::from_array(a_array).as_m128();
1542 let b1 = f32x4::from_array(b_array).as_m128();
1543 let k: __mmask8 = 0xf3;
1544 let src = f32x4::from_array([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]).as_m128();
1545 let a: __m128bh = _mm_cvtne2ps_pbh(a1, a1);
1546 let b: __m128bh = _mm_cvtne2ps_pbh(b1, b1);
1547 let c: __m128 = _mm_maskz_dpbf16_ps(k, src, a, b);
1548 let result = *c.as_f32x4().as_array();
1549 let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, 0.0, 0.0];
1550 assert_eq!(result, expected_result);
1551 let k: __mmask8 = 0xff;
1552 let c: __m128 = _mm_maskz_dpbf16_ps(k, src, a, b);
1553 let result = *c.as_f32x4().as_array();
1554 let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32];
1555 assert_eq!(result, expected_result);
1556 let k: __mmask8 = 0;
1557 let c: __m128 = _mm_maskz_dpbf16_ps(k, src, a, b);
1558 let result = *c.as_f32x4().as_array();
1559 let expected_result: [f32; 4] = [0.0, 0.0, 0.0, 0.0];
1560 assert_eq!(result, expected_result);
1561 }
1562
1563 #[simd_test(enable = "avx512bf16,avx512vl")]
1564 fn test_mm256_dpbf16_ps() {
1565 #[rustfmt::skip]
1566 let a_array = [
1567 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1568 ];
1569 let b_array = [
1570 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1571 ];
1572 let a1 = f32x8::from_array(a_array).as_m256();
1573 let b1 = f32x8::from_array(b_array).as_m256();
1574 #[rustfmt::skip]
1575 let src = f32x8::from_array([
1576 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
1577 ]).as_m256();
1578 let a: __m256bh = _mm256_cvtne2ps_pbh(a1, a1);
1579 let b: __m256bh = _mm256_cvtne2ps_pbh(b1, b1);
1580 let c: __m256 = _mm256_dpbf16_ps(src, a, b);
1581 let result = *c.as_f32x8().as_array();
1582 #[rustfmt::skip]
1583 let expected_result: [f32; 8] = [
1584 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1585 ];
1586 assert_eq!(result, expected_result);
1587 }
1588
1589 #[simd_test(enable = "avx512bf16,avx512vl")]
1590 fn test_mm256_mask_dpbf16_ps() {
1591 #[rustfmt::skip]
1592 let a_array = [
1593 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1594 ];
1595 let b_array = [
1596 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1597 ];
1598 let a1 = f32x8::from_array(a_array).as_m256();
1599 let b1 = f32x8::from_array(b_array).as_m256();
1600 let k: __mmask8 = 0x33;
1601 #[rustfmt::skip]
1602 let src = f32x8::from_array([
1603 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
1604 ]).as_m256();
1605 let a: __m256bh = _mm256_cvtne2ps_pbh(a1, a1);
1606 let b: __m256bh = _mm256_cvtne2ps_pbh(b1, b1);
1607 let c: __m256 = _mm256_mask_dpbf16_ps(src, k, a, b);
1608 let result = *c.as_f32x8().as_array();
1609 #[rustfmt::skip]
1610 let expected_result: [f32; 8] = [
1611 -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32, -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32,
1612 ];
1613 assert_eq!(result, expected_result);
1614 let k: __mmask8 = 0xff;
1615 let c: __m256 = _mm256_mask_dpbf16_ps(src, k, a, b);
1616 let result = *c.as_f32x8().as_array();
1617 #[rustfmt::skip]
1618 let expected_result: [f32; 8] = [
1619 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1620 ];
1621 assert_eq!(result, expected_result);
1622 let k: __mmask8 = 0;
1623 let c: __m256 = _mm256_mask_dpbf16_ps(src, k, a, b);
1624 let result = *c.as_f32x8().as_array();
1625 #[rustfmt::skip]
1626 let expected_result: [f32; 8] = [
1627 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
1628 ];
1629 assert_eq!(result, expected_result);
1630 }
1631
1632 #[simd_test(enable = "avx512bf16,avx512vl")]
1633 fn test_mm256_maskz_dpbf16_ps() {
1634 #[rustfmt::skip]
1635 let a_array = [
1636 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1637 ];
1638 let b_array = [
1639 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1640 ];
1641 let a1 = f32x8::from_array(a_array).as_m256();
1642 let b1 = f32x8::from_array(b_array).as_m256();
1643 let k: __mmask8 = 0x33;
1644 #[rustfmt::skip]
1645 let src = f32x8::from_array([
1646 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
1647 ]).as_m256();
1648 let a: __m256bh = _mm256_cvtne2ps_pbh(a1, a1);
1649 let b: __m256bh = _mm256_cvtne2ps_pbh(b1, b1);
1650 let c: __m256 = _mm256_maskz_dpbf16_ps(k, src, a, b);
1651 let result = *c.as_f32x8().as_array();
1652 #[rustfmt::skip]
1653 let expected_result: [f32; 8] = [
1654 -18.0_f32, -52.0_f32, 0.0, 0.0, -18.0_f32, -52.0_f32, 0.0, 0.0,
1655 ];
1656 assert_eq!(result, expected_result);
1657 let k: __mmask8 = 0xff;
1658 let c: __m256 = _mm256_maskz_dpbf16_ps(k, src, a, b);
1659 let result = *c.as_f32x8().as_array();
1660 #[rustfmt::skip]
1661 let expected_result: [f32; 8] = [
1662 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1663 ];
1664 assert_eq!(result, expected_result);
1665 let k: __mmask8 = 0;
1666 let c: __m256 = _mm256_maskz_dpbf16_ps(k, src, a, b);
1667 let result = *c.as_f32x8().as_array();
1668 let expected_result: [f32; 8] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0];
1669 assert_eq!(result, expected_result);
1670 }
1671
1672 #[simd_test(enable = "avx512bf16,avx512f")]
1673 fn test_mm512_dpbf16_ps() {
1674 #[rustfmt::skip]
1675 let a_array = [
1676 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1677 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1678 ];
1679 let b_array = [
1680 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1681 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1682 ];
1683 let a1 = f32x16::from_array(a_array).as_m512();
1684 let b1 = f32x16::from_array(b_array).as_m512();
1685 let src = f32x16::from_array([
1686 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32,
1687 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
1688 ])
1689 .as_m512();
1690 let a: __m512bh = _mm512_cvtne2ps_pbh(a1, a1);
1691 let b: __m512bh = _mm512_cvtne2ps_pbh(b1, b1);
1692 let c: __m512 = _mm512_dpbf16_ps(src, a, b);
1693 let result = *c.as_f32x16().as_array();
1694 #[rustfmt::skip]
1695 let expected_result: [f32; 16] = [
1696 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1697 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1698 ];
1699 assert_eq!(result, expected_result);
1700 }
1701
1702 #[simd_test(enable = "avx512bf16,avx512f")]
1703 fn test_mm512_mask_dpbf16_ps() {
1704 #[rustfmt::skip]
1705 let a_array = [
1706 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1707 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1708 ];
1709 let b_array = [
1710 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1711 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1712 ];
1713 let a1 = f32x16::from_array(a_array).as_m512();
1714 let b1 = f32x16::from_array(b_array).as_m512();
1715 let k: __mmask16 = 0x3333;
1716 #[rustfmt::skip]
1717 let src = f32x16::from_array([
1718 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32,
1719 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
1720 ]).as_m512();
1721 let a: __m512bh = _mm512_cvtne2ps_pbh(a1, a1);
1722 let b: __m512bh = _mm512_cvtne2ps_pbh(b1, b1);
1723 let c: __m512 = _mm512_mask_dpbf16_ps(src, k, a, b);
1724 let result = *c.as_f32x16().as_array();
1725 #[rustfmt::skip]
1726 let expected_result: [f32; 16] = [
1727 -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32, -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32,
1728 -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32, -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32,
1729 ];
1730 assert_eq!(result, expected_result);
1731 let k: __mmask16 = 0xffff;
1732 let c: __m512 = _mm512_mask_dpbf16_ps(src, k, a, b);
1733 let result = *c.as_f32x16().as_array();
1734 #[rustfmt::skip]
1735 let expected_result: [f32; 16] = [
1736 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1737 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1738 ];
1739 assert_eq!(result, expected_result);
1740 let k: __mmask16 = 0;
1741 let c: __m512 = _mm512_mask_dpbf16_ps(src, k, a, b);
1742 let result = *c.as_f32x16().as_array();
1743 #[rustfmt::skip]
1744 let expected_result: [f32; 16] = [
1745 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32,
1746 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
1747 ];
1748 assert_eq!(result, expected_result);
1749 }
1750
1751 #[simd_test(enable = "avx512bf16,avx512f")]
1752 fn test_mm512_maskz_dpbf16_ps() {
1753 #[rustfmt::skip]
1754 let a_array = [
1755 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1756 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1757 ];
1758 let b_array = [
1759 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1760 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1761 ];
1762 let a1 = f32x16::from_array(a_array).as_m512();
1763 let b1 = f32x16::from_array(b_array).as_m512();
1764 let k: __mmask16 = 0x3333;
1765 #[rustfmt::skip]
1766 let src = f32x16::from_array([
1767 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32,
1768 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
1769 ]).as_m512();
1770 let a: __m512bh = _mm512_cvtne2ps_pbh(a1, a1);
1771 let b: __m512bh = _mm512_cvtne2ps_pbh(b1, b1);
1772 let c: __m512 = _mm512_maskz_dpbf16_ps(k, src, a, b);
1773 let result = *c.as_f32x16().as_array();
1774 #[rustfmt::skip]
1775 let expected_result: [f32; 16] = [
1776 -18.0_f32, -52.0_f32, 0.0, 0.0, -18.0_f32, -52.0_f32, 0.0, 0.0, -18.0_f32, -52.0_f32,
1777 0.0, 0.0, -18.0_f32, -52.0_f32, 0.0, 0.0,
1778 ];
1779 assert_eq!(result, expected_result);
1780 let k: __mmask16 = 0xffff;
1781 let c: __m512 = _mm512_maskz_dpbf16_ps(k, src, a, b);
1782 let result = *c.as_f32x16().as_array();
1783 #[rustfmt::skip]
1784 let expected_result: [f32; 16] = [
1785 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1786 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1787 ];
1788 assert_eq!(result, expected_result);
1789 let k: __mmask16 = 0;
1790 let c: __m512 = _mm512_maskz_dpbf16_ps(k, src, a, b);
1791 let result = *c.as_f32x16().as_array();
1792 #[rustfmt::skip]
1793 let expected_result: [f32; 16] = [
1794 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
1795 ];
1796 assert_eq!(result, expected_result);
1797 }
1798
// bf16 bit patterns for the values 1.0 through 8.0. Each one equals the upper
// 16 bits of the corresponding f32 bit pattern (1 sign bit, 8 exponent bits,
// 7 mantissa bits).
/// 1.0: exponent 0x7F, mantissa 0b000_0000.
const BF16_ONE: u16 = 0x3F80;
/// 2.0: exponent 0x80, mantissa 0b000_0000.
const BF16_TWO: u16 = 0x4000;
/// 3.0: exponent 0x80, mantissa 0b100_0000.
const BF16_THREE: u16 = 0x4040;
/// 4.0: exponent 0x81, mantissa 0b000_0000.
const BF16_FOUR: u16 = 0x4080;
/// 5.0: exponent 0x81, mantissa 0b010_0000.
const BF16_FIVE: u16 = 0x40A0;
/// 6.0: exponent 0x81, mantissa 0b100_0000.
const BF16_SIX: u16 = 0x40C0;
/// 7.0: exponent 0x81, mantissa 0b110_0000.
const BF16_SEVEN: u16 = 0x40E0;
/// 8.0: exponent 0x82, mantissa 0b000_0000.
const BF16_EIGHT: u16 = 0x4100;
1807
1808 #[simd_test(enable = "avx512bf16")]
1809 fn test_mm512_cvtpbh_ps() {
1810 let a = __m256bh([
1811 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1812 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1813 ]);
1814 let r = _mm512_cvtpbh_ps(a);
1815 let e = _mm512_setr_ps(
1816 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
1817 );
1818 assert_eq_m512(r, e);
1819 }
1820
1821 #[simd_test(enable = "avx512bf16")]
1822 fn test_mm512_mask_cvtpbh_ps() {
1823 let a = __m256bh([
1824 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1825 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1826 ]);
1827 let src = _mm512_setr_ps(
1828 9., 10., 11., 12., 13., 14., 15., 16., 9., 10., 11., 12., 13., 14., 15., 16.,
1829 );
1830 let k = 0b1010_1010_1010_1010;
1831 let r = _mm512_mask_cvtpbh_ps(src, k, a);
1832 let e = _mm512_setr_ps(
1833 9., 2., 11., 4., 13., 6., 15., 8., 9., 2., 11., 4., 13., 6., 15., 8.,
1834 );
1835 assert_eq_m512(r, e);
1836 }
1837
1838 #[simd_test(enable = "avx512bf16")]
1839 fn test_mm512_maskz_cvtpbh_ps() {
1840 let a = __m256bh([
1841 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1842 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1843 ]);
1844 let k = 0b1010_1010_1010_1010;
1845 let r = _mm512_maskz_cvtpbh_ps(k, a);
1846 let e = _mm512_setr_ps(
1847 0., 2., 0., 4., 0., 6., 0., 8., 0., 2., 0., 4., 0., 6., 0., 8.,
1848 );
1849 assert_eq_m512(r, e);
1850 }
1851
1852 #[simd_test(enable = "avx512bf16,avx512vl")]
1853 fn test_mm256_cvtpbh_ps() {
1854 let a = __m128bh([
1855 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1856 ]);
1857 let r = _mm256_cvtpbh_ps(a);
1858 let e = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
1859 assert_eq_m256(r, e);
1860 }
1861
1862 #[simd_test(enable = "avx512bf16,avx512vl")]
1863 fn test_mm256_mask_cvtpbh_ps() {
1864 let a = __m128bh([
1865 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1866 ]);
1867 let src = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
1868 let k = 0b1010_1010;
1869 let r = _mm256_mask_cvtpbh_ps(src, k, a);
1870 let e = _mm256_setr_ps(9., 2., 11., 4., 13., 6., 15., 8.);
1871 assert_eq_m256(r, e);
1872 }
1873
1874 #[simd_test(enable = "avx512bf16,avx512vl")]
1875 fn test_mm256_maskz_cvtpbh_ps() {
1876 let a = __m128bh([
1877 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1878 ]);
1879 let k = 0b1010_1010;
1880 let r = _mm256_maskz_cvtpbh_ps(k, a);
1881 let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
1882 assert_eq_m256(r, e);
1883 }
1884
1885 #[simd_test(enable = "avx512bf16,avx512vl")]
1886 fn test_mm_cvtpbh_ps() {
1887 let a = __m128bh([BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, 0, 0, 0, 0]);
1888 let r = _mm_cvtpbh_ps(a);
1889 let e = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
1890 assert_eq_m128(r, e);
1891 }
1892
1893 #[simd_test(enable = "avx512bf16,avx512vl")]
1894 fn test_mm_mask_cvtpbh_ps() {
1895 let a = __m128bh([BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, 0, 0, 0, 0]);
1896 let src = _mm_setr_ps(9., 10., 11., 12.);
1897 let k = 0b1010;
1898 let r = _mm_mask_cvtpbh_ps(src, k, a);
1899 let e = _mm_setr_ps(9., 2., 11., 4.);
1900 assert_eq_m128(r, e);
1901 }
1902
1903 #[simd_test(enable = "avx512bf16,avx512vl")]
1904 fn test_mm_maskz_cvtpbh_ps() {
1905 let a = __m128bh([BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, 0, 0, 0, 0]);
1906 let k = 0b1010;
1907 let r = _mm_maskz_cvtpbh_ps(k, a);
1908 let e = _mm_setr_ps(0., 2., 0., 4.);
1909 assert_eq_m128(r, e);
1910 }
1911
1912 #[simd_test(enable = "avx512bf16")]
1913 fn test_mm_cvtsbh_ss() {
1914 let r = _mm_cvtsbh_ss(bf16::from_bits(BF16_ONE));
1915 assert_eq!(r, 1.);
1916 }
1917
1918 #[simd_test(enable = "avx512bf16,avx512vl")]
1919 fn test_mm_cvtneps_pbh() {
1920 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
1921 let r: u16x4 = unsafe { transmute_copy(&_mm_cvtneps_pbh(a)) };
1922 let e = u16x4::new(BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR);
1923 assert_eq!(r, e);
1924 }
1925
1926 #[simd_test(enable = "avx512bf16,avx512vl")]
1927 fn test_mm_mask_cvtneps_pbh() {
1928 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
1929 let src = __m128bh([5, 6, 7, 8, !0, !0, !0, !0]);
1930 let k = 0b1010;
1931 let r: u16x4 = unsafe { transmute_copy(&_mm_mask_cvtneps_pbh(src, k, a)) };
1932 let e = u16x4::new(5, BF16_TWO, 7, BF16_FOUR);
1933 assert_eq!(r, e);
1934 }
1935
1936 #[simd_test(enable = "avx512bf16,avx512vl")]
1937 fn test_mm_maskz_cvtneps_pbh() {
1938 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
1939 let k = 0b1010;
1940 let r: u16x4 = unsafe { transmute_copy(&_mm_maskz_cvtneps_pbh(k, a)) };
1941 let e = u16x4::new(0, BF16_TWO, 0, BF16_FOUR);
1942 assert_eq!(r, e);
1943 }
1944
1945 #[simd_test(enable = "avx512bf16,avx512vl")]
1946 fn test_mm_cvtness_sbh() {
1947 let r = _mm_cvtness_sbh(1.);
1948 assert_eq!(r.to_bits(), BF16_ONE);
1949 }
1950}