core/stdarch/crates/core_arch/src/x86/
f16c.rs

1//! [F16C intrinsics].
2//!
3//! [F16C intrinsics]: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=fp16&expand=1769
4
5use crate::core_arch::{simd::*, x86::*};
6
7#[cfg(test)]
8use stdarch_test::assert_instr;
9
10#[allow(improper_ctypes)]
11extern "unadjusted" {
12    #[link_name = "llvm.x86.vcvtph2ps.128"]
13    fn llvm_vcvtph2ps_128(a: i16x8) -> f32x4;
14    #[link_name = "llvm.x86.vcvtph2ps.256"]
15    fn llvm_vcvtph2ps_256(a: i16x8) -> f32x8;
16    #[link_name = "llvm.x86.vcvtps2ph.128"]
17    fn llvm_vcvtps2ph_128(a: f32x4, rounding: i32) -> i16x8;
18    #[link_name = "llvm.x86.vcvtps2ph.256"]
19    fn llvm_vcvtps2ph_256(a: f32x8, rounding: i32) -> i16x8;
20}
21
22/// Converts the 4 x 16-bit half-precision float values in the lowest 64-bit of
23/// the 128-bit vector `a` into 4 x 32-bit float values stored in a 128-bit wide
24/// vector.
25///
26/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_ps)
27#[inline]
28#[target_feature(enable = "f16c")]
29#[cfg_attr(test, assert_instr("vcvtph2ps"))]
30#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
31pub unsafe fn _mm_cvtph_ps(a: __m128i) -> __m128 {
32    transmute(llvm_vcvtph2ps_128(transmute(a)))
33}
34
35/// Converts the 8 x 16-bit half-precision float values in the 128-bit vector
36/// `a` into 8 x 32-bit float values stored in a 256-bit wide vector.
37///
38/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_ps)
39#[inline]
40#[target_feature(enable = "f16c")]
41#[cfg_attr(test, assert_instr("vcvtph2ps"))]
42#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
43pub unsafe fn _mm256_cvtph_ps(a: __m128i) -> __m256 {
44    transmute(llvm_vcvtph2ps_256(transmute(a)))
45}
46
47/// Converts the 4 x 32-bit float values in the 128-bit vector `a` into 4 x
48/// 16-bit half-precision float values stored in the lowest 64-bit of a 128-bit
49/// vector.
50///
51/// Rounding is done according to the `imm_rounding` parameter, which can be one of:
52///
53/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
54/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
55/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
56/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
57/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
58///
59/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_ph)
60#[inline]
61#[target_feature(enable = "f16c")]
62#[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = 0))]
63#[rustc_legacy_const_generics(1)]
64#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
65pub unsafe fn _mm_cvtps_ph<const IMM_ROUNDING: i32>(a: __m128) -> __m128i {
66    static_assert_uimm_bits!(IMM_ROUNDING, 3);
67    let a = a.as_f32x4();
68    let r = llvm_vcvtps2ph_128(a, IMM_ROUNDING);
69    transmute(r)
70}
71
72/// Converts the 8 x 32-bit float values in the 256-bit vector `a` into 8 x
73/// 16-bit half-precision float values stored in a 128-bit wide vector.
74///
75/// Rounding is done according to the `imm_rounding` parameter, which can be one of:
76///
77/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
78/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
79/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
80/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
81/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
82///
83/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtps_ph)
84#[inline]
85#[target_feature(enable = "f16c")]
86#[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = 0))]
87#[rustc_legacy_const_generics(1)]
88#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
89pub unsafe fn _mm256_cvtps_ph<const IMM_ROUNDING: i32>(a: __m256) -> __m128i {
90    static_assert_uimm_bits!(IMM_ROUNDING, 3);
91    let a = a.as_f32x8();
92    let r = llvm_vcvtps2ph_256(a, IMM_ROUNDING);
93    transmute(r)
94}
95
#[cfg(test)]
mod tests {
    use crate::{core_arch::x86::*, mem::transmute};
    use stdarch_test::simd_test;

    // Round-trips four floats through half precision and back. The inputs are
    // small integers, which half precision represents exactly, so the result
    // must compare equal to the input.
    #[simd_test(enable = "f16c")]
    unsafe fn test_mm_cvtph_ps() {
        let expected = [1_f32, 2_f32, 3_f32, 4_f32];
        let singles: __m128 = transmute(expected);
        let packed: __m128i = _mm_cvtps_ph::<0>(singles);
        let round_tripped: [f32; 4] = transmute(_mm_cvtph_ps(packed));
        assert_eq!(round_tripped, expected);
    }

    // Same round trip for the 256-bit variants, using eight floats.
    #[simd_test(enable = "f16c")]
    unsafe fn test_mm256_cvtph_ps() {
        let expected = [1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32];
        let singles: __m256 = transmute(expected);
        let packed: __m128i = _mm256_cvtps_ph::<0>(singles);
        let round_tripped: [f32; 8] = transmute(_mm256_cvtph_ps(packed));
        assert_eq!(round_tripped, expected);
    }
}