core/stdarch/crates/core_arch/src/x86/f16c.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
//! [F16C intrinsics].
//!
//! [F16C intrinsics]: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=fp16&expand=1769
use crate::core_arch::{simd::*, x86::*};
#[cfg(test)]
use stdarch_test::assert_instr;
#[allow(improper_ctypes)]
extern "unadjusted" {
#[link_name = "llvm.x86.vcvtph2ps.128"]
fn llvm_vcvtph2ps_128(a: i16x8) -> f32x4;
#[link_name = "llvm.x86.vcvtph2ps.256"]
fn llvm_vcvtph2ps_256(a: i16x8) -> f32x8;
#[link_name = "llvm.x86.vcvtps2ph.128"]
fn llvm_vcvtps2ph_128(a: f32x4, rounding: i32) -> i16x8;
#[link_name = "llvm.x86.vcvtps2ph.256"]
fn llvm_vcvtps2ph_256(a: f32x8, rounding: i32) -> i16x8;
}
/// Converts the 4 x 16-bit half-precision float values in the lowest 64-bit of
/// the 128-bit vector `a` into 4 x 32-bit float values stored in a 128-bit wide
/// vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_ps)
#[inline]
#[target_feature(enable = "f16c")]
#[cfg_attr(test, assert_instr("vcvtph2ps"))]
#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
pub unsafe fn _mm_cvtph_ps(a: __m128i) -> __m128 {
transmute(llvm_vcvtph2ps_128(transmute(a)))
}
/// Converts the 8 x 16-bit half-precision float values in the 128-bit vector
/// `a` into 8 x 32-bit float values stored in a 256-bit wide vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_ps)
#[inline]
#[target_feature(enable = "f16c")]
#[cfg_attr(test, assert_instr("vcvtph2ps"))]
#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
pub unsafe fn _mm256_cvtph_ps(a: __m128i) -> __m256 {
transmute(llvm_vcvtph2ps_256(transmute(a)))
}
/// Converts the 4 x 32-bit float values in the 128-bit vector `a` into 4 x
/// 16-bit half-precision float values stored in the lowest 64-bit of a 128-bit
/// vector.
///
/// Rounding is done according to the `imm_rounding` parameter, which can be one of:
///
/// * `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC`: round to nearest and suppress exceptions,
/// * `_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC`: round down and suppress exceptions,
/// * `_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC`: round up and suppress exceptions,
/// * `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC`: truncate and suppress exceptions,
/// * `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`].
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_ph)
#[inline]
#[target_feature(enable = "f16c")]
#[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
pub unsafe fn _mm_cvtps_ph<const IMM_ROUNDING: i32>(a: __m128) -> __m128i {
static_assert_uimm_bits!(IMM_ROUNDING, 3);
let a = a.as_f32x4();
let r = llvm_vcvtps2ph_128(a, IMM_ROUNDING);
transmute(r)
}
/// Converts the 8 x 32-bit float values in the 256-bit vector `a` into 8 x
/// 16-bit half-precision float values stored in a 128-bit wide vector.
///
/// Rounding is done according to the `imm_rounding` parameter, which can be one of:
///
/// * `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC`: round to nearest and suppress exceptions,
/// * `_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC`: round down and suppress exceptions,
/// * `_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC`: round up and suppress exceptions,
/// * `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC`: truncate and suppress exceptions,
/// * `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`].
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtps_ph)
#[inline]
#[target_feature(enable = "f16c")]
#[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
pub unsafe fn _mm256_cvtps_ph<const IMM_ROUNDING: i32>(a: __m256) -> __m128i {
static_assert_uimm_bits!(IMM_ROUNDING, 3);
let a = a.as_f32x8();
let r = llvm_vcvtps2ph_256(a, IMM_ROUNDING);
transmute(r)
}
#[cfg(test)]
mod tests {
use crate::{core_arch::x86::*, mem::transmute};
use stdarch_test::simd_test;
#[simd_test(enable = "f16c")]
unsafe fn test_mm_cvtph_ps() {
let array = [1_f32, 2_f32, 3_f32, 4_f32];
let float_vec: __m128 = transmute(array);
let halfs: __m128i = _mm_cvtps_ph::<0>(float_vec);
let floats: __m128 = _mm_cvtph_ps(halfs);
let result: [f32; 4] = transmute(floats);
assert_eq!(result, array);
}
#[simd_test(enable = "f16c")]
unsafe fn test_mm256_cvtph_ps() {
let array = [1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32];
let float_vec: __m256 = transmute(array);
let halfs: __m128i = _mm256_cvtps_ph::<0>(float_vec);
let floats: __m256 = _mm256_cvtph_ps(halfs);
let result: [f32; 8] = transmute(floats);
assert_eq!(result, array);
}
}