core/stdarch/crates/core_arch/src/x86_64/
avx512fp16.rs

1use crate::core_arch::x86::*;
2#[cfg(test)]
3use stdarch_test::assert_instr;
4
5/// Convert the signed 64-bit integer b to a half-precision (16-bit) floating-point element, store the
6/// result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements
7/// of dst.
8///
9/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti64_sh)
10#[inline]
11#[target_feature(enable = "avx512fp16")]
12#[cfg_attr(test, assert_instr(vcvtsi2sh))]
13#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14pub fn _mm_cvti64_sh(a: __m128h, b: i64) -> __m128h {
15    unsafe { vcvtsi642sh(a, b, _MM_FROUND_CUR_DIRECTION) }
16}
17
18/// Convert the signed 64-bit integer b to a half-precision (16-bit) floating-point element, store the
19/// result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements
20/// of dst.
21///
22/// Rounding is done according to the rounding parameter, which can be one of:
23///
24/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
25/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
26/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
27/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
28/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
29///
30/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_sh)
31#[inline]
32#[target_feature(enable = "avx512fp16")]
33#[cfg_attr(test, assert_instr(vcvtsi2sh, ROUNDING = 8))]
34#[rustc_legacy_const_generics(2)]
35#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
36pub fn _mm_cvt_roundi64_sh<const ROUNDING: i32>(a: __m128h, b: i64) -> __m128h {
37    unsafe {
38        static_assert_rounding!(ROUNDING);
39        vcvtsi642sh(a, b, ROUNDING)
40    }
41}
42
43/// Convert the unsigned 64-bit integer b to a half-precision (16-bit) floating-point element, store the
44/// result in the lower element of dst, and copy the upper 1 packed elements from a to the upper elements
45/// of dst.
46///
47/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_sh)
48#[inline]
49#[target_feature(enable = "avx512fp16")]
50#[cfg_attr(test, assert_instr(vcvtusi2sh))]
51#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
52pub fn _mm_cvtu64_sh(a: __m128h, b: u64) -> __m128h {
53    unsafe { vcvtusi642sh(a, b, _MM_FROUND_CUR_DIRECTION) }
54}
55
56/// Convert the unsigned 64-bit integer b to a half-precision (16-bit) floating-point element, store the
57/// result in the lower element of dst, and copy the upper 1 packed elements from a to the upper elements
58/// of dst.
59///
60/// Rounding is done according to the rounding parameter, which can be one of:
61///
62/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
63/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
64/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
65/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
66/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
67///
68/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu64_sh)
69#[inline]
70#[target_feature(enable = "avx512fp16")]
71#[cfg_attr(test, assert_instr(vcvtusi2sh, ROUNDING = 8))]
72#[rustc_legacy_const_generics(2)]
73#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
74pub fn _mm_cvt_roundu64_sh<const ROUNDING: i32>(a: __m128h, b: u64) -> __m128h {
75    unsafe {
76        static_assert_rounding!(ROUNDING);
77        vcvtusi642sh(a, b, ROUNDING)
78    }
79}
80
81/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer, and store
82/// the result in dst.
83///
84/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_i64)
85#[inline]
86#[target_feature(enable = "avx512fp16")]
87#[cfg_attr(test, assert_instr(vcvtsh2si))]
88#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
89pub fn _mm_cvtsh_i64(a: __m128h) -> i64 {
90    unsafe { vcvtsh2si64(a, _MM_FROUND_CUR_DIRECTION) }
91}
92
93/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer, and store
94/// the result in dst.
95///
96/// Rounding is done according to the rounding parameter, which can be one of:
97///
98/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
99/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
100/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
101/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
102/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
103///
104/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_i64)
105#[inline]
106#[target_feature(enable = "avx512fp16")]
107#[cfg_attr(test, assert_instr(vcvtsh2si, ROUNDING = 8))]
108#[rustc_legacy_const_generics(1)]
109#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
110pub fn _mm_cvt_roundsh_i64<const ROUNDING: i32>(a: __m128h) -> i64 {
111    unsafe {
112        static_assert_rounding!(ROUNDING);
113        vcvtsh2si64(a, ROUNDING)
114    }
115}
116
117/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer, and store
118/// the result in dst.
119///
120/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_u64)
121#[inline]
122#[target_feature(enable = "avx512fp16")]
123#[cfg_attr(test, assert_instr(vcvtsh2usi))]
124#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
125pub fn _mm_cvtsh_u64(a: __m128h) -> u64 {
126    unsafe { vcvtsh2usi64(a, _MM_FROUND_CUR_DIRECTION) }
127}
128
129/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer, and store
130/// the result in dst.
131///
132/// Rounding is done according to the rounding parameter, which can be one of:
133///
134/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
135/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
136/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
137/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
138/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
139///
140/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_u64)
141#[inline]
142#[target_feature(enable = "avx512fp16")]
143#[cfg_attr(test, assert_instr(vcvtsh2usi, ROUNDING = 8))]
144#[rustc_legacy_const_generics(1)]
145#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
146pub fn _mm_cvt_roundsh_u64<const ROUNDING: i32>(a: __m128h) -> u64 {
147    unsafe {
148        static_assert_rounding!(ROUNDING);
149        vcvtsh2usi64(a, ROUNDING)
150    }
151}
152
153/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer with truncation,
154/// and store the result in dst.
155///
156/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_i64)
157#[inline]
158#[target_feature(enable = "avx512fp16")]
159#[cfg_attr(test, assert_instr(vcvttsh2si))]
160#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
161pub fn _mm_cvttsh_i64(a: __m128h) -> i64 {
162    unsafe { vcvttsh2si64(a, _MM_FROUND_CUR_DIRECTION) }
163}
164
165/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer with truncation,
166/// and store the result in dst.
167///
168/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
169///
170/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_i64)
171#[inline]
172#[target_feature(enable = "avx512fp16")]
173#[cfg_attr(test, assert_instr(vcvttsh2si, SAE = 8))]
174#[rustc_legacy_const_generics(1)]
175#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
176pub fn _mm_cvtt_roundsh_i64<const SAE: i32>(a: __m128h) -> i64 {
177    unsafe {
178        static_assert_sae!(SAE);
179        vcvttsh2si64(a, SAE)
180    }
181}
182
183/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer with truncation,
184/// and store the result in dst.
185///
186/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_u64)
187#[inline]
188#[target_feature(enable = "avx512fp16")]
189#[cfg_attr(test, assert_instr(vcvttsh2usi))]
190#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
191pub fn _mm_cvttsh_u64(a: __m128h) -> u64 {
192    unsafe { vcvttsh2usi64(a, _MM_FROUND_CUR_DIRECTION) }
193}
194
195/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer with truncation,
196/// and store the result in dst.
197///
198/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
199///
200/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_u64)
201#[inline]
202#[target_feature(enable = "avx512fp16")]
203#[cfg_attr(test, assert_instr(vcvttsh2usi, SAE = 8))]
204#[rustc_legacy_const_generics(1)]
205#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
206pub fn _mm_cvtt_roundsh_u64<const SAE: i32>(a: __m128h) -> u64 {
207    unsafe {
208        static_assert_sae!(SAE);
209        vcvttsh2usi64(a, SAE)
210    }
211}
212
213#[allow(improper_ctypes)]
214unsafe extern "C" {
215    #[link_name = "llvm.x86.avx512fp16.vcvtsi642sh"]
216    fn vcvtsi642sh(a: __m128h, b: i64, rounding: i32) -> __m128h;
217    #[link_name = "llvm.x86.avx512fp16.vcvtusi642sh"]
218    fn vcvtusi642sh(a: __m128h, b: u64, rounding: i32) -> __m128h;
219    #[link_name = "llvm.x86.avx512fp16.vcvtsh2si64"]
220    fn vcvtsh2si64(a: __m128h, rounding: i32) -> i64;
221    #[link_name = "llvm.x86.avx512fp16.vcvtsh2usi64"]
222    fn vcvtsh2usi64(a: __m128h, rounding: i32) -> u64;
223    #[link_name = "llvm.x86.avx512fp16.vcvttsh2si64"]
224    fn vcvttsh2si64(a: __m128h, sae: i32) -> i64;
225    #[link_name = "llvm.x86.avx512fp16.vcvttsh2usi64"]
226    fn vcvttsh2usi64(a: __m128h, sae: i32) -> u64;
227}
228
// Tests for the 64-bit scalar <-> half-precision conversion intrinsics defined in this file.
#[cfg(test)]
mod tests {
    use crate::core_arch::{x86::*, x86_64::*};
    use stdarch_test::simd_test;

    // Integer -> f16: only lane 0 is replaced by the converted value; lanes 1..=7 come from `a`.

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvti64_sh() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvti64_sh(a, 10);
        let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvt_roundi64_sh() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvt_roundi64_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10);
        let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvtu64_sh() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvtu64_sh(a, 10);
        let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvt_roundu64_sh() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvt_roundu64_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10);
        let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }

    // f16 -> integer: only lane 0 (1.0) is converted.

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvtsh_i64() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvtsh_i64(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvt_roundsh_i64() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvt_roundsh_i64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvtsh_u64() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvtsh_u64(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvt_roundsh_u64() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvt_roundsh_u64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        assert_eq!(r, 1);
    }

    // Truncating f16 -> integer conversions.

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvttsh_i64() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvttsh_i64(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvtt_roundsh_i64() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        // The const parameter is an SAE selector, not a rounding mode, so pass
        // `_MM_FROUND_NO_EXC` on its own (as the `_u64` variant's test does).
        let r = _mm_cvtt_roundsh_i64::<_MM_FROUND_NO_EXC>(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvttsh_u64() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvttsh_u64(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvtt_roundsh_u64() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvtt_roundsh_u64::<_MM_FROUND_NO_EXC>(a);
        assert_eq!(r, 1);
    }
}