core/stdarch/crates/core_arch/src/x86/
avx512ifma.rs

1use crate::core_arch::x86::*;
2use crate::intrinsics::simd::simd_select_bitmask;
3
4#[cfg(test)]
5use stdarch_test::assert_instr;
6
7/// Multiply packed unsigned 52-bit integers in each 64-bit element of
8/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
9/// unsigned integer from the intermediate result with the
10/// corresponding unsigned 64-bit integer in `a`, and store the
11/// results in `dst`.
12///
13/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_madd52hi_epu64)
14#[inline]
15#[target_feature(enable = "avx512ifma")]
16#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17#[cfg_attr(test, assert_instr(vpmadd52huq))]
18pub fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
19    unsafe { vpmadd52huq_512(a, b, c) }
20}
21
22/// Multiply packed unsigned 52-bit integers in each 64-bit element of
23/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
24/// unsigned integer from the intermediate result with the
25/// corresponding unsigned 64-bit integer in `a`, and store the
26/// results in `dst` using writemask `k` (elements are copied
27/// from `k` when the corresponding mask bit is not set).
28///
29/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_mask_madd52hi_epu64)
30#[inline]
31#[target_feature(enable = "avx512ifma")]
32#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33#[cfg_attr(test, assert_instr(vpmadd52huq))]
34pub fn _mm512_mask_madd52hi_epu64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
35    unsafe { simd_select_bitmask(k, vpmadd52huq_512(a, b, c), a) }
36}
37
38/// Multiply packed unsigned 52-bit integers in each 64-bit element of
39/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
40/// unsigned integer from the intermediate result with the
41/// corresponding unsigned 64-bit integer in `a`, and store the
42/// results in `dst` using writemask `k` (elements are zeroed
43/// out when the corresponding mask bit is not set).
44///
45/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_maskz_madd52hi_epu64)
46#[inline]
47#[target_feature(enable = "avx512ifma")]
48#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
49#[cfg_attr(test, assert_instr(vpmadd52huq))]
50pub fn _mm512_maskz_madd52hi_epu64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
51    unsafe { simd_select_bitmask(k, vpmadd52huq_512(a, b, c), _mm512_setzero_si512()) }
52}
53
54/// Multiply packed unsigned 52-bit integers in each 64-bit element of
55/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
56/// unsigned integer from the intermediate result with the
57/// corresponding unsigned 64-bit integer in `a`, and store the
58/// results in `dst`.
59///
60/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_madd52lo_epu64)
61#[inline]
62#[target_feature(enable = "avx512ifma")]
63#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
64#[cfg_attr(test, assert_instr(vpmadd52luq))]
65pub fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
66    unsafe { vpmadd52luq_512(a, b, c) }
67}
68
69/// Multiply packed unsigned 52-bit integers in each 64-bit element of
70/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
71/// unsigned integer from the intermediate result with the
72/// corresponding unsigned 64-bit integer in `a`, and store the
73/// results in `dst` using writemask `k` (elements are copied
74/// from `k` when the corresponding mask bit is not set).
75///
76/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_mask_madd52lo_epu64)
77#[inline]
78#[target_feature(enable = "avx512ifma")]
79#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
80#[cfg_attr(test, assert_instr(vpmadd52luq))]
81pub fn _mm512_mask_madd52lo_epu64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
82    unsafe { simd_select_bitmask(k, vpmadd52luq_512(a, b, c), a) }
83}
84
85/// Multiply packed unsigned 52-bit integers in each 64-bit element of
86/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
87/// unsigned integer from the intermediate result with the
88/// corresponding unsigned 64-bit integer in `a`, and store the
89/// results in `dst` using writemask `k` (elements are zeroed
90/// out when the corresponding mask bit is not set).
91///
92/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_maskz_madd52lo_epu64)
93#[inline]
94#[target_feature(enable = "avx512ifma")]
95#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
96#[cfg_attr(test, assert_instr(vpmadd52luq))]
97pub fn _mm512_maskz_madd52lo_epu64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
98    unsafe { simd_select_bitmask(k, vpmadd52luq_512(a, b, c), _mm512_setzero_si512()) }
99}
100
101/// Multiply packed unsigned 52-bit integers in each 64-bit element of
102/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
103/// unsigned integer from the intermediate result with the
104/// corresponding unsigned 64-bit integer in `a`, and store the
105/// results in `dst`.
106///
107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd52hi_avx_epu64)
108#[inline]
109#[target_feature(enable = "avxifma")]
110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
111#[cfg_attr(test, assert_instr(vpmadd52huq))]
112pub fn _mm256_madd52hi_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
113    unsafe { vpmadd52huq_256(a, b, c) }
114}
115
116/// Multiply packed unsigned 52-bit integers in each 64-bit element of
117/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
118/// unsigned integer from the intermediate result with the
119/// corresponding unsigned 64-bit integer in `a`, and store the
120/// results in `dst`.
121///
122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_madd52hi_epu64)
123#[inline]
124#[target_feature(enable = "avx512ifma,avx512vl")]
125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
126#[cfg_attr(test, assert_instr(vpmadd52huq))]
127pub fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
128    unsafe { vpmadd52huq_256(a, b, c) }
129}
130
131/// Multiply packed unsigned 52-bit integers in each 64-bit element of
132/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
133/// unsigned integer from the intermediate result with the
134/// corresponding unsigned 64-bit integer in `a`, and store the
135/// results in `dst` using writemask `k` (elements are copied
136/// from `k` when the corresponding mask bit is not set).
137///
138/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_mask_madd52hi_epu64)
139#[inline]
140#[target_feature(enable = "avx512ifma,avx512vl")]
141#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
142#[cfg_attr(test, assert_instr(vpmadd52huq))]
143pub fn _mm256_mask_madd52hi_epu64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
144    unsafe { simd_select_bitmask(k, vpmadd52huq_256(a, b, c), a) }
145}
146
147/// Multiply packed unsigned 52-bit integers in each 64-bit element of
148/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
149/// unsigned integer from the intermediate result with the
150/// corresponding unsigned 64-bit integer in `a`, and store the
151/// results in `dst` using writemask `k` (elements are zeroed
152/// out when the corresponding mask bit is not set).
153///
154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_maskz_madd52hi_epu64)
155#[inline]
156#[target_feature(enable = "avx512ifma,avx512vl")]
157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
158#[cfg_attr(test, assert_instr(vpmadd52huq))]
159pub fn _mm256_maskz_madd52hi_epu64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
160    unsafe { simd_select_bitmask(k, vpmadd52huq_256(a, b, c), _mm256_setzero_si256()) }
161}
162
163/// Multiply packed unsigned 52-bit integers in each 64-bit element of
164/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
165/// unsigned integer from the intermediate result with the
166/// corresponding unsigned 64-bit integer in `a`, and store the
167/// results in `dst`.
168///
169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd52lo_avx_epu64)
170#[inline]
171#[target_feature(enable = "avxifma")]
172#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
173#[cfg_attr(test, assert_instr(vpmadd52luq))]
174pub fn _mm256_madd52lo_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
175    unsafe { vpmadd52luq_256(a, b, c) }
176}
177
178/// Multiply packed unsigned 52-bit integers in each 64-bit element of
179/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
180/// unsigned integer from the intermediate result with the
181/// corresponding unsigned 64-bit integer in `a`, and store the
182/// results in `dst`.
183///
184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_madd52lo_epu64)
185#[inline]
186#[target_feature(enable = "avx512ifma,avx512vl")]
187#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
188#[cfg_attr(test, assert_instr(vpmadd52luq))]
189pub fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
190    unsafe { vpmadd52luq_256(a, b, c) }
191}
192
193/// Multiply packed unsigned 52-bit integers in each 64-bit element of
194/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
195/// unsigned integer from the intermediate result with the
196/// corresponding unsigned 64-bit integer in `a`, and store the
197/// results in `dst` using writemask `k` (elements are copied
198/// from `k` when the corresponding mask bit is not set).
199///
200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_mask_madd52lo_epu64)
201#[inline]
202#[target_feature(enable = "avx512ifma,avx512vl")]
203#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
204#[cfg_attr(test, assert_instr(vpmadd52luq))]
205pub fn _mm256_mask_madd52lo_epu64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
206    unsafe { simd_select_bitmask(k, vpmadd52luq_256(a, b, c), a) }
207}
208
209/// Multiply packed unsigned 52-bit integers in each 64-bit element of
210/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
211/// unsigned integer from the intermediate result with the
212/// corresponding unsigned 64-bit integer in `a`, and store the
213/// results in `dst` using writemask `k` (elements are zeroed
214/// out when the corresponding mask bit is not set).
215///
216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_maskz_madd52lo_epu64)
217#[inline]
218#[target_feature(enable = "avx512ifma,avx512vl")]
219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
220#[cfg_attr(test, assert_instr(vpmadd52luq))]
221pub fn _mm256_maskz_madd52lo_epu64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
222    unsafe { simd_select_bitmask(k, vpmadd52luq_256(a, b, c), _mm256_setzero_si256()) }
223}
224
225/// Multiply packed unsigned 52-bit integers in each 64-bit element of
226/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
227/// unsigned integer from the intermediate result with the
228/// corresponding unsigned 64-bit integer in `a`, and store the
229/// results in `dst`.
230///
231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd52hi_avx_epu64)
232#[inline]
233#[target_feature(enable = "avxifma")]
234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
235#[cfg_attr(test, assert_instr(vpmadd52huq))]
236pub fn _mm_madd52hi_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
237    unsafe { vpmadd52huq_128(a, b, c) }
238}
239
240/// Multiply packed unsigned 52-bit integers in each 64-bit element of
241/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
242/// unsigned integer from the intermediate result with the
243/// corresponding unsigned 64-bit integer in `a`, and store the
244/// results in `dst`.
245///
246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_madd52hi_epu64)
247#[inline]
248#[target_feature(enable = "avx512ifma,avx512vl")]
249#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
250#[cfg_attr(test, assert_instr(vpmadd52huq))]
251pub fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
252    unsafe { vpmadd52huq_128(a, b, c) }
253}
254
255/// Multiply packed unsigned 52-bit integers in each 64-bit element of
256/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
257/// unsigned integer from the intermediate result with the
258/// corresponding unsigned 64-bit integer in `a`, and store the
259/// results in `dst` using writemask `k` (elements are copied
260/// from `k` when the corresponding mask bit is not set).
261///
262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_mask_madd52hi_epu64)
263#[inline]
264#[target_feature(enable = "avx512ifma,avx512vl")]
265#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
266#[cfg_attr(test, assert_instr(vpmadd52huq))]
267pub fn _mm_mask_madd52hi_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
268    unsafe { simd_select_bitmask(k, vpmadd52huq_128(a, b, c), a) }
269}
270
271/// Multiply packed unsigned 52-bit integers in each 64-bit element of
272/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
273/// unsigned integer from the intermediate result with the
274/// corresponding unsigned 64-bit integer in `a`, and store the
275/// results in `dst` using writemask `k` (elements are zeroed
276/// out when the corresponding mask bit is not set).
277///
278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_maskz_madd52hi_epu64)
279#[inline]
280#[target_feature(enable = "avx512ifma,avx512vl")]
281#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
282#[cfg_attr(test, assert_instr(vpmadd52huq))]
283pub fn _mm_maskz_madd52hi_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
284    unsafe { simd_select_bitmask(k, vpmadd52huq_128(a, b, c), _mm_setzero_si128()) }
285}
286
287/// Multiply packed unsigned 52-bit integers in each 64-bit element of
288/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
289/// unsigned integer from the intermediate result with the
290/// corresponding unsigned 64-bit integer in `a`, and store the
291/// results in `dst`.
292///
293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd52lo_avx_epu64)
294#[inline]
295#[target_feature(enable = "avxifma")]
296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
297#[cfg_attr(test, assert_instr(vpmadd52luq))]
298pub fn _mm_madd52lo_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
299    unsafe { vpmadd52luq_128(a, b, c) }
300}
301
302/// Multiply packed unsigned 52-bit integers in each 64-bit element of
303/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
304/// unsigned integer from the intermediate result with the
305/// corresponding unsigned 64-bit integer in `a`, and store the
306/// results in `dst`.
307///
308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_madd52lo_epu64)
309#[inline]
310#[target_feature(enable = "avx512ifma,avx512vl")]
311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
312#[cfg_attr(test, assert_instr(vpmadd52luq))]
313pub fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
314    unsafe { vpmadd52luq_128(a, b, c) }
315}
316
317/// Multiply packed unsigned 52-bit integers in each 64-bit element of
318/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
319/// unsigned integer from the intermediate result with the
320/// corresponding unsigned 64-bit integer in `a`, and store the
321/// results in `dst` using writemask `k` (elements are copied
322/// from `k` when the corresponding mask bit is not set).
323///
324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_mask_madd52lo_epu64)
325#[inline]
326#[target_feature(enable = "avx512ifma,avx512vl")]
327#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
328#[cfg_attr(test, assert_instr(vpmadd52luq))]
329pub fn _mm_mask_madd52lo_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
330    unsafe { simd_select_bitmask(k, vpmadd52luq_128(a, b, c), a) }
331}
332
333/// Multiply packed unsigned 52-bit integers in each 64-bit element of
334/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
335/// unsigned integer from the intermediate result with the
336/// corresponding unsigned 64-bit integer in `a`, and store the
337/// results in `dst` using writemask `k` (elements are zeroed
338/// out when the corresponding mask bit is not set).
339///
340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_maskz_madd52lo_epu64)
341#[inline]
342#[target_feature(enable = "avx512ifma,avx512vl")]
343#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
344#[cfg_attr(test, assert_instr(vpmadd52luq))]
345pub fn _mm_maskz_madd52lo_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
346    unsafe { simd_select_bitmask(k, vpmadd52luq_128(a, b, c), _mm_setzero_si128()) }
347}
348
349#[allow(improper_ctypes)]
350unsafe extern "C" {
351    #[link_name = "llvm.x86.avx512.vpmadd52l.uq.128"]
352    fn vpmadd52luq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
353    #[link_name = "llvm.x86.avx512.vpmadd52h.uq.128"]
354    fn vpmadd52huq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
355    #[link_name = "llvm.x86.avx512.vpmadd52l.uq.256"]
356    fn vpmadd52luq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
357    #[link_name = "llvm.x86.avx512.vpmadd52h.uq.256"]
358    fn vpmadd52huq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
359    #[link_name = "llvm.x86.avx512.vpmadd52l.uq.512"]
360    fn vpmadd52luq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
361    #[link_name = "llvm.x86.avx512.vpmadd52h.uq.512"]
362    fn vpmadd52huq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
363}
364
#[cfg(test)]
mod tests {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    /// Mask shared by every masked test (bits 0, 2, 3, 5 and 6 set).
    const K: __mmask8 = 0b01101101;

    /// Value broadcast into every element of the addend `a`.
    const A: i64 = 10 << 40;
    /// Value broadcast into every element of the first factor `b`.
    const B: i64 = (11 << 40) + 4;
    /// Value broadcast into every element of the second factor `c`.
    const C: i64 = (12 << 40) + 3;

    /// (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
    const HI: i64 = 11030549757952;
    /// (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
    const LO: i64 = 100055558127628;

    // ---- 512-bit ----

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52hi_epu64() {
        let (a, b, c) = (_mm512_set1_epi64(A), _mm512_set1_epi64(B), _mm512_set1_epi64(C));
        let expected = _mm512_set1_epi64(HI);
        assert_eq_m512i(expected, _mm512_madd52hi_epu64(a, b, c));
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_mask_madd52hi_epu64() {
        let (a, b, c) = (_mm512_set1_epi64(A), _mm512_set1_epi64(B), _mm512_set1_epi64(C));
        // Unselected elements are expected to equal `a`.
        let expected = _mm512_mask_blend_epi64(K, a, _mm512_set1_epi64(HI));
        assert_eq_m512i(expected, _mm512_mask_madd52hi_epu64(a, K, b, c));
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_maskz_madd52hi_epu64() {
        let (a, b, c) = (_mm512_set1_epi64(A), _mm512_set1_epi64(B), _mm512_set1_epi64(C));
        // Unselected elements are expected to be zero.
        let expected = _mm512_mask_blend_epi64(K, _mm512_setzero_si512(), _mm512_set1_epi64(HI));
        assert_eq_m512i(expected, _mm512_maskz_madd52hi_epu64(K, a, b, c));
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52lo_epu64() {
        let (a, b, c) = (_mm512_set1_epi64(A), _mm512_set1_epi64(B), _mm512_set1_epi64(C));
        let expected = _mm512_set1_epi64(LO);
        assert_eq_m512i(expected, _mm512_madd52lo_epu64(a, b, c));
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_mask_madd52lo_epu64() {
        let (a, b, c) = (_mm512_set1_epi64(A), _mm512_set1_epi64(B), _mm512_set1_epi64(C));
        // Unselected elements are expected to equal `a`.
        let expected = _mm512_mask_blend_epi64(K, a, _mm512_set1_epi64(LO));
        assert_eq_m512i(expected, _mm512_mask_madd52lo_epu64(a, K, b, c));
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_maskz_madd52lo_epu64() {
        let (a, b, c) = (_mm512_set1_epi64(A), _mm512_set1_epi64(B), _mm512_set1_epi64(C));
        // Unselected elements are expected to be zero.
        let expected = _mm512_mask_blend_epi64(K, _mm512_setzero_si512(), _mm512_set1_epi64(LO));
        assert_eq_m512i(expected, _mm512_maskz_madd52lo_epu64(K, a, b, c));
    }

    // ---- 256-bit ----

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm256_madd52hi_avx_epu64() {
        let (a, b, c) = (_mm256_set1_epi64x(A), _mm256_set1_epi64x(B), _mm256_set1_epi64x(C));
        let expected = _mm256_set1_epi64x(HI);
        assert_eq_m256i(expected, _mm256_madd52hi_avx_epu64(a, b, c));
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52hi_epu64() {
        let (a, b, c) = (_mm256_set1_epi64x(A), _mm256_set1_epi64x(B), _mm256_set1_epi64x(C));
        let expected = _mm256_set1_epi64x(HI);
        assert_eq_m256i(expected, _mm256_madd52hi_epu64(a, b, c));
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_mask_madd52hi_epu64() {
        let (a, b, c) = (_mm256_set1_epi64x(A), _mm256_set1_epi64x(B), _mm256_set1_epi64x(C));
        // Unselected elements are expected to equal `a`.
        let expected = _mm256_mask_blend_epi64(K, a, _mm256_set1_epi64x(HI));
        assert_eq_m256i(expected, _mm256_mask_madd52hi_epu64(a, K, b, c));
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_maskz_madd52hi_epu64() {
        let (a, b, c) = (_mm256_set1_epi64x(A), _mm256_set1_epi64x(B), _mm256_set1_epi64x(C));
        // Unselected elements are expected to be zero.
        let expected = _mm256_mask_blend_epi64(K, _mm256_setzero_si256(), _mm256_set1_epi64x(HI));
        assert_eq_m256i(expected, _mm256_maskz_madd52hi_epu64(K, a, b, c));
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm256_madd52lo_avx_epu64() {
        let (a, b, c) = (_mm256_set1_epi64x(A), _mm256_set1_epi64x(B), _mm256_set1_epi64x(C));
        let expected = _mm256_set1_epi64x(LO);
        assert_eq_m256i(expected, _mm256_madd52lo_avx_epu64(a, b, c));
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52lo_epu64() {
        let (a, b, c) = (_mm256_set1_epi64x(A), _mm256_set1_epi64x(B), _mm256_set1_epi64x(C));
        let expected = _mm256_set1_epi64x(LO);
        assert_eq_m256i(expected, _mm256_madd52lo_epu64(a, b, c));
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_mask_madd52lo_epu64() {
        let (a, b, c) = (_mm256_set1_epi64x(A), _mm256_set1_epi64x(B), _mm256_set1_epi64x(C));
        // Unselected elements are expected to equal `a`.
        let expected = _mm256_mask_blend_epi64(K, a, _mm256_set1_epi64x(LO));
        assert_eq_m256i(expected, _mm256_mask_madd52lo_epu64(a, K, b, c));
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_maskz_madd52lo_epu64() {
        let (a, b, c) = (_mm256_set1_epi64x(A), _mm256_set1_epi64x(B), _mm256_set1_epi64x(C));
        // Unselected elements are expected to be zero.
        let expected = _mm256_mask_blend_epi64(K, _mm256_setzero_si256(), _mm256_set1_epi64x(LO));
        assert_eq_m256i(expected, _mm256_maskz_madd52lo_epu64(K, a, b, c));
    }

    // ---- 128-bit ----

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm_madd52hi_avx_epu64() {
        let (a, b, c) = (_mm_set1_epi64x(A), _mm_set1_epi64x(B), _mm_set1_epi64x(C));
        let expected = _mm_set1_epi64x(HI);
        assert_eq_m128i(expected, _mm_madd52hi_avx_epu64(a, b, c));
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52hi_epu64() {
        let (a, b, c) = (_mm_set1_epi64x(A), _mm_set1_epi64x(B), _mm_set1_epi64x(C));
        let expected = _mm_set1_epi64x(HI);
        assert_eq_m128i(expected, _mm_madd52hi_epu64(a, b, c));
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_mask_madd52hi_epu64() {
        let (a, b, c) = (_mm_set1_epi64x(A), _mm_set1_epi64x(B), _mm_set1_epi64x(C));
        // Unselected elements are expected to equal `a`.
        let expected = _mm_mask_blend_epi64(K, a, _mm_set1_epi64x(HI));
        assert_eq_m128i(expected, _mm_mask_madd52hi_epu64(a, K, b, c));
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_maskz_madd52hi_epu64() {
        let (a, b, c) = (_mm_set1_epi64x(A), _mm_set1_epi64x(B), _mm_set1_epi64x(C));
        // Unselected elements are expected to be zero.
        let expected = _mm_mask_blend_epi64(K, _mm_setzero_si128(), _mm_set1_epi64x(HI));
        assert_eq_m128i(expected, _mm_maskz_madd52hi_epu64(K, a, b, c));
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm_madd52lo_avx_epu64() {
        let (a, b, c) = (_mm_set1_epi64x(A), _mm_set1_epi64x(B), _mm_set1_epi64x(C));
        let expected = _mm_set1_epi64x(LO);
        assert_eq_m128i(expected, _mm_madd52lo_avx_epu64(a, b, c));
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52lo_epu64() {
        let (a, b, c) = (_mm_set1_epi64x(A), _mm_set1_epi64x(B), _mm_set1_epi64x(C));
        let expected = _mm_set1_epi64x(LO);
        assert_eq_m128i(expected, _mm_madd52lo_epu64(a, b, c));
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_mask_madd52lo_epu64() {
        let (a, b, c) = (_mm_set1_epi64x(A), _mm_set1_epi64x(B), _mm_set1_epi64x(C));
        // Unselected elements are expected to equal `a`.
        let expected = _mm_mask_blend_epi64(K, a, _mm_set1_epi64x(LO));
        assert_eq_m128i(expected, _mm_mask_madd52lo_epu64(a, K, b, c));
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_maskz_madd52lo_epu64() {
        let (a, b, c) = (_mm_set1_epi64x(A), _mm_set1_epi64x(B), _mm_set1_epi64x(C));
        // Unselected elements are expected to be zero.
        let expected = _mm_mask_blend_epi64(K, _mm_setzero_si128(), _mm_set1_epi64x(LO));
        assert_eq_m128i(expected, _mm_maskz_madd52lo_epu64(K, a, b, c));
    }
}