core/stdarch/crates/core_arch/src/x86/
avx512vnni.rs

1use crate::core_arch::{simd::*, x86::*};
2use crate::intrinsics::simd::*;
3
4#[cfg(test)]
5use stdarch_test::assert_instr;
6
7/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
8///
9/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssd_epi32&expand=2219)
10#[inline]
11#[target_feature(enable = "avx512vnni")]
12#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13#[cfg_attr(test, assert_instr(vpdpwssd))]
14pub fn _mm512_dpwssd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
15    unsafe { transmute(vpdpwssd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
16}
17
18/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19///
20/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssd_epi32&expand=2220)
21#[inline]
22#[target_feature(enable = "avx512vnni")]
23#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24#[cfg_attr(test, assert_instr(vpdpwssd))]
25pub fn _mm512_mask_dpwssd_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26    unsafe {
27        let r = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
28        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
29    }
30}
31
32/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
33///
34/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssd_epi32&expand=2221)
35#[inline]
36#[target_feature(enable = "avx512vnni")]
37#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38#[cfg_attr(test, assert_instr(vpdpwssd))]
39pub fn _mm512_maskz_dpwssd_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
40    unsafe {
41        let r = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
42        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
43    }
44}
45
46/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
47///
48/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssd_avx_epi32&expand=2713)
49#[inline]
50#[target_feature(enable = "avxvnni")]
51#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
52#[cfg_attr(test, assert_instr(vpdpwssd))]
53pub fn _mm256_dpwssd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
54    unsafe { transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
55}
56
57/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
58///
59/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssd_epi32&expand=2216)
60#[inline]
61#[target_feature(enable = "avx512vnni,avx512vl")]
62#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
63#[cfg_attr(test, assert_instr(vpdpwssd))]
64pub fn _mm256_dpwssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
65    unsafe { transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
66}
67
68/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
69///
70/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssd_epi32&expand=2217)
71#[inline]
72#[target_feature(enable = "avx512vnni,avx512vl")]
73#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
74#[cfg_attr(test, assert_instr(vpdpwssd))]
75pub fn _mm256_mask_dpwssd_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
76    unsafe {
77        let r = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
78        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
79    }
80}
81
82/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
83///
84/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssd_epi32&expand=2218)
85#[inline]
86#[target_feature(enable = "avx512vnni,avx512vl")]
87#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
88#[cfg_attr(test, assert_instr(vpdpwssd))]
89pub fn _mm256_maskz_dpwssd_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
90    unsafe {
91        let r = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
92        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
93    }
94}
95
96/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
97///
98/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssd_avx_epi32&expand=2712)
99#[inline]
100#[target_feature(enable = "avxvnni")]
101#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
102#[cfg_attr(test, assert_instr(vpdpwssd))]
103pub fn _mm_dpwssd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
104    unsafe { transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
105}
106
107/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
108///
109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssd_epi32&expand=2213)
110#[inline]
111#[target_feature(enable = "avx512vnni,avx512vl")]
112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
113#[cfg_attr(test, assert_instr(vpdpwssd))]
114pub fn _mm_dpwssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
115    unsafe { transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
116}
117
118/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
119///
120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssd_epi32&expand=2214)
121#[inline]
122#[target_feature(enable = "avx512vnni,avx512vl")]
123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
124#[cfg_attr(test, assert_instr(vpdpwssd))]
125pub fn _mm_mask_dpwssd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
126    unsafe {
127        let r = _mm_dpwssd_epi32(src, a, b).as_i32x4();
128        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
129    }
130}
131
132/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
133///
134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssd_epi32&expand=2215)
135#[inline]
136#[target_feature(enable = "avx512vnni,avx512vl")]
137#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
138#[cfg_attr(test, assert_instr(vpdpwssd))]
139pub fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
140    unsafe {
141        let r = _mm_dpwssd_epi32(src, a, b).as_i32x4();
142        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
143    }
144}
145
146/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
147///
148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssds_epi32&expand=2228)
149#[inline]
150#[target_feature(enable = "avx512vnni")]
151#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
152#[cfg_attr(test, assert_instr(vpdpwssds))]
153pub fn _mm512_dpwssds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
154    unsafe { transmute(vpdpwssds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
155}
156
157/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
158///
159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssds_epi32&expand=2229)
160#[inline]
161#[target_feature(enable = "avx512vnni")]
162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
163#[cfg_attr(test, assert_instr(vpdpwssds))]
164pub fn _mm512_mask_dpwssds_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
165    unsafe {
166        let r = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
167        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
168    }
169}
170
171/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
172///
173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssds_epi32&expand=2230)
174#[inline]
175#[target_feature(enable = "avx512vnni")]
176#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
177#[cfg_attr(test, assert_instr(vpdpwssds))]
178pub fn _mm512_maskz_dpwssds_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
179    unsafe {
180        let r = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
181        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
182    }
183}
184
185/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
186///
187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssds_avx_epi32&expand=2726)
188#[inline]
189#[target_feature(enable = "avxvnni")]
190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
191#[cfg_attr(test, assert_instr(vpdpwssds))]
192pub fn _mm256_dpwssds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
193    unsafe { transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
194}
195
196/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
197///
198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssds_epi32&expand=2225)
199#[inline]
200#[target_feature(enable = "avx512vnni,avx512vl")]
201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
202#[cfg_attr(test, assert_instr(vpdpwssds))]
203pub fn _mm256_dpwssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
204    unsafe { transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
205}
206
207/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
208///
209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssds_epi32&expand=2226)
210#[inline]
211#[target_feature(enable = "avx512vnni,avx512vl")]
212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
213#[cfg_attr(test, assert_instr(vpdpwssds))]
214pub fn _mm256_mask_dpwssds_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
215    unsafe {
216        let r = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
217        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
218    }
219}
220
221/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
222///
223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssds_epi32&expand=2227)
224#[inline]
225#[target_feature(enable = "avx512vnni,avx512vl")]
226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
227#[cfg_attr(test, assert_instr(vpdpwssds))]
228pub fn _mm256_maskz_dpwssds_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
229    unsafe {
230        let r = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
231        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
232    }
233}
234
235/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
236///
237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssds_avx_epi32&expand=2725)
238#[inline]
239#[target_feature(enable = "avxvnni")]
240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
241#[cfg_attr(test, assert_instr(vpdpwssds))]
242pub fn _mm_dpwssds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
243    unsafe { transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
244}
245
246/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
247///
248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssds_epi32&expand=2222)
249#[inline]
250#[target_feature(enable = "avx512vnni,avx512vl")]
251#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
252#[cfg_attr(test, assert_instr(vpdpwssds))]
253pub fn _mm_dpwssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
254    unsafe { transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
255}
256
257/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
258///
259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssds_epi32&expand=2223)
260#[inline]
261#[target_feature(enable = "avx512vnni,avx512vl")]
262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
263#[cfg_attr(test, assert_instr(vpdpwssds))]
264pub fn _mm_mask_dpwssds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
265    unsafe {
266        let r = _mm_dpwssds_epi32(src, a, b).as_i32x4();
267        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
268    }
269}
270
271/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
272///
273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssds_epi32&expand=2224)
274#[inline]
275#[target_feature(enable = "avx512vnni,avx512vl")]
276#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
277#[cfg_attr(test, assert_instr(vpdpwssds))]
278pub fn _mm_maskz_dpwssds_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
279    unsafe {
280        let r = _mm_dpwssds_epi32(src, a, b).as_i32x4();
281        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
282    }
283}
284
285/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
286///
287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusd_epi32&expand=2201)
288#[inline]
289#[target_feature(enable = "avx512vnni")]
290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
291#[cfg_attr(test, assert_instr(vpdpbusd))]
292pub fn _mm512_dpbusd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
293    unsafe { transmute(vpdpbusd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
294}
295
296/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
297///
298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusd_epi32&expand=2202)
299#[inline]
300#[target_feature(enable = "avx512vnni")]
301#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
302#[cfg_attr(test, assert_instr(vpdpbusd))]
303pub fn _mm512_mask_dpbusd_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
304    unsafe {
305        let r = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
306        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
307    }
308}
309
310/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
311///
312/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusd_epi32&expand=2203)
313#[inline]
314#[target_feature(enable = "avx512vnni")]
315#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
316#[cfg_attr(test, assert_instr(vpdpbusd))]
317pub fn _mm512_maskz_dpbusd_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
318    unsafe {
319        let r = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
320        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
321    }
322}
323
324/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
325///
326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusd_avx_epi32&expand=2683)
327#[inline]
328#[target_feature(enable = "avxvnni")]
329#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
330#[cfg_attr(test, assert_instr(vpdpbusd))]
331pub fn _mm256_dpbusd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
332    unsafe { transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
333}
334
335/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
336///
337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusd_epi32&expand=2198)
338#[inline]
339#[target_feature(enable = "avx512vnni,avx512vl")]
340#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
341#[cfg_attr(test, assert_instr(vpdpbusd))]
342pub fn _mm256_dpbusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
343    unsafe { transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
344}
345
346/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
347///
348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusd_epi32&expand=2199)
349#[inline]
350#[target_feature(enable = "avx512vnni,avx512vl")]
351#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
352#[cfg_attr(test, assert_instr(vpdpbusd))]
353pub fn _mm256_mask_dpbusd_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
354    unsafe {
355        let r = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
356        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
357    }
358}
359
360/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
361///
362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusd_epi32&expand=2200)
363#[inline]
364#[target_feature(enable = "avx512vnni,avx512vl")]
365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
366#[cfg_attr(test, assert_instr(vpdpbusd))]
367pub fn _mm256_maskz_dpbusd_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
368    unsafe {
369        let r = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
370        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
371    }
372}
373
374/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
375///
376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusd_avx_epi32&expand=2682)
377#[inline]
378#[target_feature(enable = "avxvnni")]
379#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
380#[cfg_attr(test, assert_instr(vpdpbusd))]
381pub fn _mm_dpbusd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
382    unsafe { transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
383}
384
385/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
386///
387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusd_epi32&expand=2195)
388#[inline]
389#[target_feature(enable = "avx512vnni,avx512vl")]
390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
391#[cfg_attr(test, assert_instr(vpdpbusd))]
392pub fn _mm_dpbusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
393    unsafe { transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
394}
395
396/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
397///
398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusd_epi32&expand=2196)
399#[inline]
400#[target_feature(enable = "avx512vnni,avx512vl")]
401#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
402#[cfg_attr(test, assert_instr(vpdpbusd))]
403pub fn _mm_mask_dpbusd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
404    unsafe {
405        let r = _mm_dpbusd_epi32(src, a, b).as_i32x4();
406        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
407    }
408}
409
410/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
411///
412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusd_epi32&expand=2197)
413#[inline]
414#[target_feature(enable = "avx512vnni,avx512vl")]
415#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
416#[cfg_attr(test, assert_instr(vpdpbusd))]
417pub fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
418    unsafe {
419        let r = _mm_dpbusd_epi32(src, a, b).as_i32x4();
420        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
421    }
422}
423
424/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
425///
426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusds_epi32&expand=2210)
427#[inline]
428#[target_feature(enable = "avx512vnni")]
429#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
430#[cfg_attr(test, assert_instr(vpdpbusds))]
431pub fn _mm512_dpbusds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
432    unsafe { transmute(vpdpbusds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
433}
434
435/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
436///
437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusds_epi32&expand=2211)
438#[inline]
439#[target_feature(enable = "avx512vnni")]
440#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
441#[cfg_attr(test, assert_instr(vpdpbusds))]
442pub fn _mm512_mask_dpbusds_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
443    unsafe {
444        let r = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
445        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
446    }
447}
448
449/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
450///
451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusds_epi32&expand=2212)
452#[inline]
453#[target_feature(enable = "avx512vnni")]
454#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
455#[cfg_attr(test, assert_instr(vpdpbusds))]
456pub fn _mm512_maskz_dpbusds_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
457    unsafe {
458        let r = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
459        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
460    }
461}
462
463/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
464///
465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusds_avx_epi32&expand=2696)
466#[inline]
467#[target_feature(enable = "avxvnni")]
468#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
469#[cfg_attr(test, assert_instr(vpdpbusds))]
470pub fn _mm256_dpbusds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
471    unsafe { transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
472}
473
474/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
475///
476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusds_epi32&expand=2207)
477#[inline]
478#[target_feature(enable = "avx512vnni,avx512vl")]
479#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
480#[cfg_attr(test, assert_instr(vpdpbusds))]
481pub fn _mm256_dpbusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
482    unsafe { transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
483}
484
485/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
486///
487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusds_epi32&expand=2208)
488#[inline]
489#[target_feature(enable = "avx512vnni,avx512vl")]
490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
491#[cfg_attr(test, assert_instr(vpdpbusds))]
492pub fn _mm256_mask_dpbusds_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
493    unsafe {
494        let r = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
495        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
496    }
497}
498
499/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
500///
501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusds_epi32&expand=2209)
502#[inline]
503#[target_feature(enable = "avx512vnni,avx512vl")]
504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
505#[cfg_attr(test, assert_instr(vpdpbusds))]
506pub fn _mm256_maskz_dpbusds_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
507    unsafe {
508        let r = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
509        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
510    }
511}
512
513/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
514///
515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusds_avx_epi32&expand=2695)
516#[inline]
517#[target_feature(enable = "avxvnni")]
518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
519#[cfg_attr(test, assert_instr(vpdpbusds))]
520pub fn _mm_dpbusds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
521    unsafe { transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
522}
523
524/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
525///
526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusds_epi32&expand=2204)
527#[inline]
528#[target_feature(enable = "avx512vnni,avx512vl")]
529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
530#[cfg_attr(test, assert_instr(vpdpbusds))]
531pub fn _mm_dpbusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
532    unsafe { transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
533}
534
535/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
536///
537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusds_epi32&expand=2205)
538#[inline]
539#[target_feature(enable = "avx512vnni,avx512vl")]
540#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
541#[cfg_attr(test, assert_instr(vpdpbusds))]
542pub fn _mm_mask_dpbusds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
543    unsafe {
544        let r = _mm_dpbusds_epi32(src, a, b).as_i32x4();
545        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
546    }
547}
548
549/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
550///
551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusds_epi32&expand=2206)
552#[inline]
553#[target_feature(enable = "avx512vnni,avx512vl")]
554#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
555#[cfg_attr(test, assert_instr(vpdpbusds))]
556pub fn _mm_maskz_dpbusds_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
557    unsafe {
558        let r = _mm_dpbusds_epi32(src, a, b).as_i32x4();
559        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
560    }
561}
562
563/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
564/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
565/// 32-bit integer in src, and store the packed 32-bit results in dst.
566///
567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbssd_epi32&expand=2674)
568#[inline]
569#[target_feature(enable = "avxvnniint8")]
570#[cfg_attr(test, assert_instr(vpdpbssd))]
571#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
572pub fn _mm_dpbssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
573    unsafe { transmute(vpdpbssd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
574}
575
576/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
577/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
578/// 32-bit integer in src, and store the packed 32-bit results in dst.
579///
580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbssd_epi32&expand=2675)
581#[inline]
582#[target_feature(enable = "avxvnniint8")]
583#[cfg_attr(test, assert_instr(vpdpbssd))]
584#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
585pub fn _mm256_dpbssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
586    unsafe { transmute(vpdpbssd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
587}
588
589/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
590/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
591/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
592///
593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbssds_epi32&expand=2676)
594#[inline]
595#[target_feature(enable = "avxvnniint8")]
596#[cfg_attr(test, assert_instr(vpdpbssds))]
597#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
598pub fn _mm_dpbssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
599    unsafe { transmute(vpdpbssds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
600}
601
602/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
603/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
604/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
605///
606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbssds_epi32&expand=2677)
607#[inline]
608#[target_feature(enable = "avxvnniint8")]
609#[cfg_attr(test, assert_instr(vpdpbssds))]
610#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
611pub fn _mm256_dpbssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
612    unsafe { transmute(vpdpbssds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
613}
614
615/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
616/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
617/// 32-bit integer in src, and store the packed 32-bit results in dst.
618///
619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbsud_epi32&expand=2678)
620#[inline]
621#[target_feature(enable = "avxvnniint8")]
622#[cfg_attr(test, assert_instr(vpdpbsud))]
623#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
624pub fn _mm_dpbsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
625    unsafe { transmute(vpdpbsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
626}
627
628/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
629/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
630/// 32-bit integer in src, and store the packed 32-bit results in dst.
631///
632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbsud_epi32&expand=2679)
633#[inline]
634#[target_feature(enable = "avxvnniint8")]
635#[cfg_attr(test, assert_instr(vpdpbsud))]
636#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
637pub fn _mm256_dpbsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
638    unsafe { transmute(vpdpbsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
639}
640
641/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
642/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
643/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
644///
645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbsuds_epi32&expand=2680)
646#[inline]
647#[target_feature(enable = "avxvnniint8")]
648#[cfg_attr(test, assert_instr(vpdpbsuds))]
649#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
650pub fn _mm_dpbsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
651    unsafe { transmute(vpdpbsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
652}
653
654/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
655/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
656/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
657///
658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbsuds_epi32&expand=2681)
659#[inline]
660#[target_feature(enable = "avxvnniint8")]
661#[cfg_attr(test, assert_instr(vpdpbsuds))]
662#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
663pub fn _mm256_dpbsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
664    unsafe { transmute(vpdpbsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
665}
666
667/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
668/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
669/// 32-bit integer in src, and store the packed 32-bit results in dst.
670///
671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbuud_epi32&expand=2708)
672#[inline]
673#[target_feature(enable = "avxvnniint8")]
674#[cfg_attr(test, assert_instr(vpdpbuud))]
675#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
676pub fn _mm_dpbuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
677    unsafe { transmute(vpdpbuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
678}
679
680/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
681/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
682/// 32-bit integer in src, and store the packed 32-bit results in dst.
683///
684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbuud_epi32&expand=2709)
685#[inline]
686#[target_feature(enable = "avxvnniint8")]
687#[cfg_attr(test, assert_instr(vpdpbuud))]
688#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
689pub fn _mm256_dpbuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
690    unsafe { transmute(vpdpbuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
691}
692
693/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
694/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
695/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
696///
697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbuuds_epi32&expand=2710)
698#[inline]
699#[target_feature(enable = "avxvnniint8")]
700#[cfg_attr(test, assert_instr(vpdpbuuds))]
701#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
702pub fn _mm_dpbuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
703    unsafe { transmute(vpdpbuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
704}
705
706/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
707/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
708/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
709///
710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbuuds_epi32&expand=2711)
711#[inline]
712#[target_feature(enable = "avxvnniint8")]
713#[cfg_attr(test, assert_instr(vpdpbuuds))]
714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
715pub fn _mm256_dpbuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
716    unsafe { transmute(vpdpbuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
717}
718
719/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
720/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
721/// 32-bit integer in src, and store the packed 32-bit results in dst.
722///
723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwsud_epi32&expand=2738)
724#[inline]
725#[target_feature(enable = "avxvnniint16")]
726#[cfg_attr(test, assert_instr(vpdpwsud))]
727#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
728pub fn _mm_dpwsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
729    unsafe { transmute(vpdpwsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
730}
731
732/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
733/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
734/// 32-bit integer in src, and store the packed 32-bit results in dst.
735///
736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwsud_epi32&expand=2739)
737#[inline]
738#[target_feature(enable = "avxvnniint16")]
739#[cfg_attr(test, assert_instr(vpdpwsud))]
740#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
741pub fn _mm256_dpwsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
742    unsafe { transmute(vpdpwsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
743}
744
745/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
746/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
747/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
748///
749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwsuds_epi32&expand=2740)
750#[inline]
751#[target_feature(enable = "avxvnniint16")]
752#[cfg_attr(test, assert_instr(vpdpwsuds))]
753#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
754pub fn _mm_dpwsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
755    unsafe { transmute(vpdpwsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
756}
757
758/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
759/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
760/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
761///
762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwsuds_epi32&expand=2741)
763#[inline]
764#[target_feature(enable = "avxvnniint16")]
765#[cfg_attr(test, assert_instr(vpdpwsuds))]
766#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
767pub fn _mm256_dpwsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
768    unsafe { transmute(vpdpwsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
769}
770
771/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
772/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
773/// 32-bit integer in src, and store the packed 32-bit results in dst.
774///
775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwusd_epi32&expand=2742)
776#[inline]
777#[target_feature(enable = "avxvnniint16")]
778#[cfg_attr(test, assert_instr(vpdpwusd))]
779#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
780pub fn _mm_dpwusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
781    unsafe { transmute(vpdpwusd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
782}
783
784/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
785/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
786/// 32-bit integer in src, and store the packed 32-bit results in dst.
787///
788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwusd_epi32&expand=2743)
789#[inline]
790#[target_feature(enable = "avxvnniint16")]
791#[cfg_attr(test, assert_instr(vpdpwusd))]
792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
793pub fn _mm256_dpwusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
794    unsafe { transmute(vpdpwusd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
795}
796
797/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
798/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
799/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
800///
801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwusds_epi32&expand=2744)
802#[inline]
803#[target_feature(enable = "avxvnniint16")]
804#[cfg_attr(test, assert_instr(vpdpwusds))]
805#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
806pub fn _mm_dpwusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
807    unsafe { transmute(vpdpwusds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
808}
809
810/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
811/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
812/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
813///
814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwusds_epi32&expand=2745)
815#[inline]
816#[target_feature(enable = "avxvnniint16")]
817#[cfg_attr(test, assert_instr(vpdpwusds))]
818#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
819pub fn _mm256_dpwusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
820    unsafe { transmute(vpdpwusds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
821}
822
823/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
824/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
825/// 32-bit integer in src, and store the packed 32-bit results in dst.
826///
827/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwuud_epi32&expand=2746)
828#[inline]
829#[target_feature(enable = "avxvnniint16")]
830#[cfg_attr(test, assert_instr(vpdpwuud))]
831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
832pub fn _mm_dpwuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
833    unsafe { transmute(vpdpwuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
834}
835
836/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
837/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
838/// 32-bit integer in src, and store the packed 32-bit results in dst.
839///
840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwuud_epi32&expand=2747)
841#[inline]
842#[target_feature(enable = "avxvnniint16")]
843#[cfg_attr(test, assert_instr(vpdpwuud))]
844#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
845pub fn _mm256_dpwuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
846    unsafe { transmute(vpdpwuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
847}
848
849/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
850/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
851/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
852///
853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwuuds_epi32&expand=2748)
854#[inline]
855#[target_feature(enable = "avxvnniint16")]
856#[cfg_attr(test, assert_instr(vpdpwuuds))]
857#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
858pub fn _mm_dpwuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
859    unsafe { transmute(vpdpwuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
860}
861
862/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
863/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
864/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
865///
866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwuuds_epi32&expand=2749)
867#[inline]
868#[target_feature(enable = "avxvnniint16")]
869#[cfg_attr(test, assert_instr(vpdpwuuds))]
870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
871pub fn _mm256_dpwuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
872    unsafe { transmute(vpdpwuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
873}
874
// Raw LLVM intrinsic bindings backing the VNNI wrappers above.
//
// Naming scheme (mirrors the instruction mnemonics): `vpdpb*` = byte (8-bit)
// dot product accumulating into 32-bit lanes; `vpdpw*` = word (16-bit) dot
// product. The next two letters give the signedness of the `a` and `b`
// operands (`s` = signed, `u` = unsigned); a trailing `s` means the
// accumulation saturates. The numeric suffix in the link name is the vector
// width in bits.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // AVX512-VNNI (and AVX-VNNI via VL): signed-word x signed-word.
    #[link_name = "llvm.x86.avx512.vpdpwssd.512"]
    fn vpdpwssd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpdpwssd.256"]
    fn vpdpwssd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpdpwssd.128"]
    fn vpdpwssd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;

    // Saturating variant of the above.
    #[link_name = "llvm.x86.avx512.vpdpwssds.512"]
    fn vpdpwssds(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpdpwssds.256"]
    fn vpdpwssds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpdpwssds.128"]
    fn vpdpwssds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;

    // Unsigned-byte x signed-byte dot products.
    #[link_name = "llvm.x86.avx512.vpdpbusd.512"]
    fn vpdpbusd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpdpbusd.256"]
    fn vpdpbusd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpdpbusd.128"]
    fn vpdpbusd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;

    // Saturating variant of the above.
    #[link_name = "llvm.x86.avx512.vpdpbusds.512"]
    fn vpdpbusds(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpdpbusds.256"]
    fn vpdpbusds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpdpbusds.128"]
    fn vpdpbusds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;

    // AVX-VNNI-INT8 (128/256-bit only): remaining byte signedness combos.
    #[link_name = "llvm.x86.avx2.vpdpbssd.128"]
    fn vpdpbssd_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpbssd.256"]
    fn vpdpbssd_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpbssds.128"]
    fn vpdpbssds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpbssds.256"]
    fn vpdpbssds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpbsud.128"]
    fn vpdpbsud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpbsud.256"]
    fn vpdpbsud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpbsuds.128"]
    fn vpdpbsuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpbsuds.256"]
    fn vpdpbsuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpbuud.128"]
    fn vpdpbuud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpbuud.256"]
    fn vpdpbuud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpbuuds.128"]
    fn vpdpbuuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpbuuds.256"]
    fn vpdpbuuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    // AVX-VNNI-INT16 (128/256-bit only): mixed/unsigned word combos.
    #[link_name = "llvm.x86.avx2.vpdpwsud.128"]
    fn vpdpwsud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpwsud.256"]
    fn vpdpwsud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpwsuds.128"]
    fn vpdpwsuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpwsuds.256"]
    fn vpdpwsuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpwusd.128"]
    fn vpdpwusd_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpwusd.256"]
    fn vpdpwusd_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpwusds.128"]
    fn vpdpwusds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpwusds.256"]
    fn vpdpwusds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpwuud.128"]
    fn vpdpwuud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpwuud.256"]
    fn vpdpwuud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpwuuds.128"]
    fn vpdpwuuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpwuuds.256"]
    fn vpdpwuuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
}
965
#[cfg(test)]
mod tests {

    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    // Test pattern shared by every VNNI dot-product test below. Inputs are
    // splatted so the expected accumulator is easy to derive by hand:
    //  - word (16-bit) variants: each 32-bit lane of `a` and `b` holds the
    //    word pair [1, 1] (`1 << 16 | 1 << 0`), so the two 1*1 products plus
    //    src (1) give 3 in every lane;
    //  - byte (8-bit) variants: each 32-bit lane holds the byte quadruple
    //    [1, 1, 1, 1] (`1 << 24 | 1 << 16 | 1 << 8 | 1 << 0`), so the four
    //    1*1 products plus src (1) give 5 in every lane.
    // With such small operands the saturating (`*s`) variants produce the
    // same values as the wrapping ones, so they share the expected results.
    // Masked variants are exercised with an all-zeros mask (result must pass
    // `src` through unchanged, or be all zeros for `maskz`) and an all-ones
    // mask (result must equal the full computation).
    // NOTE(review): no mixed mask is exercised anywhere in this module, so
    // per-lane blend/zero behaviour is only covered at the two extremes —
    // worth confirming whether a mixed-mask case should be added.

    // --- vpdpwssd: signed word pairs, wrapping 32-bit accumulate ---

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpwssd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_dpwssd_epi32(src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpwssd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_mask_dpwssd_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpwssd_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpwssd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        // Note: for the maskz intrinsics the mask precedes `src`.
        let r = _mm512_maskz_dpwssd_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpwssd_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    // `_avx_` suffix = the AVX-VNNI (VEX-encoded) form, gated on `avxvnni`
    // rather than `avx512vnni,avx512vl`.
    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm256_dpwssd_avx_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwssd_avx_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpwssd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwssd_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpwssd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_mask_dpwssd_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpwssd_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpwssd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_maskz_dpwssd_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpwssd_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm_dpwssd_avx_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwssd_avx_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpwssd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwssd_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpwssd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_mask_dpwssd_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        // 128-bit vectors have 4 i32 lanes, so 0b00001111 is the full mask.
        let r = _mm_mask_dpwssd_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpwssd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_maskz_dpwssd_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpwssd_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    // --- vpdpwssds: signed word pairs, saturating 32-bit accumulate ---

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpwssds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_dpwssds_epi32(src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpwssds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_mask_dpwssds_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpwssds_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpwssds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_maskz_dpwssds_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpwssds_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm256_dpwssds_avx_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwssds_avx_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpwssds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwssds_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpwssds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_mask_dpwssds_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpwssds_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpwssds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_maskz_dpwssds_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpwssds_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm_dpwssds_avx_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwssds_avx_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpwssds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwssds_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpwssds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_mask_dpwssds_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpwssds_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpwssds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_maskz_dpwssds_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpwssds_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    // --- vpdpbusd: byte quadruples, wrapping 32-bit accumulate ---

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpbusd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_dpbusd_epi32(src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpbusd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_mask_dpbusd_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpbusd_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpbusd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_maskz_dpbusd_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpbusd_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm256_dpbusd_avx_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbusd_avx_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpbusd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbusd_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpbusd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_mask_dpbusd_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpbusd_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpbusd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_maskz_dpbusd_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpbusd_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm_dpbusd_avx_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbusd_avx_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpbusd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbusd_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpbusd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_mask_dpbusd_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpbusd_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpbusd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_maskz_dpbusd_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpbusd_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    // --- vpdpbusds: byte quadruples, saturating 32-bit accumulate ---

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpbusds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_dpbusds_epi32(src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpbusds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_mask_dpbusds_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpbusds_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpbusds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_maskz_dpbusds_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpbusds_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm256_dpbusds_avx_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbusds_avx_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpbusds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbusds_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpbusds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_mask_dpbusds_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpbusds_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpbusds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_maskz_dpbusds_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpbusds_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm_dpbusds_avx_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbusds_avx_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpbusds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbusds_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpbusds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_mask_dpbusds_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpbusds_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpbusds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_maskz_dpbusds_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpbusds_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    // --- AVX-VNNI-INT8: unmasked byte dot products, one test per
    //     signedness combination (ss/su/uu) and saturation variant ---

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbssd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbssd_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbssd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbssd_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbssds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbssds_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbssds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbssds_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbsud_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbsud_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbsud_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbsud_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbsuds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbsuds_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbsuds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbsuds_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbuud_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbuud_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbuud_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbuud_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbuuds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbuuds_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbuuds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbuuds_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    // --- AVX-VNNI-INT16: unmasked word dot products, one test per
    //     signedness combination (su/us/uu) and saturation variant ---

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwsud_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwsud_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwsud_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwsud_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwsuds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwsuds_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwsuds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwsuds_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwusd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwusd_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwusd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwusd_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwusds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwusds_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwusds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwusds_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwuud_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwuud_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwuud_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwuud_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwuuds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwuuds_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwuuds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwuuds_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }
}