core/stdarch/crates/core_arch/src/x86/
avx512vnni.rs

1use crate::core_arch::{simd::*, x86::*};
2use crate::intrinsics::simd::*;
3
4#[cfg(test)]
5use stdarch_test::assert_instr;
6
7/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
8///
9/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssd_epi32&expand=2219)
10#[inline]
11#[target_feature(enable = "avx512vnni")]
12#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13#[cfg_attr(test, assert_instr(vpdpwssd))]
14pub unsafe fn _mm512_dpwssd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
15    transmute(vpdpwssd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16()))
16}
17
18/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19///
20/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssd_epi32&expand=2220)
21#[inline]
22#[target_feature(enable = "avx512vnni")]
23#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24#[cfg_attr(test, assert_instr(vpdpwssd))]
25pub unsafe fn _mm512_mask_dpwssd_epi32(
26    src: __m512i,
27    k: __mmask16,
28    a: __m512i,
29    b: __m512i,
30) -> __m512i {
31    let r = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
32    transmute(simd_select_bitmask(k, r, src.as_i32x16()))
33}
34
35/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36///
37/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssd_epi32&expand=2221)
38#[inline]
39#[target_feature(enable = "avx512vnni")]
40#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41#[cfg_attr(test, assert_instr(vpdpwssd))]
42pub unsafe fn _mm512_maskz_dpwssd_epi32(
43    k: __mmask16,
44    src: __m512i,
45    a: __m512i,
46    b: __m512i,
47) -> __m512i {
48    let r = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
49    transmute(simd_select_bitmask(k, r, i32x16::ZERO))
50}
51
52/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
53///
54/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssd_avx_epi32&expand=2713)
55#[inline]
56#[target_feature(enable = "avxvnni")]
57#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
58#[cfg_attr(
59    all(test, any(target_os = "linux", target_env = "msvc")),
60    assert_instr(vpdpwssd)
61)]
62pub unsafe fn _mm256_dpwssd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
63    transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
64}
65
66/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
67///
68/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssd_epi32&expand=2216)
69#[inline]
70#[target_feature(enable = "avx512vnni,avx512vl")]
71#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
72#[cfg_attr(test, assert_instr(vpdpwssd))]
73pub unsafe fn _mm256_dpwssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
74    transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
75}
76
77/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
78///
79/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssd_epi32&expand=2217)
80#[inline]
81#[target_feature(enable = "avx512vnni,avx512vl")]
82#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
83#[cfg_attr(test, assert_instr(vpdpwssd))]
84pub unsafe fn _mm256_mask_dpwssd_epi32(
85    src: __m256i,
86    k: __mmask8,
87    a: __m256i,
88    b: __m256i,
89) -> __m256i {
90    let r = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
91    transmute(simd_select_bitmask(k, r, src.as_i32x8()))
92}
93
94/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
95///
96/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssd_epi32&expand=2218)
97#[inline]
98#[target_feature(enable = "avx512vnni,avx512vl")]
99#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
100#[cfg_attr(test, assert_instr(vpdpwssd))]
101pub unsafe fn _mm256_maskz_dpwssd_epi32(
102    k: __mmask8,
103    src: __m256i,
104    a: __m256i,
105    b: __m256i,
106) -> __m256i {
107    let r = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
108    transmute(simd_select_bitmask(k, r, i32x8::ZERO))
109}
110
111/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
112///
113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssd_avx_epi32&expand=2712)
114#[inline]
115#[target_feature(enable = "avxvnni")]
116#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
117#[cfg_attr(
118    all(test, any(target_os = "linux", target_env = "msvc")),
119    assert_instr(vpdpwssd)
120)]
121pub unsafe fn _mm_dpwssd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
122    transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
123}
124
125/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
126///
127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssd_epi32&expand=2213)
128#[inline]
129#[target_feature(enable = "avx512vnni,avx512vl")]
130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
131#[cfg_attr(test, assert_instr(vpdpwssd))]
132pub unsafe fn _mm_dpwssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
133    transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
134}
135
136/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
137///
138/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssd_epi32&expand=2214)
139#[inline]
140#[target_feature(enable = "avx512vnni,avx512vl")]
141#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
142#[cfg_attr(test, assert_instr(vpdpwssd))]
143pub unsafe fn _mm_mask_dpwssd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
144    let r = _mm_dpwssd_epi32(src, a, b).as_i32x4();
145    transmute(simd_select_bitmask(k, r, src.as_i32x4()))
146}
147
148/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
149///
150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssd_epi32&expand=2215)
151#[inline]
152#[target_feature(enable = "avx512vnni,avx512vl")]
153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
154#[cfg_attr(test, assert_instr(vpdpwssd))]
155pub unsafe fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
156    let r = _mm_dpwssd_epi32(src, a, b).as_i32x4();
157    transmute(simd_select_bitmask(k, r, i32x4::ZERO))
158}
159
160/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
161///
162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssds_epi32&expand=2228)
163#[inline]
164#[target_feature(enable = "avx512vnni")]
165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
166#[cfg_attr(test, assert_instr(vpdpwssds))]
167pub unsafe fn _mm512_dpwssds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
168    transmute(vpdpwssds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16()))
169}
170
171/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
172///
173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssds_epi32&expand=2229)
174#[inline]
175#[target_feature(enable = "avx512vnni")]
176#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
177#[cfg_attr(test, assert_instr(vpdpwssds))]
178pub unsafe fn _mm512_mask_dpwssds_epi32(
179    src: __m512i,
180    k: __mmask16,
181    a: __m512i,
182    b: __m512i,
183) -> __m512i {
184    let r = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
185    transmute(simd_select_bitmask(k, r, src.as_i32x16()))
186}
187
188/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
189///
190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssds_epi32&expand=2230)
191#[inline]
192#[target_feature(enable = "avx512vnni")]
193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
194#[cfg_attr(test, assert_instr(vpdpwssds))]
195pub unsafe fn _mm512_maskz_dpwssds_epi32(
196    k: __mmask16,
197    src: __m512i,
198    a: __m512i,
199    b: __m512i,
200) -> __m512i {
201    let r = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
202    transmute(simd_select_bitmask(k, r, i32x16::ZERO))
203}
204
205/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
206///
207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssds_avx_epi32&expand=2726)
208#[inline]
209#[target_feature(enable = "avxvnni")]
210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
211#[cfg_attr(
212    all(test, any(target_os = "linux", target_env = "msvc")),
213    assert_instr(vpdpwssds)
214)]
215pub unsafe fn _mm256_dpwssds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
216    transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
217}
218
219/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
220///
221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssds_epi32&expand=2225)
222#[inline]
223#[target_feature(enable = "avx512vnni,avx512vl")]
224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
225#[cfg_attr(test, assert_instr(vpdpwssds))]
226pub unsafe fn _mm256_dpwssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
227    transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
228}
229
230/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
231///
232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssds_epi32&expand=2226)
233#[inline]
234#[target_feature(enable = "avx512vnni,avx512vl")]
235#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
236#[cfg_attr(test, assert_instr(vpdpwssds))]
237pub unsafe fn _mm256_mask_dpwssds_epi32(
238    src: __m256i,
239    k: __mmask8,
240    a: __m256i,
241    b: __m256i,
242) -> __m256i {
243    let r = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
244    transmute(simd_select_bitmask(k, r, src.as_i32x8()))
245}
246
247/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
248///
249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssds_epi32&expand=2227)
250#[inline]
251#[target_feature(enable = "avx512vnni,avx512vl")]
252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
253#[cfg_attr(test, assert_instr(vpdpwssds))]
254pub unsafe fn _mm256_maskz_dpwssds_epi32(
255    k: __mmask8,
256    src: __m256i,
257    a: __m256i,
258    b: __m256i,
259) -> __m256i {
260    let r = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
261    transmute(simd_select_bitmask(k, r, i32x8::ZERO))
262}
263
264/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
265///
266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssds_avx_epi32&expand=2725)
267#[inline]
268#[target_feature(enable = "avxvnni")]
269#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
270#[cfg_attr(
271    all(test, any(target_os = "linux", target_env = "msvc")),
272    assert_instr(vpdpwssds)
273)]
274pub unsafe fn _mm_dpwssds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
275    transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
276}
277
278/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
279///
280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssds_epi32&expand=2222)
281#[inline]
282#[target_feature(enable = "avx512vnni,avx512vl")]
283#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
284#[cfg_attr(test, assert_instr(vpdpwssds))]
285pub unsafe fn _mm_dpwssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
286    transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
287}
288
289/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
290///
291/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssds_epi32&expand=2223)
292#[inline]
293#[target_feature(enable = "avx512vnni,avx512vl")]
294#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
295#[cfg_attr(test, assert_instr(vpdpwssds))]
296pub unsafe fn _mm_mask_dpwssds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
297    let r = _mm_dpwssds_epi32(src, a, b).as_i32x4();
298    transmute(simd_select_bitmask(k, r, src.as_i32x4()))
299}
300
301/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
302///
303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssds_epi32&expand=2224)
304#[inline]
305#[target_feature(enable = "avx512vnni,avx512vl")]
306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
307#[cfg_attr(test, assert_instr(vpdpwssds))]
308pub unsafe fn _mm_maskz_dpwssds_epi32(
309    k: __mmask8,
310    src: __m128i,
311    a: __m128i,
312    b: __m128i,
313) -> __m128i {
314    let r = _mm_dpwssds_epi32(src, a, b).as_i32x4();
315    transmute(simd_select_bitmask(k, r, i32x4::ZERO))
316}
317
318/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
319///
320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusd_epi32&expand=2201)
321#[inline]
322#[target_feature(enable = "avx512vnni")]
323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
324#[cfg_attr(test, assert_instr(vpdpbusd))]
325pub unsafe fn _mm512_dpbusd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
326    transmute(vpdpbusd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16()))
327}
328
329/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
330///
331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusd_epi32&expand=2202)
332#[inline]
333#[target_feature(enable = "avx512vnni")]
334#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
335#[cfg_attr(test, assert_instr(vpdpbusd))]
336pub unsafe fn _mm512_mask_dpbusd_epi32(
337    src: __m512i,
338    k: __mmask16,
339    a: __m512i,
340    b: __m512i,
341) -> __m512i {
342    let r = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
343    transmute(simd_select_bitmask(k, r, src.as_i32x16()))
344}
345
346/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
347///
348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusd_epi32&expand=2203)
349#[inline]
350#[target_feature(enable = "avx512vnni")]
351#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
352#[cfg_attr(test, assert_instr(vpdpbusd))]
353pub unsafe fn _mm512_maskz_dpbusd_epi32(
354    k: __mmask16,
355    src: __m512i,
356    a: __m512i,
357    b: __m512i,
358) -> __m512i {
359    let r = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
360    transmute(simd_select_bitmask(k, r, i32x16::ZERO))
361}
362
363/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
364///
365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusd_avx_epi32&expand=2683)
366#[inline]
367#[target_feature(enable = "avxvnni")]
368#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
369#[cfg_attr(
370    all(test, any(target_os = "linux", target_env = "msvc")),
371    assert_instr(vpdpbusd)
372)]
373pub unsafe fn _mm256_dpbusd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
374    transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
375}
376
377/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
378///
379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusd_epi32&expand=2198)
380#[inline]
381#[target_feature(enable = "avx512vnni,avx512vl")]
382#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
383#[cfg_attr(test, assert_instr(vpdpbusd))]
384pub unsafe fn _mm256_dpbusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
385    transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
386}
387
388/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
389///
390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusd_epi32&expand=2199)
391#[inline]
392#[target_feature(enable = "avx512vnni,avx512vl")]
393#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
394#[cfg_attr(test, assert_instr(vpdpbusd))]
395pub unsafe fn _mm256_mask_dpbusd_epi32(
396    src: __m256i,
397    k: __mmask8,
398    a: __m256i,
399    b: __m256i,
400) -> __m256i {
401    let r = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
402    transmute(simd_select_bitmask(k, r, src.as_i32x8()))
403}
404
405/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
406///
407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusd_epi32&expand=2200)
408#[inline]
409#[target_feature(enable = "avx512vnni,avx512vl")]
410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
411#[cfg_attr(test, assert_instr(vpdpbusd))]
412pub unsafe fn _mm256_maskz_dpbusd_epi32(
413    k: __mmask8,
414    src: __m256i,
415    a: __m256i,
416    b: __m256i,
417) -> __m256i {
418    let r = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
419    transmute(simd_select_bitmask(k, r, i32x8::ZERO))
420}
421
422/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
423///
424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusd_avx_epi32&expand=2682)
425#[inline]
426#[target_feature(enable = "avxvnni")]
427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
428#[cfg_attr(
429    all(test, any(target_os = "linux", target_env = "msvc")),
430    assert_instr(vpdpbusd)
431)]
432pub unsafe fn _mm_dpbusd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
433    transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
434}
435
436/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
437///
438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusd_epi32&expand=2195)
439#[inline]
440#[target_feature(enable = "avx512vnni,avx512vl")]
441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
442#[cfg_attr(test, assert_instr(vpdpbusd))]
443pub unsafe fn _mm_dpbusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
444    transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
445}
446
447/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
448///
449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusd_epi32&expand=2196)
450#[inline]
451#[target_feature(enable = "avx512vnni,avx512vl")]
452#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
453#[cfg_attr(test, assert_instr(vpdpbusd))]
454pub unsafe fn _mm_mask_dpbusd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
455    let r = _mm_dpbusd_epi32(src, a, b).as_i32x4();
456    transmute(simd_select_bitmask(k, r, src.as_i32x4()))
457}
458
459/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
460///
461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusd_epi32&expand=2197)
462#[inline]
463#[target_feature(enable = "avx512vnni,avx512vl")]
464#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
465#[cfg_attr(test, assert_instr(vpdpbusd))]
466pub unsafe fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
467    let r = _mm_dpbusd_epi32(src, a, b).as_i32x4();
468    transmute(simd_select_bitmask(k, r, i32x4::ZERO))
469}
470
471/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
472///
473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusds_epi32&expand=2210)
474#[inline]
475#[target_feature(enable = "avx512vnni")]
476#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
477#[cfg_attr(test, assert_instr(vpdpbusds))]
478pub unsafe fn _mm512_dpbusds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
479    transmute(vpdpbusds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16()))
480}
481
482/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
483///
484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusds_epi32&expand=2211)
485#[inline]
486#[target_feature(enable = "avx512vnni")]
487#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
488#[cfg_attr(test, assert_instr(vpdpbusds))]
489pub unsafe fn _mm512_mask_dpbusds_epi32(
490    src: __m512i,
491    k: __mmask16,
492    a: __m512i,
493    b: __m512i,
494) -> __m512i {
495    let r = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
496    transmute(simd_select_bitmask(k, r, src.as_i32x16()))
497}
498
499/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
500///
501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusds_epi32&expand=2212)
502#[inline]
503#[target_feature(enable = "avx512vnni")]
504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
505#[cfg_attr(test, assert_instr(vpdpbusds))]
506pub unsafe fn _mm512_maskz_dpbusds_epi32(
507    k: __mmask16,
508    src: __m512i,
509    a: __m512i,
510    b: __m512i,
511) -> __m512i {
512    let r = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
513    transmute(simd_select_bitmask(k, r, i32x16::ZERO))
514}
515
516/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
517///
518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusds_avx_epi32&expand=2696)
519#[inline]
520#[target_feature(enable = "avxvnni")]
521#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
522#[cfg_attr(
523    all(test, any(target_os = "linux", target_env = "msvc")),
524    assert_instr(vpdpbusds)
525)]
526pub unsafe fn _mm256_dpbusds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
527    transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
528}
529
530/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
531///
532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusds_epi32&expand=2207)
533#[inline]
534#[target_feature(enable = "avx512vnni,avx512vl")]
535#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
536#[cfg_attr(test, assert_instr(vpdpbusds))]
537pub unsafe fn _mm256_dpbusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
538    transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
539}
540
541/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
542///
543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusds_epi32&expand=2208)
544#[inline]
545#[target_feature(enable = "avx512vnni,avx512vl")]
546#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
547#[cfg_attr(test, assert_instr(vpdpbusds))]
548pub unsafe fn _mm256_mask_dpbusds_epi32(
549    src: __m256i,
550    k: __mmask8,
551    a: __m256i,
552    b: __m256i,
553) -> __m256i {
554    let r = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
555    transmute(simd_select_bitmask(k, r, src.as_i32x8()))
556}
557
558/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
559///
560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusds_epi32&expand=2209)
561#[inline]
562#[target_feature(enable = "avx512vnni,avx512vl")]
563#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
564#[cfg_attr(test, assert_instr(vpdpbusds))]
565pub unsafe fn _mm256_maskz_dpbusds_epi32(
566    k: __mmask8,
567    src: __m256i,
568    a: __m256i,
569    b: __m256i,
570) -> __m256i {
571    let r = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
572    transmute(simd_select_bitmask(k, r, i32x8::ZERO))
573}
574
575/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
576///
577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusds_avx_epi32&expand=2695)
578#[inline]
579#[target_feature(enable = "avxvnni")]
580#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
581#[cfg_attr(
582    all(test, any(target_os = "linux", target_env = "msvc")),
583    assert_instr(vpdpbusds)
584)]
585pub unsafe fn _mm_dpbusds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
586    transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
587}
588
589/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
590///
591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusds_epi32&expand=2204)
592#[inline]
593#[target_feature(enable = "avx512vnni,avx512vl")]
594#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
595#[cfg_attr(test, assert_instr(vpdpbusds))]
596pub unsafe fn _mm_dpbusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
597    transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
598}
599
600/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
601///
602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusds_epi32&expand=2205)
603#[inline]
604#[target_feature(enable = "avx512vnni,avx512vl")]
605#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
606#[cfg_attr(test, assert_instr(vpdpbusds))]
607pub unsafe fn _mm_mask_dpbusds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
608    let r = _mm_dpbusds_epi32(src, a, b).as_i32x4();
609    transmute(simd_select_bitmask(k, r, src.as_i32x4()))
610}
611
612/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
613///
614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusds_epi32&expand=2206)
615#[inline]
616#[target_feature(enable = "avx512vnni,avx512vl")]
617#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
618#[cfg_attr(test, assert_instr(vpdpbusds))]
619pub unsafe fn _mm_maskz_dpbusds_epi32(
620    k: __mmask8,
621    src: __m128i,
622    a: __m128i,
623    b: __m128i,
624) -> __m128i {
625    let r = _mm_dpbusds_epi32(src, a, b).as_i32x4();
626    transmute(simd_select_bitmask(k, r, i32x4::ZERO))
627}
628
629/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
630/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
631/// 32-bit integer in src, and store the packed 32-bit results in dst.
632///
633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbssd_epi32&expand=2674)
634#[inline]
635#[target_feature(enable = "avxvnniint8")]
636#[cfg_attr(
637    all(test, any(target_os = "linux", target_env = "msvc")),
638    assert_instr(vpdpbssd)
639)]
640#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
641pub unsafe fn _mm_dpbssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
642    transmute(vpdpbssd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
643}
644
645/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
646/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
647/// 32-bit integer in src, and store the packed 32-bit results in dst.
648///
649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbssd_epi32&expand=2675)
650#[inline]
651#[target_feature(enable = "avxvnniint8")]
652#[cfg_attr(
653    all(test, any(target_os = "linux", target_env = "msvc")),
654    assert_instr(vpdpbssd)
655)]
656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
657pub unsafe fn _mm256_dpbssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
658    transmute(vpdpbssd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
659}
660
661/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
662/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
663/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
664///
665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbssds_epi32&expand=2676)
666#[inline]
667#[target_feature(enable = "avxvnniint8")]
668#[cfg_attr(
669    all(test, any(target_os = "linux", target_env = "msvc")),
670    assert_instr(vpdpbssds)
671)]
672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
673pub unsafe fn _mm_dpbssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
674    transmute(vpdpbssds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
675}
676
677/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
678/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
679/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
680///
681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbssds_epi32&expand=2677)
682#[inline]
683#[target_feature(enable = "avxvnniint8")]
684#[cfg_attr(
685    all(test, any(target_os = "linux", target_env = "msvc")),
686    assert_instr(vpdpbssds)
687)]
688#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
689pub unsafe fn _mm256_dpbssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
690    transmute(vpdpbssds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
691}
692
693/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
694/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
695/// 32-bit integer in src, and store the packed 32-bit results in dst.
696///
697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbsud_epi32&expand=2678)
698#[inline]
699#[target_feature(enable = "avxvnniint8")]
700#[cfg_attr(
701    all(test, any(target_os = "linux", target_env = "msvc")),
702    assert_instr(vpdpbsud)
703)]
704#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
705pub unsafe fn _mm_dpbsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
706    transmute(vpdpbsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
707}
708
709/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
710/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
711/// 32-bit integer in src, and store the packed 32-bit results in dst.
712///
713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbsud_epi32&expand=2679)
714#[inline]
715#[target_feature(enable = "avxvnniint8")]
716#[cfg_attr(
717    all(test, any(target_os = "linux", target_env = "msvc")),
718    assert_instr(vpdpbsud)
719)]
720#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
721pub unsafe fn _mm256_dpbsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
722    transmute(vpdpbsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
723}
724
725/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
726/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
727/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
728///
729/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbsuds_epi32&expand=2680)
730#[inline]
731#[target_feature(enable = "avxvnniint8")]
732#[cfg_attr(
733    all(test, any(target_os = "linux", target_env = "msvc")),
734    assert_instr(vpdpbsuds)
735)]
736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
737pub unsafe fn _mm_dpbsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
738    transmute(vpdpbsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
739}
740
741/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
742/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
743/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
744///
745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbsuds_epi32&expand=2681)
746#[inline]
747#[target_feature(enable = "avxvnniint8")]
748#[cfg_attr(
749    all(test, any(target_os = "linux", target_env = "msvc")),
750    assert_instr(vpdpbsuds)
751)]
752#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
753pub unsafe fn _mm256_dpbsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
754    transmute(vpdpbsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
755}
756
757/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
758/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
759/// 32-bit integer in src, and store the packed 32-bit results in dst.
760///
761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbuud_epi32&expand=2708)
762#[inline]
763#[target_feature(enable = "avxvnniint8")]
764#[cfg_attr(
765    all(test, any(target_os = "linux", target_env = "msvc")),
766    assert_instr(vpdpbuud)
767)]
768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
769pub unsafe fn _mm_dpbuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
770    transmute(vpdpbuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
771}
772
773/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
774/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
775/// 32-bit integer in src, and store the packed 32-bit results in dst.
776///
777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbuud_epi32&expand=2709)
778#[inline]
779#[target_feature(enable = "avxvnniint8")]
780#[cfg_attr(
781    all(test, any(target_os = "linux", target_env = "msvc")),
782    assert_instr(vpdpbuud)
783)]
784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
785pub unsafe fn _mm256_dpbuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
786    transmute(vpdpbuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
787}
788
789/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
790/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
791/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
792///
793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbuuds_epi32&expand=2710)
794#[inline]
795#[target_feature(enable = "avxvnniint8")]
796#[cfg_attr(
797    all(test, any(target_os = "linux", target_env = "msvc")),
798    assert_instr(vpdpbuuds)
799)]
800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
801pub unsafe fn _mm_dpbuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
802    transmute(vpdpbuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
803}
804
805/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
806/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
807/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
808///
809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbuuds_epi32&expand=2711)
810#[inline]
811#[target_feature(enable = "avxvnniint8")]
812#[cfg_attr(
813    all(test, any(target_os = "linux", target_env = "msvc")),
814    assert_instr(vpdpbuuds)
815)]
816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
817pub unsafe fn _mm256_dpbuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
818    transmute(vpdpbuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
819}
820
821/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
822/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
823/// 32-bit integer in src, and store the packed 32-bit results in dst.
824///
825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwsud_epi32&expand=2738)
826#[inline]
827#[target_feature(enable = "avxvnniint16")]
828#[cfg_attr(
829    all(test, any(target_os = "linux", target_env = "msvc")),
830    assert_instr(vpdpwsud)
831)]
832#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
833pub unsafe fn _mm_dpwsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
834    transmute(vpdpwsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
835}
836
837/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
838/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
839/// 32-bit integer in src, and store the packed 32-bit results in dst.
840///
841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwsud_epi32&expand=2739)
842#[inline]
843#[target_feature(enable = "avxvnniint16")]
844#[cfg_attr(
845    all(test, any(target_os = "linux", target_env = "msvc")),
846    assert_instr(vpdpwsud)
847)]
848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
849pub unsafe fn _mm256_dpwsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
850    transmute(vpdpwsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
851}
852
853/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
854/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
855/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
856///
857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwsuds_epi32&expand=2740)
858#[inline]
859#[target_feature(enable = "avxvnniint16")]
860#[cfg_attr(
861    all(test, any(target_os = "linux", target_env = "msvc")),
862    assert_instr(vpdpwsuds)
863)]
864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
865pub unsafe fn _mm_dpwsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
866    transmute(vpdpwsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
867}
868
869/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
870/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
871/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
872///
873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwsuds_epi32&expand=2741)
874#[inline]
875#[target_feature(enable = "avxvnniint16")]
876#[cfg_attr(
877    all(test, any(target_os = "linux", target_env = "msvc")),
878    assert_instr(vpdpwsuds)
879)]
880#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
881pub unsafe fn _mm256_dpwsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
882    transmute(vpdpwsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
883}
884
885/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
886/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
887/// 32-bit integer in src, and store the packed 32-bit results in dst.
888///
889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwusd_epi32&expand=2742)
890#[inline]
891#[target_feature(enable = "avxvnniint16")]
892#[cfg_attr(
893    all(test, any(target_os = "linux", target_env = "msvc")),
894    assert_instr(vpdpwusd)
895)]
896#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
897pub unsafe fn _mm_dpwusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
898    transmute(vpdpwusd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
899}
900
901/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
902/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
903/// 32-bit integer in src, and store the packed 32-bit results in dst.
904///
905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwusd_epi32&expand=2743)
906#[inline]
907#[target_feature(enable = "avxvnniint16")]
908#[cfg_attr(
909    all(test, any(target_os = "linux", target_env = "msvc")),
910    assert_instr(vpdpwusd)
911)]
912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
913pub unsafe fn _mm256_dpwusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
914    transmute(vpdpwusd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
915}
916
917/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
918/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
919/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
920///
921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwusds_epi32&expand=2744)
922#[inline]
923#[target_feature(enable = "avxvnniint16")]
924#[cfg_attr(
925    all(test, any(target_os = "linux", target_env = "msvc")),
926    assert_instr(vpdpwusds)
927)]
928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
929pub unsafe fn _mm_dpwusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
930    transmute(vpdpwusds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
931}
932
933/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
934/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
935/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
936///
937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwusds_epi32&expand=2745)
938#[inline]
939#[target_feature(enable = "avxvnniint16")]
940#[cfg_attr(
941    all(test, any(target_os = "linux", target_env = "msvc")),
942    assert_instr(vpdpwusds)
943)]
944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
945pub unsafe fn _mm256_dpwusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
946    transmute(vpdpwusds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
947}
948
949/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
950/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
951/// 32-bit integer in src, and store the packed 32-bit results in dst.
952///
953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwuud_epi32&expand=2746)
954#[inline]
955#[target_feature(enable = "avxvnniint16")]
956#[cfg_attr(
957    all(test, any(target_os = "linux", target_env = "msvc")),
958    assert_instr(vpdpwuud)
959)]
960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
961pub unsafe fn _mm_dpwuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
962    transmute(vpdpwuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
963}
964
965/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
966/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
967/// 32-bit integer in src, and store the packed 32-bit results in dst.
968///
969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwuud_epi32&expand=2747)
970#[inline]
971#[target_feature(enable = "avxvnniint16")]
972#[cfg_attr(
973    all(test, any(target_os = "linux", target_env = "msvc")),
974    assert_instr(vpdpwuud)
975)]
976#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
977pub unsafe fn _mm256_dpwuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
978    transmute(vpdpwuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
979}
980
981/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
982/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
983/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
984///
985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwuuds_epi32&expand=2748)
986#[inline]
987#[target_feature(enable = "avxvnniint16")]
988#[cfg_attr(
989    all(test, any(target_os = "linux", target_env = "msvc")),
990    assert_instr(vpdpwuuds)
991)]
992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
993pub unsafe fn _mm_dpwuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
994    transmute(vpdpwuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
995}
996
997/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
998/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
999/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
1000///
1001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwuuds_epi32&expand=2749)
1002#[inline]
1003#[target_feature(enable = "avxvnniint16")]
1004#[cfg_attr(
1005    all(test, any(target_os = "linux", target_env = "msvc")),
1006    assert_instr(vpdpwuuds)
1007)]
1008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1009pub unsafe fn _mm256_dpwuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
1010    transmute(vpdpwuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
1011}
1012
1013#[allow(improper_ctypes)]
1014extern "C" {
1015    #[link_name = "llvm.x86.avx512.vpdpwssd.512"]
1016    fn vpdpwssd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
1017    #[link_name = "llvm.x86.avx512.vpdpwssd.256"]
1018    fn vpdpwssd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1019    #[link_name = "llvm.x86.avx512.vpdpwssd.128"]
1020    fn vpdpwssd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1021
1022    #[link_name = "llvm.x86.avx512.vpdpwssds.512"]
1023    fn vpdpwssds(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
1024    #[link_name = "llvm.x86.avx512.vpdpwssds.256"]
1025    fn vpdpwssds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1026    #[link_name = "llvm.x86.avx512.vpdpwssds.128"]
1027    fn vpdpwssds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1028
1029    #[link_name = "llvm.x86.avx512.vpdpbusd.512"]
1030    fn vpdpbusd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
1031    #[link_name = "llvm.x86.avx512.vpdpbusd.256"]
1032    fn vpdpbusd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1033    #[link_name = "llvm.x86.avx512.vpdpbusd.128"]
1034    fn vpdpbusd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1035
1036    #[link_name = "llvm.x86.avx512.vpdpbusds.512"]
1037    fn vpdpbusds(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
1038    #[link_name = "llvm.x86.avx512.vpdpbusds.256"]
1039    fn vpdpbusds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1040    #[link_name = "llvm.x86.avx512.vpdpbusds.128"]
1041    fn vpdpbusds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1042
1043    #[link_name = "llvm.x86.avx2.vpdpbssd.128"]
1044    fn vpdpbssd_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1045    #[link_name = "llvm.x86.avx2.vpdpbssd.256"]
1046    fn vpdpbssd_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1047
1048    #[link_name = "llvm.x86.avx2.vpdpbssds.128"]
1049    fn vpdpbssds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1050    #[link_name = "llvm.x86.avx2.vpdpbssds.256"]
1051    fn vpdpbssds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1052
1053    #[link_name = "llvm.x86.avx2.vpdpbsud.128"]
1054    fn vpdpbsud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1055    #[link_name = "llvm.x86.avx2.vpdpbsud.256"]
1056    fn vpdpbsud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1057
1058    #[link_name = "llvm.x86.avx2.vpdpbsuds.128"]
1059    fn vpdpbsuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1060    #[link_name = "llvm.x86.avx2.vpdpbsuds.256"]
1061    fn vpdpbsuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1062
1063    #[link_name = "llvm.x86.avx2.vpdpbuud.128"]
1064    fn vpdpbuud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1065    #[link_name = "llvm.x86.avx2.vpdpbuud.256"]
1066    fn vpdpbuud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1067
1068    #[link_name = "llvm.x86.avx2.vpdpbuuds.128"]
1069    fn vpdpbuuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1070    #[link_name = "llvm.x86.avx2.vpdpbuuds.256"]
1071    fn vpdpbuuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1072
1073    #[link_name = "llvm.x86.avx2.vpdpwsud.128"]
1074    fn vpdpwsud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1075    #[link_name = "llvm.x86.avx2.vpdpwsud.256"]
1076    fn vpdpwsud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1077
1078    #[link_name = "llvm.x86.avx2.vpdpwsuds.128"]
1079    fn vpdpwsuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1080    #[link_name = "llvm.x86.avx2.vpdpwsuds.256"]
1081    fn vpdpwsuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1082
1083    #[link_name = "llvm.x86.avx2.vpdpwusd.128"]
1084    fn vpdpwusd_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1085    #[link_name = "llvm.x86.avx2.vpdpwusd.256"]
1086    fn vpdpwusd_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1087
1088    #[link_name = "llvm.x86.avx2.vpdpwusds.128"]
1089    fn vpdpwusds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1090    #[link_name = "llvm.x86.avx2.vpdpwusds.256"]
1091    fn vpdpwusds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1092
1093    #[link_name = "llvm.x86.avx2.vpdpwuud.128"]
1094    fn vpdpwuud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1095    #[link_name = "llvm.x86.avx2.vpdpwuud.256"]
1096    fn vpdpwuud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1097
1098    #[link_name = "llvm.x86.avx2.vpdpwuuds.128"]
1099    fn vpdpwuuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1100    #[link_name = "llvm.x86.avx2.vpdpwuuds.256"]
1101    fn vpdpwuuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1102}
1103
#[cfg(test)]
mod tests {
    // Shared fixture pattern for every test in this module:
    //   * the accumulator holds 1 in each 32-bit lane;
    //   * both multiplicands carry 1 in every 16-bit word (0x0001_0001) or in
    //     every byte (0x0101_0101) of each 32-bit lane;
    //   * the word-based intrinsics are expected to yield 1 + 2 * (1 * 1) = 3
    //     per lane, the byte-based ones 1 + 4 * (1 * 1) = 5 per lane.
    // Masked variants are exercised with an all-clear and an all-set mask only.

    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    // ---- `dpwssd` family --------------------------------------------------

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpwssd_epi32() {
        let acc = _mm512_set1_epi32(1);
        let words = _mm512_set1_epi32(0x0001_0001);
        let got = _mm512_dpwssd_epi32(acc, words, words);
        assert_eq_m512i(got, _mm512_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpwssd_epi32() {
        let acc = _mm512_set1_epi32(1);
        let words = _mm512_set1_epi32(0x0001_0001);
        // All mask bits clear: the result must equal the accumulator.
        let kept = _mm512_mask_dpwssd_epi32(acc, 0, words, words);
        assert_eq_m512i(kept, acc);
        // All sixteen mask bits set: every lane carries the computed value.
        let full = _mm512_mask_dpwssd_epi32(acc, 0xFFFF, words, words);
        assert_eq_m512i(full, _mm512_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpwssd_epi32() {
        let acc = _mm512_set1_epi32(1);
        let words = _mm512_set1_epi32(0x0001_0001);
        // All mask bits clear: the result must be all zeros.
        let zeroed = _mm512_maskz_dpwssd_epi32(0, acc, words, words);
        assert_eq_m512i(zeroed, _mm512_setzero_si512());
        // All sixteen mask bits set: every lane carries the computed value.
        let full = _mm512_maskz_dpwssd_epi32(0xFFFF, acc, words, words);
        assert_eq_m512i(full, _mm512_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm256_dpwssd_avx_epi32() {
        let acc = _mm256_set1_epi32(1);
        let words = _mm256_set1_epi32(0x0001_0001);
        let got = _mm256_dpwssd_avx_epi32(acc, words, words);
        assert_eq_m256i(got, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpwssd_epi32() {
        let acc = _mm256_set1_epi32(1);
        let words = _mm256_set1_epi32(0x0001_0001);
        let got = _mm256_dpwssd_epi32(acc, words, words);
        assert_eq_m256i(got, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpwssd_epi32() {
        let acc = _mm256_set1_epi32(1);
        let words = _mm256_set1_epi32(0x0001_0001);
        // All mask bits clear: the result must equal the accumulator.
        let kept = _mm256_mask_dpwssd_epi32(acc, 0, words, words);
        assert_eq_m256i(kept, acc);
        // All eight mask bits set: every lane carries the computed value.
        let full = _mm256_mask_dpwssd_epi32(acc, 0xFF, words, words);
        assert_eq_m256i(full, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpwssd_epi32() {
        let acc = _mm256_set1_epi32(1);
        let words = _mm256_set1_epi32(0x0001_0001);
        // All mask bits clear: the result must be all zeros.
        let zeroed = _mm256_maskz_dpwssd_epi32(0, acc, words, words);
        assert_eq_m256i(zeroed, _mm256_setzero_si256());
        // All eight mask bits set: every lane carries the computed value.
        let full = _mm256_maskz_dpwssd_epi32(0xFF, acc, words, words);
        assert_eq_m256i(full, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm_dpwssd_avx_epi32() {
        let acc = _mm_set1_epi32(1);
        let words = _mm_set1_epi32(0x0001_0001);
        let got = _mm_dpwssd_avx_epi32(acc, words, words);
        assert_eq_m128i(got, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpwssd_epi32() {
        let acc = _mm_set1_epi32(1);
        let words = _mm_set1_epi32(0x0001_0001);
        let got = _mm_dpwssd_epi32(acc, words, words);
        assert_eq_m128i(got, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpwssd_epi32() {
        let acc = _mm_set1_epi32(1);
        let words = _mm_set1_epi32(0x0001_0001);
        // All mask bits clear: the result must equal the accumulator.
        let kept = _mm_mask_dpwssd_epi32(acc, 0, words, words);
        assert_eq_m128i(kept, acc);
        // Low four mask bits set, one per 32-bit lane of the 128-bit vector.
        let full = _mm_mask_dpwssd_epi32(acc, 0x0F, words, words);
        assert_eq_m128i(full, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpwssd_epi32() {
        let acc = _mm_set1_epi32(1);
        let words = _mm_set1_epi32(0x0001_0001);
        // All mask bits clear: the result must be all zeros.
        let zeroed = _mm_maskz_dpwssd_epi32(0, acc, words, words);
        assert_eq_m128i(zeroed, _mm_setzero_si128());
        // Low four mask bits set, one per 32-bit lane of the 128-bit vector.
        let full = _mm_maskz_dpwssd_epi32(0x0F, acc, words, words);
        assert_eq_m128i(full, _mm_set1_epi32(3));
    }

    // ---- `dpwssds` family -------------------------------------------------

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpwssds_epi32() {
        let acc = _mm512_set1_epi32(1);
        let words = _mm512_set1_epi32(0x0001_0001);
        let got = _mm512_dpwssds_epi32(acc, words, words);
        assert_eq_m512i(got, _mm512_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpwssds_epi32() {
        let acc = _mm512_set1_epi32(1);
        let words = _mm512_set1_epi32(0x0001_0001);
        // All mask bits clear: the result must equal the accumulator.
        let kept = _mm512_mask_dpwssds_epi32(acc, 0, words, words);
        assert_eq_m512i(kept, acc);
        // All sixteen mask bits set: every lane carries the computed value.
        let full = _mm512_mask_dpwssds_epi32(acc, 0xFFFF, words, words);
        assert_eq_m512i(full, _mm512_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpwssds_epi32() {
        let acc = _mm512_set1_epi32(1);
        let words = _mm512_set1_epi32(0x0001_0001);
        // All mask bits clear: the result must be all zeros.
        let zeroed = _mm512_maskz_dpwssds_epi32(0, acc, words, words);
        assert_eq_m512i(zeroed, _mm512_setzero_si512());
        // All sixteen mask bits set: every lane carries the computed value.
        let full = _mm512_maskz_dpwssds_epi32(0xFFFF, acc, words, words);
        assert_eq_m512i(full, _mm512_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm256_dpwssds_avx_epi32() {
        let acc = _mm256_set1_epi32(1);
        let words = _mm256_set1_epi32(0x0001_0001);
        let got = _mm256_dpwssds_avx_epi32(acc, words, words);
        assert_eq_m256i(got, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpwssds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let words = _mm256_set1_epi32(0x0001_0001);
        let got = _mm256_dpwssds_epi32(acc, words, words);
        assert_eq_m256i(got, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpwssds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let words = _mm256_set1_epi32(0x0001_0001);
        // All mask bits clear: the result must equal the accumulator.
        let kept = _mm256_mask_dpwssds_epi32(acc, 0, words, words);
        assert_eq_m256i(kept, acc);
        // All eight mask bits set: every lane carries the computed value.
        let full = _mm256_mask_dpwssds_epi32(acc, 0xFF, words, words);
        assert_eq_m256i(full, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpwssds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let words = _mm256_set1_epi32(0x0001_0001);
        // All mask bits clear: the result must be all zeros.
        let zeroed = _mm256_maskz_dpwssds_epi32(0, acc, words, words);
        assert_eq_m256i(zeroed, _mm256_setzero_si256());
        // All eight mask bits set: every lane carries the computed value.
        let full = _mm256_maskz_dpwssds_epi32(0xFF, acc, words, words);
        assert_eq_m256i(full, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm_dpwssds_avx_epi32() {
        let acc = _mm_set1_epi32(1);
        let words = _mm_set1_epi32(0x0001_0001);
        let got = _mm_dpwssds_avx_epi32(acc, words, words);
        assert_eq_m128i(got, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpwssds_epi32() {
        let acc = _mm_set1_epi32(1);
        let words = _mm_set1_epi32(0x0001_0001);
        let got = _mm_dpwssds_epi32(acc, words, words);
        assert_eq_m128i(got, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpwssds_epi32() {
        let acc = _mm_set1_epi32(1);
        let words = _mm_set1_epi32(0x0001_0001);
        // All mask bits clear: the result must equal the accumulator.
        let kept = _mm_mask_dpwssds_epi32(acc, 0, words, words);
        assert_eq_m128i(kept, acc);
        // Low four mask bits set, one per 32-bit lane of the 128-bit vector.
        let full = _mm_mask_dpwssds_epi32(acc, 0x0F, words, words);
        assert_eq_m128i(full, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpwssds_epi32() {
        let acc = _mm_set1_epi32(1);
        let words = _mm_set1_epi32(0x0001_0001);
        // All mask bits clear: the result must be all zeros.
        let zeroed = _mm_maskz_dpwssds_epi32(0, acc, words, words);
        assert_eq_m128i(zeroed, _mm_setzero_si128());
        // Low four mask bits set, one per 32-bit lane of the 128-bit vector.
        let full = _mm_maskz_dpwssds_epi32(0x0F, acc, words, words);
        assert_eq_m128i(full, _mm_set1_epi32(3));
    }

    // ---- `dpbusd` family --------------------------------------------------

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpbusd_epi32() {
        let acc = _mm512_set1_epi32(1);
        let bytes = _mm512_set1_epi32(0x0101_0101);
        let got = _mm512_dpbusd_epi32(acc, bytes, bytes);
        assert_eq_m512i(got, _mm512_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpbusd_epi32() {
        let acc = _mm512_set1_epi32(1);
        let bytes = _mm512_set1_epi32(0x0101_0101);
        // All mask bits clear: the result must equal the accumulator.
        let kept = _mm512_mask_dpbusd_epi32(acc, 0, bytes, bytes);
        assert_eq_m512i(kept, acc);
        // All sixteen mask bits set: every lane carries the computed value.
        let full = _mm512_mask_dpbusd_epi32(acc, 0xFFFF, bytes, bytes);
        assert_eq_m512i(full, _mm512_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpbusd_epi32() {
        let acc = _mm512_set1_epi32(1);
        let bytes = _mm512_set1_epi32(0x0101_0101);
        // All mask bits clear: the result must be all zeros.
        let zeroed = _mm512_maskz_dpbusd_epi32(0, acc, bytes, bytes);
        assert_eq_m512i(zeroed, _mm512_setzero_si512());
        // All sixteen mask bits set: every lane carries the computed value.
        let full = _mm512_maskz_dpbusd_epi32(0xFFFF, acc, bytes, bytes);
        assert_eq_m512i(full, _mm512_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm256_dpbusd_avx_epi32() {
        let acc = _mm256_set1_epi32(1);
        let bytes = _mm256_set1_epi32(0x0101_0101);
        let got = _mm256_dpbusd_avx_epi32(acc, bytes, bytes);
        assert_eq_m256i(got, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpbusd_epi32() {
        let acc = _mm256_set1_epi32(1);
        let bytes = _mm256_set1_epi32(0x0101_0101);
        let got = _mm256_dpbusd_epi32(acc, bytes, bytes);
        assert_eq_m256i(got, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpbusd_epi32() {
        let acc = _mm256_set1_epi32(1);
        let bytes = _mm256_set1_epi32(0x0101_0101);
        // All mask bits clear: the result must equal the accumulator.
        let kept = _mm256_mask_dpbusd_epi32(acc, 0, bytes, bytes);
        assert_eq_m256i(kept, acc);
        // All eight mask bits set: every lane carries the computed value.
        let full = _mm256_mask_dpbusd_epi32(acc, 0xFF, bytes, bytes);
        assert_eq_m256i(full, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpbusd_epi32() {
        let acc = _mm256_set1_epi32(1);
        let bytes = _mm256_set1_epi32(0x0101_0101);
        // All mask bits clear: the result must be all zeros.
        let zeroed = _mm256_maskz_dpbusd_epi32(0, acc, bytes, bytes);
        assert_eq_m256i(zeroed, _mm256_setzero_si256());
        // All eight mask bits set: every lane carries the computed value.
        let full = _mm256_maskz_dpbusd_epi32(0xFF, acc, bytes, bytes);
        assert_eq_m256i(full, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm_dpbusd_avx_epi32() {
        let acc = _mm_set1_epi32(1);
        let bytes = _mm_set1_epi32(0x0101_0101);
        let got = _mm_dpbusd_avx_epi32(acc, bytes, bytes);
        assert_eq_m128i(got, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpbusd_epi32() {
        let acc = _mm_set1_epi32(1);
        let bytes = _mm_set1_epi32(0x0101_0101);
        let got = _mm_dpbusd_epi32(acc, bytes, bytes);
        assert_eq_m128i(got, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpbusd_epi32() {
        let acc = _mm_set1_epi32(1);
        let bytes = _mm_set1_epi32(0x0101_0101);
        // All mask bits clear: the result must equal the accumulator.
        let kept = _mm_mask_dpbusd_epi32(acc, 0, bytes, bytes);
        assert_eq_m128i(kept, acc);
        // Low four mask bits set, one per 32-bit lane of the 128-bit vector.
        let full = _mm_mask_dpbusd_epi32(acc, 0x0F, bytes, bytes);
        assert_eq_m128i(full, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpbusd_epi32() {
        let acc = _mm_set1_epi32(1);
        let bytes = _mm_set1_epi32(0x0101_0101);
        // All mask bits clear: the result must be all zeros.
        let zeroed = _mm_maskz_dpbusd_epi32(0, acc, bytes, bytes);
        assert_eq_m128i(zeroed, _mm_setzero_si128());
        // Low four mask bits set, one per 32-bit lane of the 128-bit vector.
        let full = _mm_maskz_dpbusd_epi32(0x0F, acc, bytes, bytes);
        assert_eq_m128i(full, _mm_set1_epi32(5));
    }

    // ---- `dpbusds` family -------------------------------------------------

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpbusds_epi32() {
        let acc = _mm512_set1_epi32(1);
        let bytes = _mm512_set1_epi32(0x0101_0101);
        let got = _mm512_dpbusds_epi32(acc, bytes, bytes);
        assert_eq_m512i(got, _mm512_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpbusds_epi32() {
        let acc = _mm512_set1_epi32(1);
        let bytes = _mm512_set1_epi32(0x0101_0101);
        // All mask bits clear: the result must equal the accumulator.
        let kept = _mm512_mask_dpbusds_epi32(acc, 0, bytes, bytes);
        assert_eq_m512i(kept, acc);
        // All sixteen mask bits set: every lane carries the computed value.
        let full = _mm512_mask_dpbusds_epi32(acc, 0xFFFF, bytes, bytes);
        assert_eq_m512i(full, _mm512_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpbusds_epi32() {
        let acc = _mm512_set1_epi32(1);
        let bytes = _mm512_set1_epi32(0x0101_0101);
        // All mask bits clear: the result must be all zeros.
        let zeroed = _mm512_maskz_dpbusds_epi32(0, acc, bytes, bytes);
        assert_eq_m512i(zeroed, _mm512_setzero_si512());
        // All sixteen mask bits set: every lane carries the computed value.
        let full = _mm512_maskz_dpbusds_epi32(0xFFFF, acc, bytes, bytes);
        assert_eq_m512i(full, _mm512_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm256_dpbusds_avx_epi32() {
        let acc = _mm256_set1_epi32(1);
        let bytes = _mm256_set1_epi32(0x0101_0101);
        let got = _mm256_dpbusds_avx_epi32(acc, bytes, bytes);
        assert_eq_m256i(got, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpbusds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let bytes = _mm256_set1_epi32(0x0101_0101);
        let got = _mm256_dpbusds_epi32(acc, bytes, bytes);
        assert_eq_m256i(got, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpbusds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let bytes = _mm256_set1_epi32(0x0101_0101);
        // All mask bits clear: the result must equal the accumulator.
        let kept = _mm256_mask_dpbusds_epi32(acc, 0, bytes, bytes);
        assert_eq_m256i(kept, acc);
        // All eight mask bits set: every lane carries the computed value.
        let full = _mm256_mask_dpbusds_epi32(acc, 0xFF, bytes, bytes);
        assert_eq_m256i(full, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpbusds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let bytes = _mm256_set1_epi32(0x0101_0101);
        // All mask bits clear: the result must be all zeros.
        let zeroed = _mm256_maskz_dpbusds_epi32(0, acc, bytes, bytes);
        assert_eq_m256i(zeroed, _mm256_setzero_si256());
        // All eight mask bits set: every lane carries the computed value.
        let full = _mm256_maskz_dpbusds_epi32(0xFF, acc, bytes, bytes);
        assert_eq_m256i(full, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm_dpbusds_avx_epi32() {
        let acc = _mm_set1_epi32(1);
        let bytes = _mm_set1_epi32(0x0101_0101);
        let got = _mm_dpbusds_avx_epi32(acc, bytes, bytes);
        assert_eq_m128i(got, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpbusds_epi32() {
        let acc = _mm_set1_epi32(1);
        let bytes = _mm_set1_epi32(0x0101_0101);
        let got = _mm_dpbusds_epi32(acc, bytes, bytes);
        assert_eq_m128i(got, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpbusds_epi32() {
        let acc = _mm_set1_epi32(1);
        let bytes = _mm_set1_epi32(0x0101_0101);
        // All mask bits clear: the result must equal the accumulator.
        let kept = _mm_mask_dpbusds_epi32(acc, 0, bytes, bytes);
        assert_eq_m128i(kept, acc);
        // Low four mask bits set, one per 32-bit lane of the 128-bit vector.
        let full = _mm_mask_dpbusds_epi32(acc, 0x0F, bytes, bytes);
        assert_eq_m128i(full, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpbusds_epi32() {
        let acc = _mm_set1_epi32(1);
        let bytes = _mm_set1_epi32(0x0101_0101);
        // All mask bits clear: the result must be all zeros.
        let zeroed = _mm_maskz_dpbusds_epi32(0, acc, bytes, bytes);
        assert_eq_m128i(zeroed, _mm_setzero_si128());
        // Low four mask bits set, one per 32-bit lane of the 128-bit vector.
        let full = _mm_maskz_dpbusds_epi32(0x0F, acc, bytes, bytes);
        assert_eq_m128i(full, _mm_set1_epi32(5));
    }

    // ---- `avxvnniint8` byte intrinsics (128-bit and 256-bit only) ---------

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbssd_epi32() {
        let acc = _mm_set1_epi32(1);
        let bytes = _mm_set1_epi32(0x0101_0101);
        let got = _mm_dpbssd_epi32(acc, bytes, bytes);
        assert_eq_m128i(got, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbssd_epi32() {
        let acc = _mm256_set1_epi32(1);
        let bytes = _mm256_set1_epi32(0x0101_0101);
        let got = _mm256_dpbssd_epi32(acc, bytes, bytes);
        assert_eq_m256i(got, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbssds_epi32() {
        let acc = _mm_set1_epi32(1);
        let bytes = _mm_set1_epi32(0x0101_0101);
        let got = _mm_dpbssds_epi32(acc, bytes, bytes);
        assert_eq_m128i(got, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbssds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let bytes = _mm256_set1_epi32(0x0101_0101);
        let got = _mm256_dpbssds_epi32(acc, bytes, bytes);
        assert_eq_m256i(got, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbsud_epi32() {
        let acc = _mm_set1_epi32(1);
        let bytes = _mm_set1_epi32(0x0101_0101);
        let got = _mm_dpbsud_epi32(acc, bytes, bytes);
        assert_eq_m128i(got, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbsud_epi32() {
        let acc = _mm256_set1_epi32(1);
        let bytes = _mm256_set1_epi32(0x0101_0101);
        let got = _mm256_dpbsud_epi32(acc, bytes, bytes);
        assert_eq_m256i(got, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbsuds_epi32() {
        let acc = _mm_set1_epi32(1);
        let bytes = _mm_set1_epi32(0x0101_0101);
        let got = _mm_dpbsuds_epi32(acc, bytes, bytes);
        assert_eq_m128i(got, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbsuds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let bytes = _mm256_set1_epi32(0x0101_0101);
        let got = _mm256_dpbsuds_epi32(acc, bytes, bytes);
        assert_eq_m256i(got, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbuud_epi32() {
        let acc = _mm_set1_epi32(1);
        let bytes = _mm_set1_epi32(0x0101_0101);
        let got = _mm_dpbuud_epi32(acc, bytes, bytes);
        assert_eq_m128i(got, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbuud_epi32() {
        let acc = _mm256_set1_epi32(1);
        let bytes = _mm256_set1_epi32(0x0101_0101);
        let got = _mm256_dpbuud_epi32(acc, bytes, bytes);
        assert_eq_m256i(got, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbuuds_epi32() {
        let acc = _mm_set1_epi32(1);
        let bytes = _mm_set1_epi32(0x0101_0101);
        let got = _mm_dpbuuds_epi32(acc, bytes, bytes);
        assert_eq_m128i(got, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbuuds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let bytes = _mm256_set1_epi32(0x0101_0101);
        let got = _mm256_dpbuuds_epi32(acc, bytes, bytes);
        assert_eq_m256i(got, _mm256_set1_epi32(5));
    }

    // ---- `avxvnniint16` word intrinsics (128-bit and 256-bit only) --------

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwsud_epi32() {
        let acc = _mm_set1_epi32(1);
        let words = _mm_set1_epi32(0x0001_0001);
        let got = _mm_dpwsud_epi32(acc, words, words);
        assert_eq_m128i(got, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwsud_epi32() {
        let acc = _mm256_set1_epi32(1);
        let words = _mm256_set1_epi32(0x0001_0001);
        let got = _mm256_dpwsud_epi32(acc, words, words);
        assert_eq_m256i(got, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwsuds_epi32() {
        let acc = _mm_set1_epi32(1);
        let words = _mm_set1_epi32(0x0001_0001);
        let got = _mm_dpwsuds_epi32(acc, words, words);
        assert_eq_m128i(got, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwsuds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let words = _mm256_set1_epi32(0x0001_0001);
        let got = _mm256_dpwsuds_epi32(acc, words, words);
        assert_eq_m256i(got, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwusd_epi32() {
        let acc = _mm_set1_epi32(1);
        let words = _mm_set1_epi32(0x0001_0001);
        let got = _mm_dpwusd_epi32(acc, words, words);
        assert_eq_m128i(got, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwusd_epi32() {
        let acc = _mm256_set1_epi32(1);
        let words = _mm256_set1_epi32(0x0001_0001);
        let got = _mm256_dpwusd_epi32(acc, words, words);
        assert_eq_m256i(got, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwusds_epi32() {
        let acc = _mm_set1_epi32(1);
        let words = _mm_set1_epi32(0x0001_0001);
        let got = _mm_dpwusds_epi32(acc, words, words);
        assert_eq_m128i(got, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwusds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let words = _mm256_set1_epi32(0x0001_0001);
        let got = _mm256_dpwusds_epi32(acc, words, words);
        assert_eq_m256i(got, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwuud_epi32() {
        let acc = _mm_set1_epi32(1);
        let words = _mm_set1_epi32(0x0001_0001);
        let got = _mm_dpwuud_epi32(acc, words, words);
        assert_eq_m128i(got, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwuud_epi32() {
        let acc = _mm256_set1_epi32(1);
        let words = _mm256_set1_epi32(0x0001_0001);
        let got = _mm256_dpwuud_epi32(acc, words, words);
        assert_eq_m256i(got, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwuuds_epi32() {
        let acc = _mm_set1_epi32(1);
        let words = _mm_set1_epi32(0x0001_0001);
        let got = _mm_dpwuuds_epi32(acc, words, words);
        assert_eq_m128i(got, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwuuds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let words = _mm256_set1_epi32(0x0001_0001);
        let got = _mm256_dpwuuds_epi32(acc, words, words);
        assert_eq_m256i(got, _mm256_set1_epi32(3));
    }
}