1use crate::core_arch::{simd::*, x86::*};
22use crate::intrinsics::simd::*;
23
24#[cfg(test)]
25use stdarch_test::assert_instr;
26
/// Computes the absolute value of each packed 32-bit integer in `a`.
///
/// Note: `i32::MIN` has no positive counterpart and wraps to itself.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_abs_epi32(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        // Select the negation for negative lanes, the value itself otherwise.
        let r = simd_select::<m32x8, _>(simd_lt(a, i32x8::ZERO), simd_neg(a), a);
        transmute(r)
    }
}
42
/// Computes the absolute value of each packed 16-bit integer in `a`.
///
/// Note: `i16::MIN` has no positive counterpart and wraps to itself.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_abs_epi16(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        // Select the negation for negative lanes, the value itself otherwise.
        let r = simd_select::<m16x16, _>(simd_lt(a, i16x16::ZERO), simd_neg(a), a);
        transmute(r)
    }
}
58
/// Computes the absolute value of each packed 8-bit integer in `a`.
///
/// Note: `i8::MIN` has no positive counterpart and wraps to itself.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_abs_epi8(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        // Select the negation for negative lanes, the value itself otherwise.
        let r = simd_select::<m8x32, _>(simd_lt(a, i8x32::ZERO), simd_neg(a), a);
        transmute(r)
    }
}
74
/// Adds packed 64-bit integers in `a` and `b` (wrapping on overflow).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i64x4(), b.as_i64x4())) }
}
86
/// Adds packed 32-bit integers in `a` and `b` (wrapping on overflow).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i32x8(), b.as_i32x8())) }
}
98
/// Adds packed 16-bit integers in `a` and `b` (wrapping on overflow).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i16x16(), b.as_i16x16())) }
}
110
/// Adds packed 8-bit integers in `a` and `b` (wrapping on overflow).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i8x32(), b.as_i8x32())) }
}
122
/// Adds packed 8-bit integers in `a` and `b` using signed saturation.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_i8x32(), b.as_i8x32())) }
}
134
/// Adds packed 16-bit integers in `a` and `b` using signed saturation.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_i16x16(), b.as_i16x16())) }
}
146
/// Adds packed unsigned 8-bit integers in `a` and `b` using unsigned saturation.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_u8x32(), b.as_u8x32())) }
}
158
/// Adds packed unsigned 16-bit integers in `a` and `b` using unsigned saturation.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_u16x16(), b.as_u16x16())) }
}
170
/// Concatenates pairs of 16-byte blocks in `a` and `b` into a 32-byte
/// temporary result, shifts the result right by `IMM8` bytes, and returns
/// the low 16 bytes of each 128-bit lane.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 7))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);

    // If palignr is shifting the pair of vectors by more than the size of two
    // lanes, everything is shifted out and the result is zero.
    if IMM8 >= 32 {
        return _mm256_setzero_si256();
    }
    // If palignr is shifting the pair of input vectors by more than one lane,
    // but less than two lanes, convert to shifting in zeroes: shift `a` with
    // zeroes shifted in from above.
    let (a, b) = if IMM8 > 16 {
        (_mm256_setzero_si256(), a)
    } else {
        (a, b)
    };
    unsafe {
        // Shifting by exactly one lane: the result is just `a`.
        if IMM8 == 16 {
            return transmute(a);
        }
    }
    // Computes the shuffle index for result element `i` (IMM8 < 16 here):
    // within each 128-bit lane, the low elements come from `b` (indices
    // 0..16 / 16..32 of the first shuffle operand), and once the lane is
    // exhausted, wrap into the corresponding lane of `a` (offset by +16,
    // i.e. the second shuffle operand).
    const fn mask(shift: u32, i: u32) -> u32 {
        let shift = shift % 16;
        let mod_i = i % 16;
        if mod_i < (16 - shift) {
            i + shift
        } else {
            i + 16 + shift
        }
    }

    unsafe {
        let r: i8x32 = simd_shuffle!(
            b.as_i8x32(),
            a.as_i8x32(),
            [
                mask(IMM8 as u32, 0),
                mask(IMM8 as u32, 1),
                mask(IMM8 as u32, 2),
                mask(IMM8 as u32, 3),
                mask(IMM8 as u32, 4),
                mask(IMM8 as u32, 5),
                mask(IMM8 as u32, 6),
                mask(IMM8 as u32, 7),
                mask(IMM8 as u32, 8),
                mask(IMM8 as u32, 9),
                mask(IMM8 as u32, 10),
                mask(IMM8 as u32, 11),
                mask(IMM8 as u32, 12),
                mask(IMM8 as u32, 13),
                mask(IMM8 as u32, 14),
                mask(IMM8 as u32, 15),
                mask(IMM8 as u32, 16),
                mask(IMM8 as u32, 17),
                mask(IMM8 as u32, 18),
                mask(IMM8 as u32, 19),
                mask(IMM8 as u32, 20),
                mask(IMM8 as u32, 21),
                mask(IMM8 as u32, 22),
                mask(IMM8 as u32, 23),
                mask(IMM8 as u32, 24),
                mask(IMM8 as u32, 25),
                mask(IMM8 as u32, 26),
                mask(IMM8 as u32, 27),
                mask(IMM8 as u32, 28),
                mask(IMM8 as u32, 29),
                mask(IMM8 as u32, 30),
                mask(IMM8 as u32, 31),
            ],
        );
        transmute(r)
    }
}
253
/// Computes the bitwise AND of 256 bits (representing integer data) in `a`
/// and `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_and(a.as_i64x4(), b.as_i64x4())) }
}
266
/// Computes the bitwise NOT of 256 bits (representing integer data) in `a`
/// and then AND with `b` (i.e. `(!a) & b`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // NOT is expressed as XOR with all-ones, since there is no simd_not.
        let all_ones = _mm256_set1_epi8(-1);
        transmute(simd_and(
            simd_xor(a.as_i64x4(), all_ones.as_i64x4()),
            b.as_i64x4(),
        ))
    }
}
285
/// Averages packed unsigned 16-bit integers in `a` and `b`, rounding up:
/// `(a + b + 1) >> 1` per element.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Widen to 32 bits so `a + b + 1` cannot overflow before the shift.
        let a = simd_cast::<_, u32x16>(a.as_u16x16());
        let b = simd_cast::<_, u32x16>(b.as_u16x16());
        let r = simd_shr(simd_add(simd_add(a, b), u32x16::splat(1)), u32x16::splat(1));
        transmute(simd_cast::<_, u16x16>(r))
    }
}
302
/// Averages packed unsigned 8-bit integers in `a` and `b`, rounding up:
/// `(a + b + 1) >> 1` per element.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Widen to 16 bits so `a + b + 1` cannot overflow before the shift.
        let a = simd_cast::<_, u16x32>(a.as_u8x32());
        let b = simd_cast::<_, u16x32>(b.as_u8x32());
        let r = simd_shr(simd_add(simd_add(a, b), u16x32::splat(1)), u16x32::splat(1));
        transmute(simd_cast::<_, u8x32>(r))
    }
}
319
/// Blends packed 32-bit integers from `a` and `b` using control mask `IMM4`:
/// bit `i` of `IMM4` selects element `i` from `b` (set) or `a` (clear).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM4, 4);
    unsafe {
        let a = a.as_i32x4();
        let b = b.as_i32x4();
        // Each lookup table maps a 2-bit slice of IMM4 to the shuffle index
        // for one output element (indices >= 4 select from `b`).
        let r: i32x4 = simd_shuffle!(
            a,
            b,
            [
                [0, 4, 0, 4][IMM4 as usize & 0b11],
                [1, 1, 5, 5][IMM4 as usize & 0b11],
                [2, 6, 2, 6][(IMM4 as usize >> 2) & 0b11],
                [3, 3, 7, 7][(IMM4 as usize >> 2) & 0b11],
            ],
        );
        transmute(r)
    }
}
347
/// Blends packed 32-bit integers from `a` and `b` using control mask `IMM8`:
/// bit `i` of `IMM8` selects element `i` from `b` (set) or `a` (clear).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        // Each lookup table maps a 2-bit slice of IMM8 to the shuffle index
        // for one output element (indices >= 8 select from `b`).
        let r: i32x8 = simd_shuffle!(
            a,
            b,
            [
                [0, 8, 0, 8][IMM8 as usize & 0b11],
                [1, 1, 9, 9][IMM8 as usize & 0b11],
                [2, 10, 2, 10][(IMM8 as usize >> 2) & 0b11],
                [3, 3, 11, 11][(IMM8 as usize >> 2) & 0b11],
                [4, 12, 4, 12][(IMM8 as usize >> 4) & 0b11],
                [5, 5, 13, 13][(IMM8 as usize >> 4) & 0b11],
                [6, 14, 6, 14][(IMM8 as usize >> 6) & 0b11],
                [7, 7, 15, 15][(IMM8 as usize >> 6) & 0b11],
            ],
        );
        transmute(r)
    }
}
379
/// Blends packed 16-bit integers from `a` and `b` within each 128-bit lane
/// using control mask `IMM8`: bit `i` of `IMM8` selects element `i` of the
/// lane from `b` (set) or `a` (clear). The same 8-bit mask applies to both
/// lanes.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendw, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();

        // Each lookup table maps a 2-bit slice of IMM8 to the shuffle index
        // for one output element (indices >= 16 select from `b`); elements
        // 8..16 reuse the same IMM8 bits, mirroring the per-lane semantics.
        let r: i16x16 = simd_shuffle!(
            a,
            b,
            [
                [0, 16, 0, 16][IMM8 as usize & 0b11],
                [1, 1, 17, 17][IMM8 as usize & 0b11],
                [2, 18, 2, 18][(IMM8 as usize >> 2) & 0b11],
                [3, 3, 19, 19][(IMM8 as usize >> 2) & 0b11],
                [4, 20, 4, 20][(IMM8 as usize >> 4) & 0b11],
                [5, 5, 21, 21][(IMM8 as usize >> 4) & 0b11],
                [6, 22, 6, 22][(IMM8 as usize >> 6) & 0b11],
                [7, 7, 23, 23][(IMM8 as usize >> 6) & 0b11],
                [8, 24, 8, 24][IMM8 as usize & 0b11],
                [9, 9, 25, 25][IMM8 as usize & 0b11],
                [10, 26, 10, 26][(IMM8 as usize >> 2) & 0b11],
                [11, 11, 27, 27][(IMM8 as usize >> 2) & 0b11],
                [12, 28, 12, 28][(IMM8 as usize >> 4) & 0b11],
                [13, 13, 29, 29][(IMM8 as usize >> 4) & 0b11],
                [14, 30, 14, 30][(IMM8 as usize >> 6) & 0b11],
                [15, 15, 31, 31][(IMM8 as usize >> 6) & 0b11],
            ],
        );
        transmute(r)
    }
}
420
/// Blends packed 8-bit integers from `a` and `b` using `mask`: for each byte,
/// the highest bit of the corresponding `mask` byte selects `b` (set) or `a`
/// (clear).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
    unsafe {
        // `x < 0` is true exactly when the sign (highest) bit is set.
        let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::ZERO);
        transmute(simd_select(mask, b.as_i8x32(), a.as_i8x32()))
    }
}
435
/// Broadcasts the low packed 8-bit integer from `a` to all elements of the
/// 128-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 16]);
        transmute::<i8x16, _>(ret)
    }
}
451
/// Broadcasts the low packed 8-bit integer from `a` to all elements of the
/// 256-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 32]);
        transmute::<i8x32, _>(ret)
    }
}
467
/// Broadcasts the low packed 32-bit integer from `a` to all elements of the
/// 128-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 4]);
        transmute::<i32x4, _>(ret)
    }
}
485
/// Broadcasts the low packed 32-bit integer from `a` to all elements of the
/// 256-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 8]);
        transmute::<i32x8, _>(ret)
    }
}
503
/// Broadcasts the low packed 64-bit integer from `a` to all elements of the
/// 128-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]);
        transmute::<i64x2, _>(ret)
    }
}
521
/// Broadcasts the low packed 64-bit integer from `a` to all elements of the
/// 256-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]);
        transmute::<i64x4, _>(ret)
    }
}
537
/// Broadcasts the low double-precision (64-bit) floating-point element from
/// `a` to all elements of the 128-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 2]) }
}
550
/// Broadcasts the low double-precision (64-bit) floating-point element from
/// `a` to all elements of the 256-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 4]) }
}
563
/// Broadcasts 128 bits of integer data from `a` to both 128-bit lanes of the
/// 256-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
        transmute::<i64x4, _>(ret)
    }
}
578
/// Broadcasts 128 bits of integer data from `a` to both 128-bit lanes of the
/// 256-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
        transmute::<i64x4, _>(ret)
    }
}
595
/// Broadcasts the low single-precision (32-bit) floating-point element from
/// `a` to all elements of the 128-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastss_ps(a: __m128) -> __m128 {
    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 4]) }
}
608
/// Broadcasts the low single-precision (32-bit) floating-point element from
/// `a` to all elements of the 256-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 8]) }
}
621
/// Broadcasts the low packed 16-bit integer from `a` to all elements of the
/// 128-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 8]);
        transmute::<i16x8, _>(ret)
    }
}
637
/// Broadcasts the low packed 16-bit integer from `a` to all elements of the
/// 256-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 16]);
        transmute::<i16x16, _>(ret)
    }
}
653
/// Compares packed 64-bit integers in `a` and `b` for equality; each result
/// element is all ones (equal) or all zeros.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
}
665
/// Compares packed 32-bit integers in `a` and `b` for equality; each result
/// element is all ones (equal) or all zeros.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
}
677
/// Compares packed 16-bit integers in `a` and `b` for equality; each result
/// element is all ones (equal) or all zeros.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
}
689
/// Compares packed 8-bit integers in `a` and `b` for equality; each result
/// element is all ones (equal) or all zeros.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
}
701
/// Compares packed signed 64-bit integers in `a` and `b` for greater-than;
/// each result element is all ones (a > b) or all zeros.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
}
713
/// Compares packed signed 32-bit integers in `a` and `b` for greater-than;
/// each result element is all ones (a > b) or all zeros.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
}
725
/// Compares packed signed 16-bit integers in `a` and `b` for greater-than;
/// each result element is all ones (a > b) or all zeros.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
}
737
/// Compares packed signed 8-bit integers in `a` and `b` for greater-than;
/// each result element is all ones (a > b) or all zeros.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
}
749
/// Sign-extends packed 16-bit integers in `a` to packed 32-bit integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_cast(a.as_i16x8())) }
}
761
/// Sign-extends the low four packed 16-bit integers in `a` to packed 64-bit
/// integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i16x8();
        // Only the low 64 bits (four i16 elements) participate.
        let v64: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v64))
    }
}
777
/// Sign-extends packed 32-bit integers in `a` to packed 64-bit integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_cast(a.as_i32x4())) }
}
789
/// Sign-extends packed 8-bit integers in `a` to packed 16-bit integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_cast(a.as_i8x16())) }
}
801
/// Sign-extends the low eight packed 8-bit integers in `a` to packed 32-bit
/// integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i8x16();
        // Only the low 64 bits (eight i8 elements) participate.
        let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i32x8, _>(simd_cast(v64))
    }
}
817
/// Sign-extends the low four packed 8-bit integers in `a` to packed 64-bit
/// integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i8x16();
        // Only the low 32 bits (four i8 elements) participate.
        let v32: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v32))
    }
}
833
/// Zero-extends packed unsigned 16-bit integers in `a` to packed 32-bit
/// integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_cast(a.as_u16x8())) }
}
846
/// Zero-extends the low four packed unsigned 16-bit integers in `a` to
/// packed 64-bit integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u16x8();
        // Only the low 64 bits (four u16 elements) participate.
        let v64: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v64))
    }
}
863
/// Zero-extends packed unsigned 32-bit integers in `a` to packed 64-bit
/// integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_cast(a.as_u32x4())) }
}
875
/// Zero-extends packed unsigned 8-bit integers in `a` to packed 16-bit
/// integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_cast(a.as_u8x16())) }
}
887
/// Zero-extends the low eight packed unsigned 8-bit integers in `a` to
/// packed 32-bit integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u8x16();
        // Only the low 64 bits (eight u8 elements) participate.
        let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i32x8, _>(simd_cast(v64))
    }
}
904
/// Zero-extends the low four packed unsigned 8-bit integers in `a` to packed
/// 64-bit integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u8x16();
        // Only the low 32 bits (four u8 elements) participate.
        let v32: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v32))
    }
}
921
/// Extracts 128 bits (of integer data) from `a`, selected with `IMM1`
/// (0 = low lane, 1 = high lane).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let a = a.as_i64x4();
        let b = i64x4::ZERO;
        // IMM1 picks either elements [0, 1] (low lane) or [2, 3] (high lane).
        let dst: i64x2 = simd_shuffle!(a, b, [[0, 1], [2, 3]][IMM1 as usize]);
        transmute(dst)
    }
}
940
/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`
/// (wrapping on overflow).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i16x16();
    let b = b.as_i16x16();
    unsafe {
        // Gather the even- and odd-indexed elements of each pair, laid out
        // per 128-bit lane to match the vphaddw result order.
        let even: i16x16 = simd_shuffle!(
            a,
            b,
            [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
        );
        let odd: i16x16 = simd_shuffle!(
            a,
            b,
            [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
        );
        simd_add(even, odd).as_m256i()
    }
}
966
/// Horizontally adds adjacent pairs of 32-bit integers in `a` and `b`
/// (wrapping on overflow).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i32x8();
    let b = b.as_i32x8();
    unsafe {
        // Gather the even- and odd-indexed elements of each pair, laid out
        // per 128-bit lane to match the vphaddd result order.
        let even: i32x8 = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
        let odd: i32x8 = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
        simd_add(even, odd).as_m256i()
    }
}
984
985#[inline]
990#[target_feature(enable = "avx2")]
991#[cfg_attr(test, assert_instr(vphaddsw))]
992#[stable(feature = "simd_x86", since = "1.27.0")]
993pub fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
994 let a = a.as_i16x16();
995 let b = b.as_i16x16();
996 unsafe {
997 let even: i16x16 = simd_shuffle!(
998 a,
999 b,
1000 [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
1001 );
1002 let odd: i16x16 = simd_shuffle!(
1003 a,
1004 b,
1005 [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
1006 );
1007 simd_saturating_add(even, odd).as_m256i()
1008 }
1009}
1010
/// Horizontally subtracts adjacent pairs of 16-bit integers in `a` and `b`
/// (even element minus odd element, wrapping on overflow).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i16x16();
    let b = b.as_i16x16();
    unsafe {
        // Gather the even- and odd-indexed elements of each pair, laid out
        // per 128-bit lane to match the vphsubw result order.
        let even: i16x16 = simd_shuffle!(
            a,
            b,
            [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
        );
        let odd: i16x16 = simd_shuffle!(
            a,
            b,
            [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
        );
        simd_sub(even, odd).as_m256i()
    }
}
1036
/// Horizontally subtracts adjacent pairs of 32-bit integers in `a` and `b`
/// (even element minus odd element, wrapping on overflow).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i32x8();
    let b = b.as_i32x8();
    unsafe {
        // Gather the even- and odd-indexed elements of each pair, laid out
        // per 128-bit lane to match the vphsubd result order.
        let even: i32x8 = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
        let odd: i32x8 = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
        simd_sub(even, odd).as_m256i()
    }
}
1054
1055#[inline]
1060#[target_feature(enable = "avx2")]
1061#[cfg_attr(test, assert_instr(vphsubsw))]
1062#[stable(feature = "simd_x86", since = "1.27.0")]
1063pub fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
1064 let a = a.as_i16x16();
1065 let b = b.as_i16x16();
1066 unsafe {
1067 let even: i16x16 = simd_shuffle!(
1068 a,
1069 b,
1070 [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
1071 );
1072 let odd: i16x16 = simd_shuffle!(
1073 a,
1074 b,
1075 [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
1076 );
1077 simd_saturating_sub(even, odd).as_m256i()
1078 }
1079}
1080
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// # Safety
///
/// Every computed address `slice + offset * SCALE` must be valid to read as
/// an `i32`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    // All-ones mask: gather every element unconditionally.
    let neg_one = _mm_set1_epi32(-1).as_i32x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}
1103
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of a `mask`
/// element is clear, the value from `src` at that position is returned
/// instead.
///
/// # Safety
///
/// Every computed address `slice + offset * SCALE` selected by `mask` must be
/// valid to read as an `i32`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}
1129
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// # Safety
///
/// Every computed address `slice + offset * SCALE` must be valid to read as
/// an `i32`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x8::ZERO;
    // All-ones mask: gather every element unconditionally.
    let neg_one = _mm256_set1_epi32(-1).as_i32x8();
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    let r = vpgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}
1152
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of a `mask`
/// element is clear, the value from `src` at that position is returned
/// instead.
///
/// # Safety
///
/// Every computed address `slice + offset * SCALE` selected by `mask` must be
/// valid to read as an `i32`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi32<const SCALE: i32>(
    src: __m256i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x8();
    let mask = mask.as_i32x8();
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    let r = vpgatherdd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}
1178
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// # Safety
///
/// Every computed address `slice + offset * SCALE` must be valid to read as
/// an `f32`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    // All lanes of -1.0 have the sign bit set, i.e. gather unconditionally.
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
}
1197
1198#[inline]
1205#[target_feature(enable = "avx2")]
1206#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
1207#[rustc_legacy_const_generics(4)]
1208#[stable(feature = "simd_x86", since = "1.27.0")]
1209pub unsafe fn _mm_mask_i32gather_ps<const SCALE: i32>(
1210 src: __m128,
1211 slice: *const f32,
1212 offsets: __m128i,
1213 mask: __m128,
1214) -> __m128 {
1215 static_assert_imm8_scale!(SCALE);
1216 let offsets = offsets.as_i32x4();
1217 let slice = slice as *const i8;
1218 pgatherdps(src, slice, offsets, mask, SCALE as i8)
1219}
1220
1221#[inline]
1227#[target_feature(enable = "avx2")]
1228#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
1229#[rustc_legacy_const_generics(2)]
1230#[stable(feature = "simd_x86", since = "1.27.0")]
1231pub unsafe fn _mm256_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m256 {
1232 static_assert_imm8_scale!(SCALE);
1233 let zero = _mm256_setzero_ps();
1234 let neg_one = _mm256_set1_ps(-1.0);
1235 let offsets = offsets.as_i32x8();
1236 let slice = slice as *const i8;
1237 vpgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
1238}
1239
1240#[inline]
1247#[target_feature(enable = "avx2")]
1248#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
1249#[rustc_legacy_const_generics(4)]
1250#[stable(feature = "simd_x86", since = "1.27.0")]
1251pub unsafe fn _mm256_mask_i32gather_ps<const SCALE: i32>(
1252 src: __m256,
1253 slice: *const f32,
1254 offsets: __m256i,
1255 mask: __m256,
1256) -> __m256 {
1257 static_assert_imm8_scale!(SCALE);
1258 let offsets = offsets.as_i32x8();
1259 let slice = slice as *const i8;
1260 vpgatherdps(src, slice, offsets, mask, SCALE as i8)
1261}
1262
1263#[inline]
1269#[target_feature(enable = "avx2")]
1270#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
1271#[rustc_legacy_const_generics(2)]
1272#[stable(feature = "simd_x86", since = "1.27.0")]
1273pub unsafe fn _mm_i32gather_epi64<const SCALE: i32>(
1274 slice: *const i64,
1275 offsets: __m128i,
1276) -> __m128i {
1277 static_assert_imm8_scale!(SCALE);
1278 let zero = i64x2::ZERO;
1279 let neg_one = _mm_set1_epi64x(-1).as_i64x2();
1280 let offsets = offsets.as_i32x4();
1281 let slice = slice as *const i8;
1282 let r = pgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
1283 transmute(r)
1284}
1285
1286#[inline]
1293#[target_feature(enable = "avx2")]
1294#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
1295#[rustc_legacy_const_generics(4)]
1296#[stable(feature = "simd_x86", since = "1.27.0")]
1297pub unsafe fn _mm_mask_i32gather_epi64<const SCALE: i32>(
1298 src: __m128i,
1299 slice: *const i64,
1300 offsets: __m128i,
1301 mask: __m128i,
1302) -> __m128i {
1303 static_assert_imm8_scale!(SCALE);
1304 let src = src.as_i64x2();
1305 let mask = mask.as_i64x2();
1306 let offsets = offsets.as_i32x4();
1307 let slice = slice as *const i8;
1308 let r = pgatherdq(src, slice, offsets, mask, SCALE as i8);
1309 transmute(r)
1310}
1311
1312#[inline]
1318#[target_feature(enable = "avx2")]
1319#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
1320#[rustc_legacy_const_generics(2)]
1321#[stable(feature = "simd_x86", since = "1.27.0")]
1322pub unsafe fn _mm256_i32gather_epi64<const SCALE: i32>(
1323 slice: *const i64,
1324 offsets: __m128i,
1325) -> __m256i {
1326 static_assert_imm8_scale!(SCALE);
1327 let zero = i64x4::ZERO;
1328 let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
1329 let offsets = offsets.as_i32x4();
1330 let slice = slice as *const i8;
1331 let r = vpgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
1332 transmute(r)
1333}
1334
1335#[inline]
1342#[target_feature(enable = "avx2")]
1343#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
1344#[rustc_legacy_const_generics(4)]
1345#[stable(feature = "simd_x86", since = "1.27.0")]
1346pub unsafe fn _mm256_mask_i32gather_epi64<const SCALE: i32>(
1347 src: __m256i,
1348 slice: *const i64,
1349 offsets: __m128i,
1350 mask: __m256i,
1351) -> __m256i {
1352 static_assert_imm8_scale!(SCALE);
1353 let src = src.as_i64x4();
1354 let mask = mask.as_i64x4();
1355 let offsets = offsets.as_i32x4();
1356 let slice = slice as *const i8;
1357 let r = vpgatherdq(src, slice, offsets, mask, SCALE as i8);
1358 transmute(r)
1359}
1360
1361#[inline]
1367#[target_feature(enable = "avx2")]
1368#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
1369#[rustc_legacy_const_generics(2)]
1370#[stable(feature = "simd_x86", since = "1.27.0")]
1371pub unsafe fn _mm_i32gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
1372 static_assert_imm8_scale!(SCALE);
1373 let zero = _mm_setzero_pd();
1374 let neg_one = _mm_set1_pd(-1.0);
1375 let offsets = offsets.as_i32x4();
1376 let slice = slice as *const i8;
1377 pgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
1378}
1379
1380#[inline]
1387#[target_feature(enable = "avx2")]
1388#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
1389#[rustc_legacy_const_generics(4)]
1390#[stable(feature = "simd_x86", since = "1.27.0")]
1391pub unsafe fn _mm_mask_i32gather_pd<const SCALE: i32>(
1392 src: __m128d,
1393 slice: *const f64,
1394 offsets: __m128i,
1395 mask: __m128d,
1396) -> __m128d {
1397 static_assert_imm8_scale!(SCALE);
1398 let offsets = offsets.as_i32x4();
1399 let slice = slice as *const i8;
1400 pgatherdpd(src, slice, offsets, mask, SCALE as i8)
1401}
1402
1403#[inline]
1409#[target_feature(enable = "avx2")]
1410#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
1411#[rustc_legacy_const_generics(2)]
1412#[stable(feature = "simd_x86", since = "1.27.0")]
1413pub unsafe fn _mm256_i32gather_pd<const SCALE: i32>(
1414 slice: *const f64,
1415 offsets: __m128i,
1416) -> __m256d {
1417 static_assert_imm8_scale!(SCALE);
1418 let zero = _mm256_setzero_pd();
1419 let neg_one = _mm256_set1_pd(-1.0);
1420 let offsets = offsets.as_i32x4();
1421 let slice = slice as *const i8;
1422 vpgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
1423}
1424
1425#[inline]
1432#[target_feature(enable = "avx2")]
1433#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
1434#[rustc_legacy_const_generics(4)]
1435#[stable(feature = "simd_x86", since = "1.27.0")]
1436pub unsafe fn _mm256_mask_i32gather_pd<const SCALE: i32>(
1437 src: __m256d,
1438 slice: *const f64,
1439 offsets: __m128i,
1440 mask: __m256d,
1441) -> __m256d {
1442 static_assert_imm8_scale!(SCALE);
1443 let offsets = offsets.as_i32x4();
1444 let slice = slice as *const i8;
1445 vpgatherdpd(src, slice, offsets, mask, SCALE as i8)
1446}
1447
1448#[inline]
1454#[target_feature(enable = "avx2")]
1455#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
1456#[rustc_legacy_const_generics(2)]
1457#[stable(feature = "simd_x86", since = "1.27.0")]
1458pub unsafe fn _mm_i64gather_epi32<const SCALE: i32>(
1459 slice: *const i32,
1460 offsets: __m128i,
1461) -> __m128i {
1462 static_assert_imm8_scale!(SCALE);
1463 let zero = i32x4::ZERO;
1464 let neg_one = _mm_set1_epi64x(-1).as_i32x4();
1465 let offsets = offsets.as_i64x2();
1466 let slice = slice as *const i8;
1467 let r = pgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
1468 transmute(r)
1469}
1470
1471#[inline]
1478#[target_feature(enable = "avx2")]
1479#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
1480#[rustc_legacy_const_generics(4)]
1481#[stable(feature = "simd_x86", since = "1.27.0")]
1482pub unsafe fn _mm_mask_i64gather_epi32<const SCALE: i32>(
1483 src: __m128i,
1484 slice: *const i32,
1485 offsets: __m128i,
1486 mask: __m128i,
1487) -> __m128i {
1488 static_assert_imm8_scale!(SCALE);
1489 let src = src.as_i32x4();
1490 let mask = mask.as_i32x4();
1491 let offsets = offsets.as_i64x2();
1492 let slice = slice as *const i8;
1493 let r = pgatherqd(src, slice, offsets, mask, SCALE as i8);
1494 transmute(r)
1495}
1496
1497#[inline]
1503#[target_feature(enable = "avx2")]
1504#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
1505#[rustc_legacy_const_generics(2)]
1506#[stable(feature = "simd_x86", since = "1.27.0")]
1507pub unsafe fn _mm256_i64gather_epi32<const SCALE: i32>(
1508 slice: *const i32,
1509 offsets: __m256i,
1510) -> __m128i {
1511 static_assert_imm8_scale!(SCALE);
1512 let zero = i32x4::ZERO;
1513 let neg_one = _mm_set1_epi64x(-1).as_i32x4();
1514 let offsets = offsets.as_i64x4();
1515 let slice = slice as *const i8;
1516 let r = vpgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
1517 transmute(r)
1518}
1519
1520#[inline]
1527#[target_feature(enable = "avx2")]
1528#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
1529#[rustc_legacy_const_generics(4)]
1530#[stable(feature = "simd_x86", since = "1.27.0")]
1531pub unsafe fn _mm256_mask_i64gather_epi32<const SCALE: i32>(
1532 src: __m128i,
1533 slice: *const i32,
1534 offsets: __m256i,
1535 mask: __m128i,
1536) -> __m128i {
1537 static_assert_imm8_scale!(SCALE);
1538 let src = src.as_i32x4();
1539 let mask = mask.as_i32x4();
1540 let offsets = offsets.as_i64x4();
1541 let slice = slice as *const i8;
1542 let r = vpgatherqd(src, slice, offsets, mask, SCALE as i8);
1543 transmute(r)
1544}
1545
1546#[inline]
1552#[target_feature(enable = "avx2")]
1553#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
1554#[rustc_legacy_const_generics(2)]
1555#[stable(feature = "simd_x86", since = "1.27.0")]
1556pub unsafe fn _mm_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
1557 static_assert_imm8_scale!(SCALE);
1558 let zero = _mm_setzero_ps();
1559 let neg_one = _mm_set1_ps(-1.0);
1560 let offsets = offsets.as_i64x2();
1561 let slice = slice as *const i8;
1562 pgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
1563}
1564
1565#[inline]
1572#[target_feature(enable = "avx2")]
1573#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
1574#[rustc_legacy_const_generics(4)]
1575#[stable(feature = "simd_x86", since = "1.27.0")]
1576pub unsafe fn _mm_mask_i64gather_ps<const SCALE: i32>(
1577 src: __m128,
1578 slice: *const f32,
1579 offsets: __m128i,
1580 mask: __m128,
1581) -> __m128 {
1582 static_assert_imm8_scale!(SCALE);
1583 let offsets = offsets.as_i64x2();
1584 let slice = slice as *const i8;
1585 pgatherqps(src, slice, offsets, mask, SCALE as i8)
1586}
1587
1588#[inline]
1594#[target_feature(enable = "avx2")]
1595#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
1596#[rustc_legacy_const_generics(2)]
1597#[stable(feature = "simd_x86", since = "1.27.0")]
1598pub unsafe fn _mm256_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m128 {
1599 static_assert_imm8_scale!(SCALE);
1600 let zero = _mm_setzero_ps();
1601 let neg_one = _mm_set1_ps(-1.0);
1602 let offsets = offsets.as_i64x4();
1603 let slice = slice as *const i8;
1604 vpgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
1605}
1606
1607#[inline]
1614#[target_feature(enable = "avx2")]
1615#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
1616#[rustc_legacy_const_generics(4)]
1617#[stable(feature = "simd_x86", since = "1.27.0")]
1618pub unsafe fn _mm256_mask_i64gather_ps<const SCALE: i32>(
1619 src: __m128,
1620 slice: *const f32,
1621 offsets: __m256i,
1622 mask: __m128,
1623) -> __m128 {
1624 static_assert_imm8_scale!(SCALE);
1625 let offsets = offsets.as_i64x4();
1626 let slice = slice as *const i8;
1627 vpgatherqps(src, slice, offsets, mask, SCALE as i8)
1628}
1629
1630#[inline]
1636#[target_feature(enable = "avx2")]
1637#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
1638#[rustc_legacy_const_generics(2)]
1639#[stable(feature = "simd_x86", since = "1.27.0")]
1640pub unsafe fn _mm_i64gather_epi64<const SCALE: i32>(
1641 slice: *const i64,
1642 offsets: __m128i,
1643) -> __m128i {
1644 static_assert_imm8_scale!(SCALE);
1645 let zero = i64x2::ZERO;
1646 let neg_one = _mm_set1_epi64x(-1).as_i64x2();
1647 let slice = slice as *const i8;
1648 let offsets = offsets.as_i64x2();
1649 let r = pgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
1650 transmute(r)
1651}
1652
1653#[inline]
1660#[target_feature(enable = "avx2")]
1661#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
1662#[rustc_legacy_const_generics(4)]
1663#[stable(feature = "simd_x86", since = "1.27.0")]
1664pub unsafe fn _mm_mask_i64gather_epi64<const SCALE: i32>(
1665 src: __m128i,
1666 slice: *const i64,
1667 offsets: __m128i,
1668 mask: __m128i,
1669) -> __m128i {
1670 static_assert_imm8_scale!(SCALE);
1671 let src = src.as_i64x2();
1672 let mask = mask.as_i64x2();
1673 let offsets = offsets.as_i64x2();
1674 let slice = slice as *const i8;
1675 let r = pgatherqq(src, slice, offsets, mask, SCALE as i8);
1676 transmute(r)
1677}
1678
1679#[inline]
1685#[target_feature(enable = "avx2")]
1686#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
1687#[rustc_legacy_const_generics(2)]
1688#[stable(feature = "simd_x86", since = "1.27.0")]
1689pub unsafe fn _mm256_i64gather_epi64<const SCALE: i32>(
1690 slice: *const i64,
1691 offsets: __m256i,
1692) -> __m256i {
1693 static_assert_imm8_scale!(SCALE);
1694 let zero = i64x4::ZERO;
1695 let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
1696 let slice = slice as *const i8;
1697 let offsets = offsets.as_i64x4();
1698 let r = vpgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
1699 transmute(r)
1700}
1701
1702#[inline]
1709#[target_feature(enable = "avx2")]
1710#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
1711#[rustc_legacy_const_generics(4)]
1712#[stable(feature = "simd_x86", since = "1.27.0")]
1713pub unsafe fn _mm256_mask_i64gather_epi64<const SCALE: i32>(
1714 src: __m256i,
1715 slice: *const i64,
1716 offsets: __m256i,
1717 mask: __m256i,
1718) -> __m256i {
1719 static_assert_imm8_scale!(SCALE);
1720 let src = src.as_i64x4();
1721 let mask = mask.as_i64x4();
1722 let offsets = offsets.as_i64x4();
1723 let slice = slice as *const i8;
1724 let r = vpgatherqq(src, slice, offsets, mask, SCALE as i8);
1725 transmute(r)
1726}
1727
1728#[inline]
1734#[target_feature(enable = "avx2")]
1735#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
1736#[rustc_legacy_const_generics(2)]
1737#[stable(feature = "simd_x86", since = "1.27.0")]
1738pub unsafe fn _mm_i64gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
1739 static_assert_imm8_scale!(SCALE);
1740 let zero = _mm_setzero_pd();
1741 let neg_one = _mm_set1_pd(-1.0);
1742 let slice = slice as *const i8;
1743 let offsets = offsets.as_i64x2();
1744 pgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
1745}
1746
1747#[inline]
1754#[target_feature(enable = "avx2")]
1755#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
1756#[rustc_legacy_const_generics(4)]
1757#[stable(feature = "simd_x86", since = "1.27.0")]
1758pub unsafe fn _mm_mask_i64gather_pd<const SCALE: i32>(
1759 src: __m128d,
1760 slice: *const f64,
1761 offsets: __m128i,
1762 mask: __m128d,
1763) -> __m128d {
1764 static_assert_imm8_scale!(SCALE);
1765 let slice = slice as *const i8;
1766 let offsets = offsets.as_i64x2();
1767 pgatherqpd(src, slice, offsets, mask, SCALE as i8)
1768}
1769
1770#[inline]
1776#[target_feature(enable = "avx2")]
1777#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
1778#[rustc_legacy_const_generics(2)]
1779#[stable(feature = "simd_x86", since = "1.27.0")]
1780pub unsafe fn _mm256_i64gather_pd<const SCALE: i32>(
1781 slice: *const f64,
1782 offsets: __m256i,
1783) -> __m256d {
1784 static_assert_imm8_scale!(SCALE);
1785 let zero = _mm256_setzero_pd();
1786 let neg_one = _mm256_set1_pd(-1.0);
1787 let slice = slice as *const i8;
1788 let offsets = offsets.as_i64x4();
1789 vpgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
1790}
1791
1792#[inline]
1799#[target_feature(enable = "avx2")]
1800#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
1801#[rustc_legacy_const_generics(4)]
1802#[stable(feature = "simd_x86", since = "1.27.0")]
1803pub unsafe fn _mm256_mask_i64gather_pd<const SCALE: i32>(
1804 src: __m256d,
1805 slice: *const f64,
1806 offsets: __m256i,
1807 mask: __m256d,
1808) -> __m256d {
1809 static_assert_imm8_scale!(SCALE);
1810 let slice = slice as *const i8;
1811 let offsets = offsets.as_i64x4();
1812 vpgatherqpd(src, slice, offsets, mask, SCALE as i8)
1813}
1814
/// Copies `a` to the result, then overwrites the 128-bit half selected by
/// `IMM1` (0 = low half, 1 = high half) with `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let a = a.as_i64x4();
        // Widen `b` to 256 bits so its two i64 lanes are addressable as
        // shuffle indices 4 and 5 (indices 0..=3 refer to `a`).
        let b = _mm256_castsi128_si256(b).as_i64x4();
        // IMM1 == 0 -> [b0, b1, a2, a3] (replace the low half);
        // IMM1 == 1 -> [a0, a1, b0, b1] (replace the high half).
        let dst: i64x4 = simd_shuffle!(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]);
        transmute(dst)
    }
}
1834
1835#[inline]
1841#[target_feature(enable = "avx2")]
1842#[cfg_attr(test, assert_instr(vpmaddwd))]
1843#[stable(feature = "simd_x86", since = "1.27.0")]
1844pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
1845 unsafe { transmute(pmaddwd(a.as_i16x16(), b.as_i16x16())) }
1858}
1859
1860#[inline]
1867#[target_feature(enable = "avx2")]
1868#[cfg_attr(test, assert_instr(vpmaddubsw))]
1869#[stable(feature = "simd_x86", since = "1.27.0")]
1870pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
1871 unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_i8x32())) }
1872}
1873
1874#[inline]
1880#[target_feature(enable = "avx2")]
1881#[cfg_attr(test, assert_instr(vpmaskmovd))]
1882#[stable(feature = "simd_x86", since = "1.27.0")]
1883#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1884pub const unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
1885 let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
1886 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x4::ZERO).as_m128i()
1887}
1888
1889#[inline]
1895#[target_feature(enable = "avx2")]
1896#[cfg_attr(test, assert_instr(vpmaskmovd))]
1897#[stable(feature = "simd_x86", since = "1.27.0")]
1898#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1899pub const unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
1900 let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
1901 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x8::ZERO).as_m256i()
1902}
1903
1904#[inline]
1910#[target_feature(enable = "avx2")]
1911#[cfg_attr(test, assert_instr(vpmaskmovq))]
1912#[stable(feature = "simd_x86", since = "1.27.0")]
1913#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1914pub const unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
1915 let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
1916 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x2::ZERO).as_m128i()
1917}
1918
1919#[inline]
1925#[target_feature(enable = "avx2")]
1926#[cfg_attr(test, assert_instr(vpmaskmovq))]
1927#[stable(feature = "simd_x86", since = "1.27.0")]
1928#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1929pub const unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
1930 let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
1931 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x4::ZERO).as_m256i()
1932}
1933
1934#[inline]
1940#[target_feature(enable = "avx2")]
1941#[cfg_attr(test, assert_instr(vpmaskmovd))]
1942#[stable(feature = "simd_x86", since = "1.27.0")]
1943#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1944pub const unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
1945 let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
1946 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x4())
1947}
1948
1949#[inline]
1955#[target_feature(enable = "avx2")]
1956#[cfg_attr(test, assert_instr(vpmaskmovd))]
1957#[stable(feature = "simd_x86", since = "1.27.0")]
1958#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1959pub const unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
1960 let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
1961 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x8())
1962}
1963
1964#[inline]
1970#[target_feature(enable = "avx2")]
1971#[cfg_attr(test, assert_instr(vpmaskmovq))]
1972#[stable(feature = "simd_x86", since = "1.27.0")]
1973#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1974pub const unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
1975 let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
1976 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x2())
1977}
1978
1979#[inline]
1985#[target_feature(enable = "avx2")]
1986#[cfg_attr(test, assert_instr(vpmaskmovq))]
1987#[stable(feature = "simd_x86", since = "1.27.0")]
1988#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1989pub const unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
1990 let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
1991 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x4())
1992}
1993
1994#[inline]
1999#[target_feature(enable = "avx2")]
2000#[cfg_attr(test, assert_instr(vpmaxsw))]
2001#[stable(feature = "simd_x86", since = "1.27.0")]
2002#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2003pub const fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
2004 unsafe { simd_imax(a.as_i16x16(), b.as_i16x16()).as_m256i() }
2005}
2006
2007#[inline]
2012#[target_feature(enable = "avx2")]
2013#[cfg_attr(test, assert_instr(vpmaxsd))]
2014#[stable(feature = "simd_x86", since = "1.27.0")]
2015#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2016pub const fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
2017 unsafe { simd_imax(a.as_i32x8(), b.as_i32x8()).as_m256i() }
2018}
2019
2020#[inline]
2025#[target_feature(enable = "avx2")]
2026#[cfg_attr(test, assert_instr(vpmaxsb))]
2027#[stable(feature = "simd_x86", since = "1.27.0")]
2028#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2029pub const fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
2030 unsafe { simd_imax(a.as_i8x32(), b.as_i8x32()).as_m256i() }
2031}
2032
2033#[inline]
2038#[target_feature(enable = "avx2")]
2039#[cfg_attr(test, assert_instr(vpmaxuw))]
2040#[stable(feature = "simd_x86", since = "1.27.0")]
2041#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2042pub const fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
2043 unsafe { simd_imax(a.as_u16x16(), b.as_u16x16()).as_m256i() }
2044}
2045
2046#[inline]
2051#[target_feature(enable = "avx2")]
2052#[cfg_attr(test, assert_instr(vpmaxud))]
2053#[stable(feature = "simd_x86", since = "1.27.0")]
2054#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2055pub const fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
2056 unsafe { simd_imax(a.as_u32x8(), b.as_u32x8()).as_m256i() }
2057}
2058
2059#[inline]
2064#[target_feature(enable = "avx2")]
2065#[cfg_attr(test, assert_instr(vpmaxub))]
2066#[stable(feature = "simd_x86", since = "1.27.0")]
2067#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2068pub const fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
2069 unsafe { simd_imax(a.as_u8x32(), b.as_u8x32()).as_m256i() }
2070}
2071
2072#[inline]
2077#[target_feature(enable = "avx2")]
2078#[cfg_attr(test, assert_instr(vpminsw))]
2079#[stable(feature = "simd_x86", since = "1.27.0")]
2080#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2081pub const fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
2082 unsafe { simd_imin(a.as_i16x16(), b.as_i16x16()).as_m256i() }
2083}
2084
2085#[inline]
2090#[target_feature(enable = "avx2")]
2091#[cfg_attr(test, assert_instr(vpminsd))]
2092#[stable(feature = "simd_x86", since = "1.27.0")]
2093#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2094pub const fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
2095 unsafe { simd_imin(a.as_i32x8(), b.as_i32x8()).as_m256i() }
2096}
2097
2098#[inline]
2103#[target_feature(enable = "avx2")]
2104#[cfg_attr(test, assert_instr(vpminsb))]
2105#[stable(feature = "simd_x86", since = "1.27.0")]
2106#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2107pub const fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
2108 unsafe { simd_imin(a.as_i8x32(), b.as_i8x32()).as_m256i() }
2109}
2110
2111#[inline]
2116#[target_feature(enable = "avx2")]
2117#[cfg_attr(test, assert_instr(vpminuw))]
2118#[stable(feature = "simd_x86", since = "1.27.0")]
2119#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2120pub const fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
2121 unsafe { simd_imin(a.as_u16x16(), b.as_u16x16()).as_m256i() }
2122}
2123
2124#[inline]
2129#[target_feature(enable = "avx2")]
2130#[cfg_attr(test, assert_instr(vpminud))]
2131#[stable(feature = "simd_x86", since = "1.27.0")]
2132#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2133pub const fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
2134 unsafe { simd_imin(a.as_u32x8(), b.as_u32x8()).as_m256i() }
2135}
2136
2137#[inline]
2142#[target_feature(enable = "avx2")]
2143#[cfg_attr(test, assert_instr(vpminub))]
2144#[stable(feature = "simd_x86", since = "1.27.0")]
2145#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2146pub const fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
2147 unsafe { simd_imin(a.as_u8x32(), b.as_u8x32()).as_m256i() }
2148}
2149
2150#[inline]
2155#[target_feature(enable = "avx2")]
2156#[cfg_attr(test, assert_instr(vpmovmskb))]
2157#[stable(feature = "simd_x86", since = "1.27.0")]
2158#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2159pub const fn _mm256_movemask_epi8(a: __m256i) -> i32 {
2160 unsafe {
2161 let z = i8x32::ZERO;
2162 let m: i8x32 = simd_lt(a.as_i8x32(), z);
2163 simd_bitmask::<_, u32>(m) as i32
2164 }
2165}
2166
2167#[inline]
2177#[target_feature(enable = "avx2")]
2178#[cfg_attr(test, assert_instr(vmpsadbw, IMM8 = 0))]
2179#[rustc_legacy_const_generics(2)]
2180#[stable(feature = "simd_x86", since = "1.27.0")]
2181pub fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2182 static_assert_uimm_bits!(IMM8, 8);
2183 unsafe { transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8 as i8)) }
2184}
2185
2186#[inline]
2193#[target_feature(enable = "avx2")]
2194#[cfg_attr(test, assert_instr(vpmuldq))]
2195#[stable(feature = "simd_x86", since = "1.27.0")]
2196#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2197pub const fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
2198 unsafe {
2199 let a = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4()));
2200 let b = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4()));
2201 transmute(simd_mul(a, b))
2202 }
2203}
2204
2205#[inline]
2212#[target_feature(enable = "avx2")]
2213#[cfg_attr(test, assert_instr(vpmuludq))]
2214#[stable(feature = "simd_x86", since = "1.27.0")]
2215#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2216pub const fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
2217 unsafe {
2218 let a = a.as_u64x4();
2219 let b = b.as_u64x4();
2220 let mask = u64x4::splat(u32::MAX as u64);
2221 transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
2222 }
2223}
2224
2225#[inline]
2231#[target_feature(enable = "avx2")]
2232#[cfg_attr(test, assert_instr(vpmulhw))]
2233#[stable(feature = "simd_x86", since = "1.27.0")]
2234#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2235pub const fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
2236 unsafe {
2237 let a = simd_cast::<_, i32x16>(a.as_i16x16());
2238 let b = simd_cast::<_, i32x16>(b.as_i16x16());
2239 let r = simd_shr(simd_mul(a, b), i32x16::splat(16));
2240 transmute(simd_cast::<i32x16, i16x16>(r))
2241 }
2242}
2243
2244#[inline]
2250#[target_feature(enable = "avx2")]
2251#[cfg_attr(test, assert_instr(vpmulhuw))]
2252#[stable(feature = "simd_x86", since = "1.27.0")]
2253#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2254pub const fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
2255 unsafe {
2256 let a = simd_cast::<_, u32x16>(a.as_u16x16());
2257 let b = simd_cast::<_, u32x16>(b.as_u16x16());
2258 let r = simd_shr(simd_mul(a, b), u32x16::splat(16));
2259 transmute(simd_cast::<u32x16, u16x16>(r))
2260 }
2261}
2262
2263#[inline]
2269#[target_feature(enable = "avx2")]
2270#[cfg_attr(test, assert_instr(vpmullw))]
2271#[stable(feature = "simd_x86", since = "1.27.0")]
2272#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2273pub const fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
2274 unsafe { transmute(simd_mul(a.as_i16x16(), b.as_i16x16())) }
2275}
2276
2277#[inline]
2283#[target_feature(enable = "avx2")]
2284#[cfg_attr(test, assert_instr(vpmulld))]
2285#[stable(feature = "simd_x86", since = "1.27.0")]
2286#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2287pub const fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i {
2288 unsafe { transmute(simd_mul(a.as_i32x8(), b.as_i32x8())) }
2289}
2290
2291#[inline]
2298#[target_feature(enable = "avx2")]
2299#[cfg_attr(test, assert_instr(vpmulhrsw))]
2300#[stable(feature = "simd_x86", since = "1.27.0")]
2301pub fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i {
2302 unsafe { transmute(pmulhrsw(a.as_i16x16(), b.as_i16x16())) }
2303}
2304
2305#[inline]
2310#[target_feature(enable = "avx2")]
2311#[cfg_attr(test, assert_instr(vorps))]
2312#[stable(feature = "simd_x86", since = "1.27.0")]
2313#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2314pub const fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
2315 unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
2316}
2317
/// Packs signed 16-bit integers from `a` and `b` into signed 8-bit integers
/// with saturation.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpacksswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Saturate every 16-bit lane into the signed 8-bit range.
        let max = simd_splat(i8::MAX as i16);
        let min = simd_splat(i8::MIN as i16);

        let clamped_a = simd_imax(simd_imin(a.as_i16x16(), max), min)
            .as_m256i()
            .as_i8x32();
        let clamped_b = simd_imax(simd_imin(b.as_i16x16(), max), min)
            .as_m256i()
            .as_i8x32();

        // Pick the low byte of each saturated word (even byte indices on
        // little-endian x86). Indices < 32 select from `clamped_a`, >= 32
        // from `clamped_b`; sources interleave per 128-bit half, matching
        // VPACKSSWB's lane-wise behavior.
        #[rustfmt::skip]
        const IDXS: [u32; 32] = [
            00, 02, 04, 06, 08, 10, 12, 14,
            32, 34, 36, 38, 40, 42, 44, 46,
            16, 18, 20, 22, 24, 26, 28, 30,
            48, 50, 52, 54, 56, 58, 60, 62,
        ];
        let result: i8x32 = simd_shuffle!(clamped_a, clamped_b, IDXS);

        result.as_m256i()
    }
}
2351
/// Packs signed 32-bit integers from `a` and `b` into signed 16-bit integers
/// with saturation.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackssdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Saturate every 32-bit lane into the signed 16-bit range.
        let max = simd_splat(i16::MAX as i32);
        let min = simd_splat(i16::MIN as i32);

        let clamped_a = simd_imax(simd_imin(a.as_i32x8(), max), min)
            .as_m256i()
            .as_i16x16();
        let clamped_b = simd_imax(simd_imin(b.as_i32x8(), max), min)
            .as_m256i()
            .as_i16x16();

        // Pick the low word of each saturated dword (even word indices on
        // little-endian x86). Indices < 16 select from `clamped_a`, >= 16
        // from `clamped_b`; sources interleave per 128-bit half, matching
        // VPACKSSDW's lane-wise behavior.
        #[rustfmt::skip]
        const IDXS: [u32; 16] = [
            00, 02, 04, 06,
            16, 18, 20, 22,
            08, 10, 12, 14,
            24, 26, 28, 30,
        ];
        let result: i16x16 = simd_shuffle!(clamped_a, clamped_b, IDXS);

        result.as_m256i()
    }
}
2385
/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using unsigned saturation.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackuswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Clamp the signed 16-bit inputs into the u8 range [0, 255]; the
        // shuffle below then picks the low byte of each clamped element.
        let max = simd_splat(u8::MAX as i16);
        let min = simd_splat(u8::MIN as i16);

        let clamped_a = simd_imax(simd_imin(a.as_i16x16(), max), min)
            .as_m256i()
            .as_i8x32();
        let clamped_b = simd_imax(simd_imin(b.as_i16x16(), max), min)
            .as_m256i()
            .as_i8x32();

        // Same per-128-bit-lane interleave of the two sources as
        // `_mm256_packs_epi16`: indices 0..=31 read from `clamped_a`,
        // 32..=63 from `clamped_b`.
        #[rustfmt::skip]
        const IDXS: [u32; 32] = [
            00, 02, 04, 06, 08, 10, 12, 14, 32, 34, 36, 38, 40, 42, 44, 46, 16, 18, 20, 22, 24, 26, 28, 30, 48, 50, 52, 54, 56, 58, 60, 62, ];
        let result: i8x32 = simd_shuffle!(clamped_a, clamped_b, IDXS);

        result.as_m256i()
    }
}
2419
/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using unsigned saturation.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackusdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Clamp the signed 32-bit inputs into the u16 range [0, 65535]; the
        // shuffle below then picks the low 16-bit half of each element.
        let max = simd_splat(u16::MAX as i32);
        let min = simd_splat(u16::MIN as i32);

        let clamped_a = simd_imax(simd_imin(a.as_i32x8(), max), min)
            .as_m256i()
            .as_i16x16();
        let clamped_b = simd_imax(simd_imin(b.as_i32x8(), max), min)
            .as_m256i()
            .as_i16x16();

        // Same per-128-bit-lane interleave of the two sources as
        // `_mm256_packs_epi32`: indices 0..=15 read from `clamped_a`,
        // 16..=31 from `clamped_b`.
        #[rustfmt::skip]
        const IDXS: [u32; 16] = [
            00, 02, 04, 06, 16, 18, 20, 22, 08, 10, 12, 14, 24, 26, 28, 30, ];
        let result: i16x16 = simd_shuffle!(clamped_a, clamped_b, IDXS);

        result.as_m256i()
    }
}
2453
/// Permutes packed 32-bit integers from `a` across all 256 bits according to
/// the indices in `b`, via the `permd` LLVM intrinsic.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(permd(a.as_u32x8(), b.as_u32x8())) }
}
2467
/// Permutes the four 64-bit elements of `a` using the four 2-bit selector
/// fields of `IMM8` (bits 1:0 choose element 0, bits 3:2 element 1, etc.).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        // `simd_shuffle!` needs a second operand; the masked indices are all
        // in 0..=3, so `zero` is never actually selected from.
        let zero = i64x4::ZERO;
        let r: i64x4 = simd_shuffle!(
            a.as_i64x4(),
            zero,
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
            ],
        );
        transmute(r)
    }
}
2494
/// Shuffles 128-bit halves selected from `a` and `b` by `IMM8`; same
/// operation as the AVX `_mm256_permute2f128_si256`, to which it delegates.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_permute2f128_si256::<IMM8>(a, b)
}
2508
/// Permutes the four 64-bit float elements of `a` using the four 2-bit
/// selector fields of `IMM8`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permute4x64_pd<const IMM8: i32>(a: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        // The undefined vector is only the mandatory second shuffle operand;
        // the masked indices stay in 0..=3, so it is never read.
        simd_shuffle!(
            a,
            _mm256_undefined_pd(),
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
            ],
        )
    }
}
2534
/// Permutes packed single-precision floats in `a` across all 256 bits
/// according to the indices in `idx`, via the `permps` LLVM intrinsic.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 {
    unsafe { permps(a, idx.as_i32x8()) }
}
2546
/// Computes the sum of absolute differences of packed unsigned 8-bit
/// integers, via the `psadbw` LLVM intrinsic (results in the 64-bit lanes).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsadbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psadbw(a.as_u8x32(), b.as_u8x32())) }
}
2560
/// Shuffles bytes of `a` according to the control bytes in `b`, via the
/// `pshufb` LLVM intrinsic (operates within each 128-bit lane).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshufb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pshufb(a.as_u8x32(), b.as_u8x32())) }
}
2598
/// Shuffles the 32-bit elements of `a` within each 128-bit lane using the
/// four 2-bit selector fields of `MASK` (same selectors for both lanes).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(MASK, 8);
    unsafe {
        let r: i32x8 = simd_shuffle!(
            a.as_i32x8(),
            a.as_i32x8(),
            [
                // Low 128-bit lane: selectors index elements 0..=3.
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                (MASK as u32 >> 4) & 0b11,
                (MASK as u32 >> 6) & 0b11,
                // High 128-bit lane: same selectors, offset by 4.
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
            ],
        );
        transmute(r)
    }
}
2655
/// Shuffles the upper four 16-bit elements of each 128-bit lane of `a` using
/// the 2-bit selector fields of `IMM8`; the lower four elements of each lane
/// pass through unchanged.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x16();
        let r: i16x16 = simd_shuffle!(
            a,
            a,
            [
                // Low lane: elements 0..=3 unchanged, 4..=7 shuffled.
                0,
                1,
                2,
                3,
                4 + (IMM8 as u32 & 0b11),
                4 + ((IMM8 as u32 >> 2) & 0b11),
                4 + ((IMM8 as u32 >> 4) & 0b11),
                4 + ((IMM8 as u32 >> 6) & 0b11),
                // High lane: elements 8..=11 unchanged, 12..=15 shuffled.
                8,
                9,
                10,
                11,
                12 + (IMM8 as u32 & 0b11),
                12 + ((IMM8 as u32 >> 2) & 0b11),
                12 + ((IMM8 as u32 >> 4) & 0b11),
                12 + ((IMM8 as u32 >> 6) & 0b11),
            ],
        );
        transmute(r)
    }
}
2696
/// Shuffles the lower four 16-bit elements of each 128-bit lane of `a` using
/// the 2-bit selector fields of `IMM8`; the upper four elements of each lane
/// pass through unchanged.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x16();
        let r: i16x16 = simd_shuffle!(
            a,
            a,
            [
                // Low lane: elements 0..=3 shuffled, 4..=7 unchanged.
                0 + (IMM8 as u32 & 0b11),
                0 + ((IMM8 as u32 >> 2) & 0b11),
                0 + ((IMM8 as u32 >> 4) & 0b11),
                0 + ((IMM8 as u32 >> 6) & 0b11),
                4,
                5,
                6,
                7,
                // High lane: elements 8..=11 shuffled, 12..=15 unchanged.
                8 + (IMM8 as u32 & 0b11),
                8 + ((IMM8 as u32 >> 2) & 0b11),
                8 + ((IMM8 as u32 >> 4) & 0b11),
                8 + ((IMM8 as u32 >> 6) & 0b11),
                12,
                13,
                14,
                15,
            ],
        );
        transmute(r)
    }
}
2737
/// Negates, passes through, or zeroes each 16-bit element of `a` depending on
/// the sign of the corresponding element of `b` (`psignw` LLVM intrinsic).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psignw(a.as_i16x16(), b.as_i16x16())) }
}
2750
/// Negates, passes through, or zeroes each 32-bit element of `a` depending on
/// the sign of the corresponding element of `b` (`psignd` LLVM intrinsic).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psignd(a.as_i32x8(), b.as_i32x8())) }
}
2763
/// Negates, passes through, or zeroes each 8-bit element of `a` depending on
/// the sign of the corresponding element of `b` (`psignb` LLVM intrinsic).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psignb(a.as_i8x32(), b.as_i8x32())) }
}
2776
/// Shifts all packed 16-bit integers in `a` left by the count in the low
/// 64 bits of `count`, shifting in zeros (`psllw` LLVM intrinsic).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psllw(a.as_i16x16(), count.as_i16x8())) }
}
2788
/// Shifts all packed 32-bit integers in `a` left by the count in the low
/// 64 bits of `count`, shifting in zeros (`pslld` LLVM intrinsic).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(pslld(a.as_i32x8(), count.as_i32x4())) }
}
2800
/// Shifts all packed 64-bit integers in `a` left by the count in the low
/// 64 bits of `count`, shifting in zeros (`psllq` LLVM intrinsic).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psllq(a.as_i64x4(), count.as_i64x2())) }
}
2812
/// Shifts packed 16-bit integers in `a` left by `IMM8`, shifting in zeros.
/// Counts of 16 or more produce an all-zero result.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_slli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 16 {
            _mm256_setzero_si256()
        } else {
            // Shift as unsigned; the branch guarantees the count is in range.
            transmute(simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
        }
    }
}
2833
2834#[inline]
2839#[target_feature(enable = "avx2")]
2840#[cfg_attr(test, assert_instr(vpslld, IMM8 = 7))]
2841#[rustc_legacy_const_generics(1)]
2842#[stable(feature = "simd_x86", since = "1.27.0")]
2843#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2844pub const fn _mm256_slli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2845 unsafe {
2846 static_assert_uimm_bits!(IMM8, 8);
2847 if IMM8 >= 32 {
2848 _mm256_setzero_si256()
2849 } else {
2850 transmute(simd_shl(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
2851 }
2852 }
2853}
2854
2855#[inline]
2860#[target_feature(enable = "avx2")]
2861#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 7))]
2862#[rustc_legacy_const_generics(1)]
2863#[stable(feature = "simd_x86", since = "1.27.0")]
2864#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2865pub const fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2866 unsafe {
2867 static_assert_uimm_bits!(IMM8, 8);
2868 if IMM8 >= 64 {
2869 _mm256_setzero_si256()
2870 } else {
2871 transmute(simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
2872 }
2873 }
2874}
2875
/// Shifts each 128-bit lane of `a` left by `IMM8` bytes, shifting in zeros.
/// Alias for [`_mm256_bslli_epi128`].
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_bslli_epi128::<IMM8>(a)
}
2889
/// Shifts each 128-bit lane of `a` left by `IMM8` bytes, shifting in zeros.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    // Computes the shuffle index for output byte `i`. Index 0 selects a zero
    // byte (first shuffle operand is `i8x32::ZERO`); `32 + x` selects byte
    // `x` of `a`. Shift counts above 15 or bytes shifted in from below the
    // lane boundary (`i % 16 < shift`) become zero.
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 || i % 16 < shift {
            0
        } else {
            32 + (i - shift)
        }
    }
    unsafe {
        let a = a.as_i8x32();
        let r: i8x32 = simd_shuffle!(
            i8x32::ZERO,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
            ],
        );
        transmute(r)
    }
}
2952
/// Shifts each 32-bit integer in `a` left by the corresponding count in
/// `count`, shifting in zeros; counts of 32 or more yield 0 for that element.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u32x4();
        let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
        // Zero out-of-range counts first so `simd_shl` is never fed an
        // undefined shift amount, then zero those result elements.
        let count = simd_select(no_overflow, count, u32x4::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
    }
}
2971
/// Shifts each 32-bit integer in `a` left by the corresponding count in
/// `count`, shifting in zeros; counts of 32 or more yield 0 for that element.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u32x8();
        let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
        // Zero out-of-range counts first so `simd_shl` is never fed an
        // undefined shift amount, then zero those result elements.
        let count = simd_select(no_overflow, count, u32x8::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
    }
}
2990
/// Shifts each 64-bit integer in `a` left by the corresponding count in
/// `count`, shifting in zeros; counts of 64 or more yield 0 for that element.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u64x2();
        let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
        // Zero out-of-range counts first so `simd_shl` is never fed an
        // undefined shift amount, then zero those result elements.
        let count = simd_select(no_overflow, count, u64x2::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
    }
}
3009
/// Shifts each 64-bit integer in `a` left by the corresponding count in
/// `count`, shifting in zeros; counts of 64 or more yield 0 for that element.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u64x4();
        let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
        // Zero out-of-range counts first so `simd_shl` is never fed an
        // undefined shift amount, then zero those result elements.
        let count = simd_select(no_overflow, count, u64x4::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
    }
}
3028
/// Shifts packed 16-bit integers in `a` right arithmetically (sign-filling)
/// by the count in the low 64 bits of `count` (`psraw` LLVM intrinsic).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psraw(a.as_i16x16(), count.as_i16x8())) }
}
3040
/// Shifts packed 32-bit integers in `a` right arithmetically (sign-filling)
/// by the count in the low 64 bits of `count` (`psrad` LLVM intrinsic).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrad(a.as_i32x8(), count.as_i32x4())) }
}
3052
/// Shifts packed 16-bit integers in `a` right arithmetically by `IMM8`,
/// shifting in sign bits. Counts above 15 are clamped to 15, so every element
/// collapses to all sign bits.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16))) }
}
3067
/// Shifts packed 32-bit integers in `a` right arithmetically by `IMM8`,
/// shifting in sign bits. Counts above 31 are clamped to 31, so every element
/// collapses to all sign bits.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31)))) }
}
3082
/// Shifts each 32-bit integer in `a` right arithmetically by the
/// corresponding count in `count`; counts of 32 or more behave like a shift
/// by 31 (result is all sign bits).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u32x4();
        let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
        // Clamp out-of-range counts to 31 so the arithmetic shift saturates
        // to the sign bit instead of invoking an undefined shift amount.
        let count = simd_select(no_overflow, transmute(count), i32x4::splat(31));
        simd_shr(a.as_i32x4(), count).as_m128i()
    }
}
3100
/// Shifts each 32-bit integer in `a` right arithmetically by the
/// corresponding count in `count`; counts of 32 or more behave like a shift
/// by 31 (result is all sign bits).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u32x8();
        let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
        // Clamp out-of-range counts to 31 so the arithmetic shift saturates
        // to the sign bit instead of invoking an undefined shift amount.
        let count = simd_select(no_overflow, transmute(count), i32x8::splat(31));
        simd_shr(a.as_i32x8(), count).as_m256i()
    }
}
3118
/// Shifts each 128-bit lane of `a` right by `IMM8` bytes, shifting in zeros.
/// Alias for [`_mm256_bsrli_epi128`].
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_bsrli_epi128::<IMM8>(a)
}
3132
/// Shifts each 128-bit lane of `a` right by `IMM8` bytes, shifting in zeros.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    // Computes the shuffle index for output byte `i`. Index 0 selects a zero
    // byte (first shuffle operand is `i8x32::ZERO`); `32 + x` selects byte
    // `x` of `a`. Shift counts above 15 or bytes shifted in past the top of
    // the lane (`15 - (i % 16) < shift`) become zero.
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 || (15 - (i % 16)) < shift {
            0
        } else {
            32 + (i + shift)
        }
    }
    unsafe {
        let a = a.as_i8x32();
        let r: i8x32 = simd_shuffle!(
            i8x32::ZERO,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
            ],
        );
        transmute(r)
    }
}
3195
/// Shifts packed 16-bit integers in `a` right logically (zero-filling) by the
/// count in the low 64 bits of `count` (`psrlw` LLVM intrinsic).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrlw(a.as_i16x16(), count.as_i16x8())) }
}
3207
/// Shifts packed 32-bit integers in `a` right logically (zero-filling) by the
/// count in the low 64 bits of `count` (`psrld` LLVM intrinsic).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrld(a.as_i32x8(), count.as_i32x4())) }
}
3219
/// Shifts packed 64-bit integers in `a` right logically (zero-filling) by the
/// count in the low 64 bits of `count` (`psrlq` LLVM intrinsic).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrlq(a.as_i64x4(), count.as_i64x2())) }
}
3231
/// Shifts packed 16-bit integers in `a` right by `IMM8`, shifting in zeros.
/// Counts of 16 or more produce an all-zero result.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 16 {
            _mm256_setzero_si256()
        } else {
            // Shift as unsigned; the branch guarantees the count is in range.
            transmute(simd_shr(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
        }
    }
}
3252
/// Shifts packed 32-bit integers in `a` right by `IMM8`, shifting in zeros.
/// Counts of 32 or more produce an all-zero result.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 32 {
            _mm256_setzero_si256()
        } else {
            // Shift as unsigned; the branch guarantees the count is in range.
            transmute(simd_shr(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
        }
    }
}
3273
/// Shifts packed 64-bit integers in `a` right by `IMM8`, shifting in zeros.
/// Counts of 64 or more produce an all-zero result.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 64 {
            _mm256_setzero_si256()
        } else {
            // Shift as unsigned; the branch guarantees the count is in range.
            transmute(simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
        }
    }
}
3294
/// Shifts each 32-bit integer in `a` right logically by the corresponding
/// count in `count`; counts of 32 or more yield 0 for that element.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u32x4();
        let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
        // Zero out-of-range counts first so `simd_shr` is never fed an
        // undefined shift amount, then zero those result elements.
        let count = simd_select(no_overflow, count, u32x4::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
    }
}
3312
/// Shifts each 32-bit integer in `a` right logically by the corresponding
/// count in `count`; counts of 32 or more yield 0 for that element.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u32x8();
        let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
        // Zero out-of-range counts first so `simd_shr` is never fed an
        // undefined shift amount, then zero those result elements.
        let count = simd_select(no_overflow, count, u32x8::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
    }
}
3330
/// Shifts each 64-bit integer in `a` right logically by the corresponding
/// count in `count`; counts of 64 or more yield 0 for that element.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u64x2();
        let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
        // Zero out-of-range counts first so `simd_shr` is never fed an
        // undefined shift amount, then zero those result elements.
        let count = simd_select(no_overflow, count, u64x2::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
    }
}
3348
/// Shifts each 64-bit integer in `a` right logically by the corresponding
/// count in `count`; counts of 64 or more yield 0 for that element.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u64x4();
        let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
        // Zero out-of-range counts first so `simd_shr` is never fed an
        // undefined shift amount, then zero those result elements.
        let count = simd_select(no_overflow, count, u64x4::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
    }
}
3366
/// Loads 256 bits of integer data from `mem_addr` using a non-temporal hint
/// (`vmovntdqa`), minimizing cache pollution.
///
/// # Safety
///
/// `mem_addr` must be valid for a 32-byte read; `vmovntdqa` requires the
/// address to be 32-byte aligned — NOTE(review): alignment contract inferred
/// from the instruction, confirm against the published intrinsic docs.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovntdqa))]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub unsafe fn _mm256_stream_load_si256(mem_addr: *const __m256i) -> __m256i {
    let dst: __m256i;
    // Inline asm because a plain load would let LLVM drop the non-temporal
    // hint; `pure, readonly` still permits CSE of identical loads.
    crate::arch::asm!(
        vpl!("vmovntdqa {a}"),
        a = out(ymm_reg) dst,
        p = in(reg) mem_addr,
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}
3386
/// Subtracts packed 16-bit integers in `b` from those in `a` (wrapping).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i16x16(), b.as_i16x16())) }
}
3398
/// Subtracts packed 32-bit integers in `b` from those in `a` (wrapping).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i32x8(), b.as_i32x8())) }
}
3410
/// Subtracts packed 64-bit integers in `b` from those in `a` (wrapping).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i64x4(), b.as_i64x4())) }
}
3422
/// Subtracts packed 8-bit integers in `b` from those in `a` (wrapping).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i8x32(), b.as_i8x32())) }
}
3434
/// Subtracts packed signed 16-bit integers in `b` from those in `a` using
/// saturation (results clamp to the i16 range).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_i16x16(), b.as_i16x16())) }
}
3447
/// Subtracts packed signed 8-bit integers in `b` from those in `a` using
/// saturation (results clamp to the i8 range).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_i8x32(), b.as_i8x32())) }
}
3460
/// Subtracts packed unsigned 16-bit integers in `b` from those in `a` using
/// saturation (results clamp at 0).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_u16x16(), b.as_u16x16())) }
}
3473
/// Subtracts packed unsigned 8-bit integers in `b` from those in `a` using
/// saturation (results clamp at 0).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_u8x32(), b.as_u8x32())) }
}
3486
/// Interleaves 8-bit integers from the high half of each 128-bit lane of `a`
/// and `b` (indices 32..=63 in the shuffle select from `b`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        #[rustfmt::skip]
        let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
            8, 40, 9, 41, 10, 42, 11, 43,
            12, 44, 13, 45, 14, 46, 15, 47,
            24, 56, 25, 57, 26, 58, 27, 59,
            28, 60, 29, 61, 30, 62, 31, 63,
        ]);
        transmute(r)
    }
}
3543
/// Interleaves 8-bit integers from the low half of each 128-bit lane of `a`
/// and `b` (indices 32..=63 in the shuffle select from `b`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        #[rustfmt::skip]
        let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
            0, 32, 1, 33, 2, 34, 3, 35,
            4, 36, 5, 37, 6, 38, 7, 39,
            16, 48, 17, 49, 18, 50, 19, 51,
            20, 52, 21, 53, 22, 54, 23, 55,
        ]);
        transmute(r)
    }
}
3599
/// Interleaves 16-bit integers from the high half of each 128-bit lane of `a`
/// and `b` (indices 16..=31 in the shuffle select from `b`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i16x16 = simd_shuffle!(
            a.as_i16x16(),
            b.as_i16x16(),
            [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31],
        );
        transmute(r)
    }
}
3649
/// Interleaves 16-bit integers from the low half of each 128-bit lane of `a`
/// and `b` (indices 16..=31 in the shuffle select from `b`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i16x16 = simd_shuffle!(
            a.as_i16x16(),
            b.as_i16x16(),
            [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27],
        );
        transmute(r)
    }
}
3700
/// Interleaves 32-bit integers from the high half of each 128-bit lane of `a`
/// and `b` (indices 8..=15 in the shuffle select from `b`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]);
        transmute(r)
    }
}
3740
/// Interleaves 32-bit integers from the low half of each 128-bit lane of `a`
/// and `b` (indices 8..=15 in the shuffle select from `b`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
        transmute(r)
    }
}
3780
/// Interleaves 64-bit integers from the high half of each 128-bit lane of `a`
/// and `b` (indices 4..=7 in the shuffle select from `b`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]);
        transmute(r)
    }
}
3820
/// Interleaves 64-bit integers from the low half of each 128-bit lane of `a`
/// and `b` (indices 4..=7 in the shuffle select from `b`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]);
        transmute(r)
    }
}
3860
#[inline]
#[target_feature(enable = "avx2")]
// LLVM may emit the float-domain `vxorps` for this integer XOR; it is
// bitwise identical to `vpxor`.
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
    // Bitwise ops are lane-width agnostic; viewing the 256 bits as i64x4
    // is an arbitrary but convenient choice.
    unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
}
3873
#[inline]
#[target_feature(enable = "avx2")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extract_epi8<const INDEX: i32>(a: __m256i) -> i32 {
    // 32 byte lanes, so INDEX must fit in 5 unsigned bits (0..=31).
    static_assert_uimm_bits!(INDEX, 5);
    // Extract as u8 so the byte is zero-extended into the i32 result.
    unsafe { simd_extract!(a.as_u8x32(), INDEX as u32, u8) as i32 }
}
3890
#[inline]
#[target_feature(enable = "avx2")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
    // 16 word lanes, so INDEX must fit in 4 unsigned bits (0..=15).
    static_assert_uimm_bits!(INDEX, 4);
    // Extract as u16 so the word is zero-extended into the i32 result.
    unsafe { simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32 }
}
3907
// Raw LLVM intrinsic declarations backing the AVX2 operations that have no
// portable `simd_*` lowering. The `link_name` strings and signatures must
// match LLVM's definitions exactly; do not edit them independently.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // Multiply-add, MPSADBW, high-rounded multiply, and sum-of-absolute-differences.
    #[link_name = "llvm.x86.avx2.pmadd.wd"]
    fn pmaddwd(a: i16x16, b: i16x16) -> i32x8;
    #[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
    fn pmaddubsw(a: u8x32, b: i8x32) -> i16x16;
    #[link_name = "llvm.x86.avx2.mpsadbw"]
    fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16;
    #[link_name = "llvm.x86.avx2.pmul.hr.sw"]
    fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.psad.bw"]
    fn psadbw(a: u8x32, b: u8x32) -> u64x4;
    // PSIGN family: negate/zero/pass-through per lane based on the sign of `b`.
    #[link_name = "llvm.x86.avx2.psign.b"]
    fn psignb(a: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx2.psign.w"]
    fn psignw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.psign.d"]
    fn psignd(a: i32x8, b: i32x8) -> i32x8;
    // Shifts by the low 64 bits of a 128-bit `count` vector.
    #[link_name = "llvm.x86.avx2.psll.w"]
    fn psllw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psll.d"]
    fn pslld(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psll.q"]
    fn psllq(a: i64x4, count: i64x2) -> i64x4;
    #[link_name = "llvm.x86.avx2.psra.w"]
    fn psraw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psra.d"]
    fn psrad(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrl.w"]
    fn psrlw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psrl.d"]
    fn psrld(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrl.q"]
    fn psrlq(a: i64x4, count: i64x2) -> i64x4;
    // Byte shuffle and cross-lane permutes.
    #[link_name = "llvm.x86.avx2.pshuf.b"]
    fn pshufb(a: u8x32, b: u8x32) -> u8x32;
    #[link_name = "llvm.x86.avx2.permd"]
    fn permd(a: u32x8, b: u32x8) -> u32x8;
    #[link_name = "llvm.x86.avx2.permps"]
    fn permps(a: __m256, b: i32x8) -> __m256;
    // Gathers: `src` supplies the masked-off elements, `offsets` the indices,
    // `mask` selects active lanes, and `scale` the byte multiplier (1/2/4/8).
    // Naming: d/q = 32/64-bit index, second letter = element type.
    #[link_name = "llvm.x86.avx2.gather.d.d"]
    fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.d.d.256"]
    fn vpgatherdd(src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8;
    #[link_name = "llvm.x86.avx2.gather.d.q"]
    fn pgatherdq(src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2;
    #[link_name = "llvm.x86.avx2.gather.d.q.256"]
    fn vpgatherdq(src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4;
    #[link_name = "llvm.x86.avx2.gather.q.d"]
    fn pgatherqd(src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.q.d.256"]
    fn vpgatherqd(src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.q.q"]
    fn pgatherqq(src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2;
    #[link_name = "llvm.x86.avx2.gather.q.q.256"]
    fn vpgatherqq(src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4;
    #[link_name = "llvm.x86.avx2.gather.d.pd"]
    fn pgatherdpd(
        src: __m128d,
        slice: *const i8,
        offsets: i32x4,
        mask: __m128d,
        scale: i8,
    ) -> __m128d;
    #[link_name = "llvm.x86.avx2.gather.d.pd.256"]
    fn vpgatherdpd(
        src: __m256d,
        slice: *const i8,
        offsets: i32x4,
        mask: __m256d,
        scale: i8,
    ) -> __m256d;
    #[link_name = "llvm.x86.avx2.gather.q.pd"]
    fn pgatherqpd(
        src: __m128d,
        slice: *const i8,
        offsets: i64x2,
        mask: __m128d,
        scale: i8,
    ) -> __m128d;
    #[link_name = "llvm.x86.avx2.gather.q.pd.256"]
    fn vpgatherqpd(
        src: __m256d,
        slice: *const i8,
        offsets: i64x4,
        mask: __m256d,
        scale: i8,
    ) -> __m256d;
    #[link_name = "llvm.x86.avx2.gather.d.ps"]
    fn pgatherdps(src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8)
    -> __m128;
    #[link_name = "llvm.x86.avx2.gather.d.ps.256"]
    fn vpgatherdps(
        src: __m256,
        slice: *const i8,
        offsets: i32x8,
        mask: __m256,
        scale: i8,
    ) -> __m256;
    #[link_name = "llvm.x86.avx2.gather.q.ps"]
    fn pgatherqps(src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8)
    -> __m128;
    // 64-bit indices with 32-bit elements: only 4 lanes, so the result is __m128.
    #[link_name = "llvm.x86.avx2.gather.q.ps.256"]
    fn vpgatherqps(
        src: __m128,
        slice: *const i8,
        offsets: i64x4,
        mask: __m128,
        scale: i8,
    ) -> __m128;
}
4019
4020#[cfg(test)]
4021mod tests {
4022 use crate::core_arch::assert_eq_const as assert_eq;
4023
4024 use stdarch_test::simd_test;
4025
4026 use crate::core_arch::x86::*;
4027
    // Absolute-value tests. Note that |MIN| wraps back to MIN for each
    // signed width, which the expected vectors spell as MAX.wrapping_add(1).
    #[simd_test(enable = "avx2")]
    const fn test_mm256_abs_epi32() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let r = _mm256_abs_epi32(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi32(
            0, 1, 1, i32::MAX,
            i32::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_abs_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, -1, 2, -2, 3, -3, 4,
            -4, 5, -5, i16::MAX, i16::MIN, 100, -100, -32,
        );
        let r = _mm256_abs_epi16(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0, 1, 1, 2, 2, 3, 3, 4,
            4, 5, 5, i16::MAX, i16::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_abs_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, -1, 2, -2, 3, -3, 4,
            -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
            0, 1, -1, 2, -2, 3, -3, 4,
            -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
        );
        let r = _mm256_abs_epi8(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            0, 1, 1, 2, 2, 3, 3, 4,
            4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
            0, 1, 1, 2, 2, 3, 3, 4,
            4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m256i(r, e);
    }
4079
    // Wrapping-add tests for each element width.
    #[simd_test(enable = "avx2")]
    const fn test_mm256_add_epi64() {
        let a = _mm256_setr_epi64x(-10, 0, 100, 1_000_000_000);
        let b = _mm256_setr_epi64x(-1, 0, 1, 2);
        let r = _mm256_add_epi64(a, b);
        let e = _mm256_setr_epi64x(-11, 0, 101, 1_000_000_002);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_add_epi32() {
        let a = _mm256_setr_epi32(-1, 0, 1, 2, 3, 4, 5, 6);
        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_add_epi32(a, b);
        let e = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_add_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_add_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0, 2, 4, 6, 8, 10, 12, 14,
            16, 18, 20, 22, 24, 26, 28, 30,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_add_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm256_add_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            0, 2, 4, 6, 8, 10, 12, 14,
            16, 18, 20, 22, 24, 26, 28, 30,
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
        );
        assert_eq_m256i(r, e);
    }
4145
    // Saturating-add tests: the non-saturating inputs check plain sums, and
    // the dedicated `_saturate_*` tests pin clamping at the type extremes.
    #[simd_test(enable = "avx2")]
    const fn test_mm256_adds_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55,
            56, 57, 58, 59, 60, 61, 62, 63,
        );
        let r = _mm256_adds_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
            64, 66, 68, 70, 72, 74, 76, 78,
            80, 82, 84, 86, 88, 90, 92, 94,
        );
        assert_eq_m256i(r, e);
    }

    // MAX + 1 must clamp to MAX (result equals `a`).
    #[simd_test(enable = "avx2")]
    fn test_mm256_adds_epi8_saturate_positive() {
        let a = _mm256_set1_epi8(0x7F);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_adds_epi8(a, b);
        assert_eq_m256i(r, a);
    }

    // MIN + (-1) must clamp to MIN (result equals `a`).
    #[simd_test(enable = "avx2")]
    fn test_mm256_adds_epi8_saturate_negative() {
        let a = _mm256_set1_epi8(-0x80);
        let b = _mm256_set1_epi8(-1);
        let r = _mm256_adds_epi8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_adds_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi16(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
        );
        let r = _mm256_adds_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_adds_epi16_saturate_positive() {
        let a = _mm256_set1_epi16(0x7FFF);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_adds_epi16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_adds_epi16_saturate_negative() {
        let a = _mm256_set1_epi16(-0x8000);
        let b = _mm256_set1_epi16(-1);
        let r = _mm256_adds_epi16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_adds_epu8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55,
            56, 57, 58, 59, 60, 61, 62, 63,
        );
        let r = _mm256_adds_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
            64, 66, 68, 70, 72, 74, 76, 78,
            80, 82, 84, 86, 88, 90, 92, 94,
        );
        assert_eq_m256i(r, e);
    }

    // Unsigned: 0xFF + 1 must clamp to 0xFF (result equals `a`).
    #[simd_test(enable = "avx2")]
    fn test_mm256_adds_epu8_saturate() {
        let a = _mm256_set1_epi8(!0);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_adds_epu8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_adds_epu16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi16(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
        );
        let r = _mm256_adds_epu16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    fn test_mm256_adds_epu16_saturate() {
        let a = _mm256_set1_epi16(!0);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_adds_epu16(a, b);
        assert_eq_m256i(r, a);
    }
4291
    // Bitwise AND: 5 & 3 == 1 in every byte lane.
    #[simd_test(enable = "avx2")]
    const fn test_mm256_and_si256() {
        let a = _mm256_set1_epi8(5);
        let b = _mm256_set1_epi8(3);
        let got = _mm256_and_si256(a, b);
        assert_eq_m256i(got, _mm256_set1_epi8(1));
    }

    // ANDNOT: (!5) & 3 == 2 in every byte lane (first operand is inverted).
    #[simd_test(enable = "avx2")]
    const fn test_mm256_andnot_si256() {
        let a = _mm256_set1_epi8(5);
        let b = _mm256_set1_epi8(3);
        let got = _mm256_andnot_si256(a, b);
        assert_eq_m256i(got, _mm256_set1_epi8(2));
    }
4307
    // Rounding average: (3 + 9 + 1) >> 1 == 6.
    #[simd_test(enable = "avx2")]
    const fn test_mm256_avg_epu8() {
        let (a, b) = (_mm256_set1_epi8(3), _mm256_set1_epi8(9));
        let r = _mm256_avg_epu8(a, b);
        assert_eq_m256i(r, _mm256_set1_epi8(6));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_avg_epu16() {
        let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
        let r = _mm256_avg_epu16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(6));
    }
4321
    // Immediate blends: each mask bit selects the corresponding lane from
    // `b` (bit set) or `a` (bit clear). Each test also checks the
    // complementary mask with swapped operands yields the same result.
    #[simd_test(enable = "avx2")]
    const fn test_mm_blend_epi32() {
        let (a, b) = (_mm_set1_epi32(3), _mm_set1_epi32(9));
        let e = _mm_setr_epi32(9, 3, 3, 3);
        let r = _mm_blend_epi32::<0x01>(a, b);
        assert_eq_m128i(r, e);

        let r = _mm_blend_epi32::<0x0E>(b, a);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_blend_epi32() {
        let (a, b) = (_mm256_set1_epi32(3), _mm256_set1_epi32(9));
        let e = _mm256_setr_epi32(9, 3, 3, 3, 3, 3, 3, 3);
        let r = _mm256_blend_epi32::<0x01>(a, b);
        assert_eq_m256i(r, e);

        let e = _mm256_setr_epi32(3, 9, 3, 3, 3, 3, 3, 9);
        let r = _mm256_blend_epi32::<0x82>(a, b);
        assert_eq_m256i(r, e);

        let e = _mm256_setr_epi32(3, 3, 9, 9, 9, 9, 9, 3);
        let r = _mm256_blend_epi32::<0x7C>(a, b);
        assert_eq_m256i(r, e);
    }

    // epi16 blend: the 8-bit mask is applied to each 128-bit half, so bit 0
    // controls both word 0 and word 8.
    #[simd_test(enable = "avx2")]
    const fn test_mm256_blend_epi16() {
        let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
        let e = _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3);
        let r = _mm256_blend_epi16::<0x01>(a, b);
        assert_eq_m256i(r, e);

        let r = _mm256_blend_epi16::<0xFE>(b, a);
        assert_eq_m256i(r, e);
    }
4359
    // Variable blend: only byte 2 has its mask MSB set, so only that lane
    // is taken from `b`.
    #[simd_test(enable = "avx2")]
    const fn test_mm256_blendv_epi8() {
        let (a, b) = (_mm256_set1_epi8(4), _mm256_set1_epi8(2));
        let mask = _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), -1);
        let e = _mm256_insert_epi8::<2>(_mm256_set1_epi8(4), 2);
        let r = _mm256_blendv_epi8(a, b, mask);
        assert_eq_m256i(r, e);
    }
4368
    // Broadcast tests: element 0 of the source (made distinct from the rest
    // where needed) must be replicated to every destination lane.
    #[simd_test(enable = "avx2")]
    const fn test_mm_broadcastb_epi8() {
        let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
        let res = _mm_broadcastb_epi8(a);
        assert_eq_m128i(res, _mm_set1_epi8(0x2a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_broadcastb_epi8() {
        let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
        let res = _mm256_broadcastb_epi8(a);
        assert_eq_m256i(res, _mm256_set1_epi8(0x2a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm_broadcastd_epi32() {
        let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
        let res = _mm_broadcastd_epi32(a);
        assert_eq_m128i(res, _mm_set1_epi32(0x2a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_broadcastd_epi32() {
        let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
        let res = _mm256_broadcastd_epi32(a);
        assert_eq_m256i(res, _mm256_set1_epi32(0x2a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm_broadcastq_epi64() {
        let a = _mm_setr_epi64x(0x1ffffffff, 0);
        let res = _mm_broadcastq_epi64(a);
        assert_eq_m128i(res, _mm_set1_epi64x(0x1ffffffff));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_broadcastq_epi64() {
        let a = _mm_setr_epi64x(0x1ffffffff, 0);
        let res = _mm256_broadcastq_epi64(a);
        assert_eq_m256i(res, _mm256_set1_epi64x(0x1ffffffff));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm_broadcastsd_pd() {
        let a = _mm_setr_pd(6.88, 3.44);
        let res = _mm_broadcastsd_pd(a);
        assert_eq_m128d(res, _mm_set1_pd(6.88));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_broadcastsd_pd() {
        let a = _mm_setr_pd(6.88, 3.44);
        let res = _mm256_broadcastsd_pd(a);
        assert_eq_m256d(res, _mm256_set1_pd(6.88f64));
    }

    // 128-bit broadcast: the whole source vector is duplicated to both halves.
    #[simd_test(enable = "avx2")]
    const fn test_mm_broadcastsi128_si256() {
        let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
        let res = _mm_broadcastsi128_si256(a);
        let retval = _mm256_setr_epi64x(
            0x0987654321012334,
            0x5678909876543210,
            0x0987654321012334,
            0x5678909876543210,
        );
        assert_eq_m256i(res, retval);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_broadcastsi128_si256() {
        let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
        let res = _mm256_broadcastsi128_si256(a);
        let retval = _mm256_setr_epi64x(
            0x0987654321012334,
            0x5678909876543210,
            0x0987654321012334,
            0x5678909876543210,
        );
        assert_eq_m256i(res, retval);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm_broadcastss_ps() {
        let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
        let res = _mm_broadcastss_ps(a);
        assert_eq_m128(res, _mm_set1_ps(6.88));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_broadcastss_ps() {
        let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
        let res = _mm256_broadcastss_ps(a);
        assert_eq_m256(res, _mm256_set1_ps(6.88));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm_broadcastw_epi16() {
        let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
        let res = _mm_broadcastw_epi16(a);
        assert_eq_m128i(res, _mm_set1_epi16(0x22b));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_broadcastw_epi16() {
        let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
        let res = _mm256_broadcastw_epi16(a);
        assert_eq_m256i(res, _mm256_set1_epi16(0x22b));
    }
4478
    // Comparison tests. The operands are built so that exactly one lane
    // matches; its result must be all-ones (!0) and every other lane zero.
    #[simd_test(enable = "avx2")]
    const fn test_mm256_cmpeq_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            31, 30, 2, 28, 27, 26, 25, 24,
            23, 22, 21, 20, 19, 18, 17, 16,
            15, 14, 13, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1, 0,
        );
        let r = _mm256_cmpeq_epi8(a, b);
        assert_eq_m256i(r, _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), !0));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cmpeq_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi16(
            15, 14, 2, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1, 0,
        );
        let r = _mm256_cmpeq_epi16(a, b);
        assert_eq_m256i(r, _mm256_insert_epi16::<2>(_mm256_set1_epi16(0), !0));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cmpeq_epi32() {
        let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_setr_epi32(7, 6, 2, 4, 3, 2, 1, 0);
        let r = _mm256_cmpeq_epi32(a, b);
        let e = _mm256_set1_epi32(0);
        let e = _mm256_insert_epi32::<2>(e, !0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cmpeq_epi64() {
        let a = _mm256_setr_epi64x(0, 1, 2, 3);
        let b = _mm256_setr_epi64x(3, 2, 2, 0);
        let r = _mm256_cmpeq_epi64(a, b);
        assert_eq_m256i(r, _mm256_insert_epi64::<2>(_mm256_set1_epi64x(0), !0));
    }

    // Signed greater-than: only lane 0 (5 > 0) compares true.
    #[simd_test(enable = "avx2")]
    const fn test_mm256_cmpgt_epi8() {
        let a = _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), 5);
        let b = _mm256_set1_epi8(0);
        let r = _mm256_cmpgt_epi8(a, b);
        assert_eq_m256i(r, _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), !0));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cmpgt_epi16() {
        let a = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 5);
        let b = _mm256_set1_epi16(0);
        let r = _mm256_cmpgt_epi16(a, b);
        assert_eq_m256i(r, _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), !0));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cmpgt_epi32() {
        let a = _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), 5);
        let b = _mm256_set1_epi32(0);
        let r = _mm256_cmpgt_epi32(a, b);
        assert_eq_m256i(r, _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), !0));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cmpgt_epi64() {
        let a = _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), 5);
        let b = _mm256_set1_epi64x(0);
        let r = _mm256_cmpgt_epi64(a, b);
        assert_eq_m256i(r, _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), !0));
    }
4564
    // Widening-conversion tests. `cvtepi*` sign-extends (negative inputs
    // must stay negative); `cvtepu*` zero-extends. Only the low elements of
    // the 128-bit source that fit the wider result are converted.
    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepi8_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        #[rustfmt::skip]
        let r = _mm256_setr_epi16(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        assert_eq_m256i(r, _mm256_cvtepi8_epi16(a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepi8_epi32() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
        assert_eq_m256i(r, _mm256_cvtepi8_epi32(a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepi8_epi64() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        let r = _mm256_setr_epi64x(0, 0, -1, 1);
        assert_eq_m256i(r, _mm256_cvtepi8_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepi16_epi32() {
        let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
        let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
        assert_eq_m256i(r, _mm256_cvtepi16_epi32(a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepi16_epi64() {
        let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
        let r = _mm256_setr_epi64x(0, 0, -1, 1);
        assert_eq_m256i(r, _mm256_cvtepi16_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepi32_epi64() {
        let a = _mm_setr_epi32(0, 0, -1, 1);
        let r = _mm256_setr_epi64x(0, 0, -1, 1);
        assert_eq_m256i(r, _mm256_cvtepi32_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepu16_epi32() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m256i(r, _mm256_cvtepu16_epi32(a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepu16_epi64() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_setr_epi64x(0, 1, 2, 3);
        assert_eq_m256i(r, _mm256_cvtepu16_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepu32_epi64() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let r = _mm256_setr_epi64x(0, 1, 2, 3);
        assert_eq_m256i(r, _mm256_cvtepu32_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepu8_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let r = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m256i(r, _mm256_cvtepu8_epi16(a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepu8_epi32() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m256i(r, _mm256_cvtepu8_epi32(a));
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_cvtepu8_epi64() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_setr_epi64x(0, 1, 2, 3);
        assert_eq_m256i(r, _mm256_cvtepu8_epi64(a));
    }
4680
    // Extracting half 1 yields the upper two qwords.
    #[simd_test(enable = "avx2")]
    const fn test_mm256_extracti128_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let r = _mm256_extracti128_si256::<1>(a);
        let e = _mm_setr_epi64x(3, 4);
        assert_eq_m128i(r, e);
    }
4688
    // Horizontal add/sub tests. Note the AVX output layout: pairs from `a`
    // fill the low half of each 128-bit lane, pairs from `b` the high half.
    #[simd_test(enable = "avx2")]
    const fn test_mm256_hadd_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hadd_epi16(a, b);
        let e = _mm256_setr_epi16(4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_hadd_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_hadd_epi32(a, b);
        let e = _mm256_setr_epi32(4, 4, 8, 8, 4, 4, 8, 8);
        assert_eq_m256i(r, e);
    }

    // Saturating horizontal add: 0x7fff + 1 must clamp to 0x7FFF in lane 0.
    #[simd_test(enable = "avx2")]
    fn test_mm256_hadds_epi16() {
        let a = _mm256_set1_epi16(2);
        let a = _mm256_insert_epi16::<0>(a, 0x7fff);
        let a = _mm256_insert_epi16::<1>(a, 1);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hadds_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0x7FFF, 4, 4, 4, 8, 8, 8, 8,
            4, 4, 4, 4, 8, 8, 8, 8,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_hsub_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hsub_epi16(a, b);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_hsub_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_hsub_epi32(a, b);
        let e = _mm256_set1_epi32(0);
        assert_eq_m256i(r, e);
    }

    // Saturating horizontal sub: 0x7fff - (-1) must clamp to 0x7FFF.
    #[simd_test(enable = "avx2")]
    fn test_mm256_hsubs_epi16() {
        let a = _mm256_set1_epi16(2);
        let a = _mm256_insert_epi16::<0>(a, 0x7fff);
        let a = _mm256_insert_epi16::<1>(a, -1);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hsubs_epi16(a, b);
        let e = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 0x7FFF);
        assert_eq_m256i(r, e);
    }
4750
    // madd: each i32 lane is 2*4 + 2*4 = 16 (pairwise multiply-accumulate).
    #[simd_test(enable = "avx2")]
    fn test_mm256_madd_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_madd_epi16(a, b);
        let e = _mm256_set1_epi32(16);
        assert_eq_m256i(r, e);
    }

    // Codegen regression check: madd against an all-ones vector must still
    // select `vpmaddwd` rather than being folded into a different sequence.
    #[target_feature(enable = "avx2")]
    #[cfg_attr(test, assert_instr(vpmaddwd))]
    unsafe fn test_mm256_madd_epi16_mul_one(v: __m256i) -> __m256i {
        let one_v = _mm256_set1_epi16(1);
        _mm256_madd_epi16(v, one_v)
    }

    // Codegen regression check: madd against a constant that could be seen
    // as a shift must still select `vpmaddwd`.
    #[target_feature(enable = "avx2")]
    #[cfg_attr(test, assert_instr(vpmaddwd))]
    unsafe fn test_mm256_madd_epi16_shl(v: __m256i) -> __m256i {
        let shift_value = _mm256_set1_epi32(12i32);
        _mm256_madd_epi16(v, shift_value)
    }
4779
    // Insert into half 1 replaces the upper two qwords.
    #[simd_test(enable = "avx2")]
    const fn test_mm256_inserti128_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let b = _mm_setr_epi64x(7, 8);
        let r = _mm256_inserti128_si256::<1>(a, b);
        let e = _mm256_setr_epi64x(1, 2, 7, 8);
        assert_eq_m256i(r, e);
    }

    // maddubs: each i16 lane is 2*4 + 2*4 = 16 (u8 x i8 pairwise, saturating).
    #[simd_test(enable = "avx2")]
    fn test_mm256_maddubs_epi16() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_maddubs_epi16(a, b);
        let e = _mm256_set1_epi16(16);
        assert_eq_m256i(r, e);
    }
4797
    // Masked load/store tests: lanes with a set mask MSB are transferred;
    // the rest read as zero (loads) or leave memory untouched (stores).
    #[simd_test(enable = "avx2")]
    const fn test_mm_maskload_epi32() {
        let nums = [1, 2, 3, 4];
        let a = &nums as *const i32;
        let mask = _mm_setr_epi32(-1, 0, 0, -1);
        let r = unsafe { _mm_maskload_epi32(a, mask) };
        let e = _mm_setr_epi32(1, 0, 0, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_maskload_epi32() {
        let nums = [1, 2, 3, 4, 5, 6, 7, 8];
        let a = &nums as *const i32;
        let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
        let r = unsafe { _mm256_maskload_epi32(a, mask) };
        let e = _mm256_setr_epi32(1, 0, 0, 4, 0, 6, 7, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm_maskload_epi64() {
        let nums = [1_i64, 2_i64];
        let a = &nums as *const i64;
        let mask = _mm_setr_epi64x(0, -1);
        let r = unsafe { _mm_maskload_epi64(a, mask) };
        let e = _mm_setr_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_maskload_epi64() {
        let nums = [1_i64, 2_i64, 3_i64, 4_i64];
        let a = &nums as *const i64;
        let mask = _mm256_setr_epi64x(0, -1, -1, 0);
        let r = unsafe { _mm256_maskload_epi64(a, mask) };
        let e = _mm256_setr_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }

    // Stores: masked-off destination slots must keep their sentinel values.
    #[simd_test(enable = "avx2")]
    const fn test_mm_maskstore_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let mut arr = [-1, -1, -1, -1];
        let mask = _mm_setr_epi32(-1, 0, 0, -1);
        unsafe {
            _mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
        }
        let e = [1, -1, -1, 4];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_maskstore_epi32() {
        let a = _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8);
        let mut arr = [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1];
        let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
        unsafe {
            _mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
        }
        let e = [1, -1, -1, 42, -1, 6, 7, -1];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm_maskstore_epi64() {
        let a = _mm_setr_epi64x(1_i64, 2_i64);
        let mut arr = [-1_i64, -1_i64];
        let mask = _mm_setr_epi64x(0, -1);
        unsafe {
            _mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
        }
        let e = [-1, 2];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    const fn test_mm256_maskstore_epi64() {
        let a = _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64);
        let mut arr = [-1_i64, -1_i64, -1_i64, -1_i64];
        let mask = _mm256_setr_epi64x(0, -1, -1, 0);
        unsafe {
            _mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
        }
        let e = [-1, 2, 3, -1];
        assert_eq!(arr, e);
    }
4885
4886 #[simd_test(enable = "avx2")]
4887 const fn test_mm256_max_epi16() {
4888 let a = _mm256_set1_epi16(2);
4889 let b = _mm256_set1_epi16(4);
4890 let r = _mm256_max_epi16(a, b);
4891 assert_eq_m256i(r, b);
4892 }
4893
4894 #[simd_test(enable = "avx2")]
4895 const fn test_mm256_max_epi32() {
4896 let a = _mm256_set1_epi32(2);
4897 let b = _mm256_set1_epi32(4);
4898 let r = _mm256_max_epi32(a, b);
4899 assert_eq_m256i(r, b);
4900 }
4901
4902 #[simd_test(enable = "avx2")]
4903 const fn test_mm256_max_epi8() {
4904 let a = _mm256_set1_epi8(2);
4905 let b = _mm256_set1_epi8(4);
4906 let r = _mm256_max_epi8(a, b);
4907 assert_eq_m256i(r, b);
4908 }
4909
4910 #[simd_test(enable = "avx2")]
4911 const fn test_mm256_max_epu16() {
4912 let a = _mm256_set1_epi16(2);
4913 let b = _mm256_set1_epi16(4);
4914 let r = _mm256_max_epu16(a, b);
4915 assert_eq_m256i(r, b);
4916 }
4917
4918 #[simd_test(enable = "avx2")]
4919 const fn test_mm256_max_epu32() {
4920 let a = _mm256_set1_epi32(2);
4921 let b = _mm256_set1_epi32(4);
4922 let r = _mm256_max_epu32(a, b);
4923 assert_eq_m256i(r, b);
4924 }
4925
4926 #[simd_test(enable = "avx2")]
4927 const fn test_mm256_max_epu8() {
4928 let a = _mm256_set1_epi8(2);
4929 let b = _mm256_set1_epi8(4);
4930 let r = _mm256_max_epu8(a, b);
4931 assert_eq_m256i(r, b);
4932 }
4933
4934 #[simd_test(enable = "avx2")]
4935 const fn test_mm256_min_epi16() {
4936 let a = _mm256_set1_epi16(2);
4937 let b = _mm256_set1_epi16(4);
4938 let r = _mm256_min_epi16(a, b);
4939 assert_eq_m256i(r, a);
4940 }
4941
4942 #[simd_test(enable = "avx2")]
4943 const fn test_mm256_min_epi32() {
4944 let a = _mm256_set1_epi32(2);
4945 let b = _mm256_set1_epi32(4);
4946 let r = _mm256_min_epi32(a, b);
4947 assert_eq_m256i(r, a);
4948 }
4949
4950 #[simd_test(enable = "avx2")]
4951 const fn test_mm256_min_epi8() {
4952 let a = _mm256_set1_epi8(2);
4953 let b = _mm256_set1_epi8(4);
4954 let r = _mm256_min_epi8(a, b);
4955 assert_eq_m256i(r, a);
4956 }
4957
4958 #[simd_test(enable = "avx2")]
4959 const fn test_mm256_min_epu16() {
4960 let a = _mm256_set1_epi16(2);
4961 let b = _mm256_set1_epi16(4);
4962 let r = _mm256_min_epu16(a, b);
4963 assert_eq_m256i(r, a);
4964 }
4965
4966 #[simd_test(enable = "avx2")]
4967 const fn test_mm256_min_epu32() {
4968 let a = _mm256_set1_epi32(2);
4969 let b = _mm256_set1_epi32(4);
4970 let r = _mm256_min_epu32(a, b);
4971 assert_eq_m256i(r, a);
4972 }
4973
4974 #[simd_test(enable = "avx2")]
4975 const fn test_mm256_min_epu8() {
4976 let a = _mm256_set1_epi8(2);
4977 let b = _mm256_set1_epi8(4);
4978 let r = _mm256_min_epu8(a, b);
4979 assert_eq_m256i(r, a);
4980 }
4981
4982 #[simd_test(enable = "avx2")]
4983 const fn test_mm256_movemask_epi8() {
4984 let a = _mm256_set1_epi8(-1);
4985 let r = _mm256_movemask_epi8(a);
4986 let e = -1;
4987 assert_eq!(r, e);
4988 }
4989
4990 #[simd_test(enable = "avx2")]
4991 fn test_mm256_mpsadbw_epu8() {
4992 let a = _mm256_set1_epi8(2);
4993 let b = _mm256_set1_epi8(4);
4994 let r = _mm256_mpsadbw_epu8::<0>(a, b);
4995 let e = _mm256_set1_epi16(8);
4996 assert_eq_m256i(r, e);
4997 }
4998
4999 #[simd_test(enable = "avx2")]
5000 const fn test_mm256_mul_epi32() {
5001 let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
5002 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
5003 let r = _mm256_mul_epi32(a, b);
5004 let e = _mm256_setr_epi64x(0, 0, 10, 14);
5005 assert_eq_m256i(r, e);
5006 }
5007
5008 #[simd_test(enable = "avx2")]
5009 const fn test_mm256_mul_epu32() {
5010 let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
5011 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
5012 let r = _mm256_mul_epu32(a, b);
5013 let e = _mm256_setr_epi64x(0, 0, 10, 14);
5014 assert_eq_m256i(r, e);
5015 }
5016
5017 #[simd_test(enable = "avx2")]
5018 const fn test_mm256_mulhi_epi16() {
5019 let a = _mm256_set1_epi16(6535);
5020 let b = _mm256_set1_epi16(6535);
5021 let r = _mm256_mulhi_epi16(a, b);
5022 let e = _mm256_set1_epi16(651);
5023 assert_eq_m256i(r, e);
5024 }
5025
5026 #[simd_test(enable = "avx2")]
5027 const fn test_mm256_mulhi_epu16() {
5028 let a = _mm256_set1_epi16(6535);
5029 let b = _mm256_set1_epi16(6535);
5030 let r = _mm256_mulhi_epu16(a, b);
5031 let e = _mm256_set1_epi16(651);
5032 assert_eq_m256i(r, e);
5033 }
5034
5035 #[simd_test(enable = "avx2")]
5036 const fn test_mm256_mullo_epi16() {
5037 let a = _mm256_set1_epi16(2);
5038 let b = _mm256_set1_epi16(4);
5039 let r = _mm256_mullo_epi16(a, b);
5040 let e = _mm256_set1_epi16(8);
5041 assert_eq_m256i(r, e);
5042 }
5043
5044 #[simd_test(enable = "avx2")]
5045 const fn test_mm256_mullo_epi32() {
5046 let a = _mm256_set1_epi32(2);
5047 let b = _mm256_set1_epi32(4);
5048 let r = _mm256_mullo_epi32(a, b);
5049 let e = _mm256_set1_epi32(8);
5050 assert_eq_m256i(r, e);
5051 }
5052
5053 #[simd_test(enable = "avx2")]
5054 fn test_mm256_mulhrs_epi16() {
5055 let a = _mm256_set1_epi16(2);
5056 let b = _mm256_set1_epi16(4);
5057 let r = _mm256_mullo_epi16(a, b);
5058 let e = _mm256_set1_epi16(8);
5059 assert_eq_m256i(r, e);
5060 }
5061
5062 #[simd_test(enable = "avx2")]
5063 const fn test_mm256_or_si256() {
5064 let a = _mm256_set1_epi8(-1);
5065 let b = _mm256_set1_epi8(0);
5066 let r = _mm256_or_si256(a, b);
5067 assert_eq_m256i(r, a);
5068 }
5069
5070 #[simd_test(enable = "avx2")]
5071 const fn test_mm256_packs_epi16() {
5072 let a = _mm256_set1_epi16(2);
5073 let b = _mm256_set1_epi16(4);
5074 let r = _mm256_packs_epi16(a, b);
5075 #[rustfmt::skip]
5076 let e = _mm256_setr_epi8(
5077 2, 2, 2, 2, 2, 2, 2, 2,
5078 4, 4, 4, 4, 4, 4, 4, 4,
5079 2, 2, 2, 2, 2, 2, 2, 2,
5080 4, 4, 4, 4, 4, 4, 4, 4,
5081 );
5082
5083 assert_eq_m256i(r, e);
5084 }
5085
5086 #[simd_test(enable = "avx2")]
5087 const fn test_mm256_packs_epi32() {
5088 let a = _mm256_set1_epi32(2);
5089 let b = _mm256_set1_epi32(4);
5090 let r = _mm256_packs_epi32(a, b);
5091 let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
5092
5093 assert_eq_m256i(r, e);
5094 }
5095
5096 #[simd_test(enable = "avx2")]
5097 const fn test_mm256_packus_epi16() {
5098 let a = _mm256_set1_epi16(2);
5099 let b = _mm256_set1_epi16(4);
5100 let r = _mm256_packus_epi16(a, b);
5101 #[rustfmt::skip]
5102 let e = _mm256_setr_epi8(
5103 2, 2, 2, 2, 2, 2, 2, 2,
5104 4, 4, 4, 4, 4, 4, 4, 4,
5105 2, 2, 2, 2, 2, 2, 2, 2,
5106 4, 4, 4, 4, 4, 4, 4, 4,
5107 );
5108
5109 assert_eq_m256i(r, e);
5110 }
5111
5112 #[simd_test(enable = "avx2")]
5113 const fn test_mm256_packus_epi32() {
5114 let a = _mm256_set1_epi32(2);
5115 let b = _mm256_set1_epi32(4);
5116 let r = _mm256_packus_epi32(a, b);
5117 let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
5118
5119 assert_eq_m256i(r, e);
5120 }
5121
5122 #[simd_test(enable = "avx2")]
5123 fn test_mm256_sad_epu8() {
5124 let a = _mm256_set1_epi8(2);
5125 let b = _mm256_set1_epi8(4);
5126 let r = _mm256_sad_epu8(a, b);
5127 let e = _mm256_set1_epi64x(16);
5128 assert_eq_m256i(r, e);
5129 }
5130
5131 #[simd_test(enable = "avx2")]
5132 const fn test_mm256_shufflehi_epi16() {
5133 #[rustfmt::skip]
5134 let a = _mm256_setr_epi16(
5135 0, 1, 2, 3, 11, 22, 33, 44,
5136 4, 5, 6, 7, 55, 66, 77, 88,
5137 );
5138 #[rustfmt::skip]
5139 let e = _mm256_setr_epi16(
5140 0, 1, 2, 3, 44, 22, 22, 11,
5141 4, 5, 6, 7, 88, 66, 66, 55,
5142 );
5143 let r = _mm256_shufflehi_epi16::<0b00_01_01_11>(a);
5144 assert_eq_m256i(r, e);
5145 }
5146
5147 #[simd_test(enable = "avx2")]
5148 const fn test_mm256_shufflelo_epi16() {
5149 #[rustfmt::skip]
5150 let a = _mm256_setr_epi16(
5151 11, 22, 33, 44, 0, 1, 2, 3,
5152 55, 66, 77, 88, 4, 5, 6, 7,
5153 );
5154 #[rustfmt::skip]
5155 let e = _mm256_setr_epi16(
5156 44, 22, 22, 11, 0, 1, 2, 3,
5157 88, 66, 66, 55, 4, 5, 6, 7,
5158 );
5159 let r = _mm256_shufflelo_epi16::<0b00_01_01_11>(a);
5160 assert_eq_m256i(r, e);
5161 }
5162
5163 #[simd_test(enable = "avx2")]
5164 fn test_mm256_sign_epi16() {
5165 let a = _mm256_set1_epi16(2);
5166 let b = _mm256_set1_epi16(-1);
5167 let r = _mm256_sign_epi16(a, b);
5168 let e = _mm256_set1_epi16(-2);
5169 assert_eq_m256i(r, e);
5170 }
5171
5172 #[simd_test(enable = "avx2")]
5173 fn test_mm256_sign_epi32() {
5174 let a = _mm256_set1_epi32(2);
5175 let b = _mm256_set1_epi32(-1);
5176 let r = _mm256_sign_epi32(a, b);
5177 let e = _mm256_set1_epi32(-2);
5178 assert_eq_m256i(r, e);
5179 }
5180
5181 #[simd_test(enable = "avx2")]
5182 fn test_mm256_sign_epi8() {
5183 let a = _mm256_set1_epi8(2);
5184 let b = _mm256_set1_epi8(-1);
5185 let r = _mm256_sign_epi8(a, b);
5186 let e = _mm256_set1_epi8(-2);
5187 assert_eq_m256i(r, e);
5188 }
5189
5190 #[simd_test(enable = "avx2")]
5191 fn test_mm256_sll_epi16() {
5192 let a = _mm256_set1_epi16(0xFF);
5193 let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
5194 let r = _mm256_sll_epi16(a, b);
5195 assert_eq_m256i(r, _mm256_set1_epi16(0xFF0));
5196 }
5197
5198 #[simd_test(enable = "avx2")]
5199 fn test_mm256_sll_epi32() {
5200 let a = _mm256_set1_epi32(0xFFFF);
5201 let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
5202 let r = _mm256_sll_epi32(a, b);
5203 assert_eq_m256i(r, _mm256_set1_epi32(0xFFFF0));
5204 }
5205
5206 #[simd_test(enable = "avx2")]
5207 fn test_mm256_sll_epi64() {
5208 let a = _mm256_set1_epi64x(0xFFFFFFFF);
5209 let b = _mm_insert_epi64::<0>(_mm_set1_epi64x(0), 4);
5210 let r = _mm256_sll_epi64(a, b);
5211 assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF0));
5212 }
5213
5214 #[simd_test(enable = "avx2")]
5215 const fn test_mm256_slli_epi16() {
5216 assert_eq_m256i(
5217 _mm256_slli_epi16::<4>(_mm256_set1_epi16(0xFF)),
5218 _mm256_set1_epi16(0xFF0),
5219 );
5220 }
5221
5222 #[simd_test(enable = "avx2")]
5223 const fn test_mm256_slli_epi32() {
5224 assert_eq_m256i(
5225 _mm256_slli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
5226 _mm256_set1_epi32(0xFFFF0),
5227 );
5228 }
5229
5230 #[simd_test(enable = "avx2")]
5231 const fn test_mm256_slli_epi64() {
5232 assert_eq_m256i(
5233 _mm256_slli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
5234 _mm256_set1_epi64x(0xFFFFFFFF0),
5235 );
5236 }
5237
5238 #[simd_test(enable = "avx2")]
5239 const fn test_mm256_slli_si256() {
5240 let a = _mm256_set1_epi64x(0xFFFFFFFF);
5241 let r = _mm256_slli_si256::<3>(a);
5242 assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF000000));
5243 }
5244
5245 #[simd_test(enable = "avx2")]
5246 const fn test_mm_sllv_epi32() {
5247 let a = _mm_set1_epi32(2);
5248 let b = _mm_set1_epi32(1);
5249 let r = _mm_sllv_epi32(a, b);
5250 let e = _mm_set1_epi32(4);
5251 assert_eq_m128i(r, e);
5252 }
5253
5254 #[simd_test(enable = "avx2")]
5255 const fn test_mm256_sllv_epi32() {
5256 let a = _mm256_set1_epi32(2);
5257 let b = _mm256_set1_epi32(1);
5258 let r = _mm256_sllv_epi32(a, b);
5259 let e = _mm256_set1_epi32(4);
5260 assert_eq_m256i(r, e);
5261 }
5262
5263 #[simd_test(enable = "avx2")]
5264 const fn test_mm_sllv_epi64() {
5265 let a = _mm_set1_epi64x(2);
5266 let b = _mm_set1_epi64x(1);
5267 let r = _mm_sllv_epi64(a, b);
5268 let e = _mm_set1_epi64x(4);
5269 assert_eq_m128i(r, e);
5270 }
5271
5272 #[simd_test(enable = "avx2")]
5273 const fn test_mm256_sllv_epi64() {
5274 let a = _mm256_set1_epi64x(2);
5275 let b = _mm256_set1_epi64x(1);
5276 let r = _mm256_sllv_epi64(a, b);
5277 let e = _mm256_set1_epi64x(4);
5278 assert_eq_m256i(r, e);
5279 }
5280
5281 #[simd_test(enable = "avx2")]
5282 fn test_mm256_sra_epi16() {
5283 let a = _mm256_set1_epi16(-1);
5284 let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
5285 let r = _mm256_sra_epi16(a, b);
5286 assert_eq_m256i(r, _mm256_set1_epi16(-1));
5287 }
5288
5289 #[simd_test(enable = "avx2")]
5290 fn test_mm256_sra_epi32() {
5291 let a = _mm256_set1_epi32(-1);
5292 let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 1);
5293 let r = _mm256_sra_epi32(a, b);
5294 assert_eq_m256i(r, _mm256_set1_epi32(-1));
5295 }
5296
5297 #[simd_test(enable = "avx2")]
5298 const fn test_mm256_srai_epi16() {
5299 assert_eq_m256i(
5300 _mm256_srai_epi16::<1>(_mm256_set1_epi16(-1)),
5301 _mm256_set1_epi16(-1),
5302 );
5303 }
5304
5305 #[simd_test(enable = "avx2")]
5306 const fn test_mm256_srai_epi32() {
5307 assert_eq_m256i(
5308 _mm256_srai_epi32::<1>(_mm256_set1_epi32(-1)),
5309 _mm256_set1_epi32(-1),
5310 );
5311 }
5312
5313 #[simd_test(enable = "avx2")]
5314 const fn test_mm_srav_epi32() {
5315 let a = _mm_set1_epi32(4);
5316 let count = _mm_set1_epi32(1);
5317 let r = _mm_srav_epi32(a, count);
5318 let e = _mm_set1_epi32(2);
5319 assert_eq_m128i(r, e);
5320 }
5321
5322 #[simd_test(enable = "avx2")]
5323 const fn test_mm256_srav_epi32() {
5324 let a = _mm256_set1_epi32(4);
5325 let count = _mm256_set1_epi32(1);
5326 let r = _mm256_srav_epi32(a, count);
5327 let e = _mm256_set1_epi32(2);
5328 assert_eq_m256i(r, e);
5329 }
5330
5331 #[simd_test(enable = "avx2")]
5332 const fn test_mm256_srli_si256() {
5333 #[rustfmt::skip]
5334 let a = _mm256_setr_epi8(
5335 1, 2, 3, 4, 5, 6, 7, 8,
5336 9, 10, 11, 12, 13, 14, 15, 16,
5337 17, 18, 19, 20, 21, 22, 23, 24,
5338 25, 26, 27, 28, 29, 30, 31, 32,
5339 );
5340 let r = _mm256_srli_si256::<3>(a);
5341 #[rustfmt::skip]
5342 let e = _mm256_setr_epi8(
5343 4, 5, 6, 7, 8, 9, 10, 11,
5344 12, 13, 14, 15, 16, 0, 0, 0,
5345 20, 21, 22, 23, 24, 25, 26, 27,
5346 28, 29, 30, 31, 32, 0, 0, 0,
5347 );
5348 assert_eq_m256i(r, e);
5349 }
5350
5351 #[simd_test(enable = "avx2")]
5352 fn test_mm256_srl_epi16() {
5353 let a = _mm256_set1_epi16(0xFF);
5354 let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
5355 let r = _mm256_srl_epi16(a, b);
5356 assert_eq_m256i(r, _mm256_set1_epi16(0xF));
5357 }
5358
5359 #[simd_test(enable = "avx2")]
5360 fn test_mm256_srl_epi32() {
5361 let a = _mm256_set1_epi32(0xFFFF);
5362 let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
5363 let r = _mm256_srl_epi32(a, b);
5364 assert_eq_m256i(r, _mm256_set1_epi32(0xFFF));
5365 }
5366
5367 #[simd_test(enable = "avx2")]
5368 fn test_mm256_srl_epi64() {
5369 let a = _mm256_set1_epi64x(0xFFFFFFFF);
5370 let b = _mm_setr_epi64x(4, 0);
5371 let r = _mm256_srl_epi64(a, b);
5372 assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFF));
5373 }
5374
5375 #[simd_test(enable = "avx2")]
5376 const fn test_mm256_srli_epi16() {
5377 assert_eq_m256i(
5378 _mm256_srli_epi16::<4>(_mm256_set1_epi16(0xFF)),
5379 _mm256_set1_epi16(0xF),
5380 );
5381 }
5382
5383 #[simd_test(enable = "avx2")]
5384 const fn test_mm256_srli_epi32() {
5385 assert_eq_m256i(
5386 _mm256_srli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
5387 _mm256_set1_epi32(0xFFF),
5388 );
5389 }
5390
5391 #[simd_test(enable = "avx2")]
5392 const fn test_mm256_srli_epi64() {
5393 assert_eq_m256i(
5394 _mm256_srli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
5395 _mm256_set1_epi64x(0xFFFFFFF),
5396 );
5397 }
5398
5399 #[simd_test(enable = "avx2")]
5400 const fn test_mm_srlv_epi32() {
5401 let a = _mm_set1_epi32(2);
5402 let count = _mm_set1_epi32(1);
5403 let r = _mm_srlv_epi32(a, count);
5404 let e = _mm_set1_epi32(1);
5405 assert_eq_m128i(r, e);
5406 }
5407
5408 #[simd_test(enable = "avx2")]
5409 const fn test_mm256_srlv_epi32() {
5410 let a = _mm256_set1_epi32(2);
5411 let count = _mm256_set1_epi32(1);
5412 let r = _mm256_srlv_epi32(a, count);
5413 let e = _mm256_set1_epi32(1);
5414 assert_eq_m256i(r, e);
5415 }
5416
5417 #[simd_test(enable = "avx2")]
5418 const fn test_mm_srlv_epi64() {
5419 let a = _mm_set1_epi64x(2);
5420 let count = _mm_set1_epi64x(1);
5421 let r = _mm_srlv_epi64(a, count);
5422 let e = _mm_set1_epi64x(1);
5423 assert_eq_m128i(r, e);
5424 }
5425
5426 #[simd_test(enable = "avx2")]
5427 const fn test_mm256_srlv_epi64() {
5428 let a = _mm256_set1_epi64x(2);
5429 let count = _mm256_set1_epi64x(1);
5430 let r = _mm256_srlv_epi64(a, count);
5431 let e = _mm256_set1_epi64x(1);
5432 assert_eq_m256i(r, e);
5433 }
5434
5435 #[simd_test(enable = "avx2")]
5436 fn test_mm256_stream_load_si256() {
5437 let a = _mm256_set_epi64x(5, 6, 7, 8);
5438 let r = unsafe { _mm256_stream_load_si256(core::ptr::addr_of!(a) as *const _) };
5439 assert_eq_m256i(a, r);
5440 }
5441
5442 #[simd_test(enable = "avx2")]
5443 const fn test_mm256_sub_epi16() {
5444 let a = _mm256_set1_epi16(4);
5445 let b = _mm256_set1_epi16(2);
5446 let r = _mm256_sub_epi16(a, b);
5447 assert_eq_m256i(r, b);
5448 }
5449
5450 #[simd_test(enable = "avx2")]
5451 const fn test_mm256_sub_epi32() {
5452 let a = _mm256_set1_epi32(4);
5453 let b = _mm256_set1_epi32(2);
5454 let r = _mm256_sub_epi32(a, b);
5455 assert_eq_m256i(r, b);
5456 }
5457
5458 #[simd_test(enable = "avx2")]
5459 const fn test_mm256_sub_epi64() {
5460 let a = _mm256_set1_epi64x(4);
5461 let b = _mm256_set1_epi64x(2);
5462 let r = _mm256_sub_epi64(a, b);
5463 assert_eq_m256i(r, b);
5464 }
5465
5466 #[simd_test(enable = "avx2")]
5467 const fn test_mm256_sub_epi8() {
5468 let a = _mm256_set1_epi8(4);
5469 let b = _mm256_set1_epi8(2);
5470 let r = _mm256_sub_epi8(a, b);
5471 assert_eq_m256i(r, b);
5472 }
5473
5474 #[simd_test(enable = "avx2")]
5475 const fn test_mm256_subs_epi16() {
5476 let a = _mm256_set1_epi16(4);
5477 let b = _mm256_set1_epi16(2);
5478 let r = _mm256_subs_epi16(a, b);
5479 assert_eq_m256i(r, b);
5480 }
5481
5482 #[simd_test(enable = "avx2")]
5483 const fn test_mm256_subs_epi8() {
5484 let a = _mm256_set1_epi8(4);
5485 let b = _mm256_set1_epi8(2);
5486 let r = _mm256_subs_epi8(a, b);
5487 assert_eq_m256i(r, b);
5488 }
5489
5490 #[simd_test(enable = "avx2")]
5491 const fn test_mm256_subs_epu16() {
5492 let a = _mm256_set1_epi16(4);
5493 let b = _mm256_set1_epi16(2);
5494 let r = _mm256_subs_epu16(a, b);
5495 assert_eq_m256i(r, b);
5496 }
5497
5498 #[simd_test(enable = "avx2")]
5499 const fn test_mm256_subs_epu8() {
5500 let a = _mm256_set1_epi8(4);
5501 let b = _mm256_set1_epi8(2);
5502 let r = _mm256_subs_epu8(a, b);
5503 assert_eq_m256i(r, b);
5504 }
5505
5506 #[simd_test(enable = "avx2")]
5507 const fn test_mm256_xor_si256() {
5508 let a = _mm256_set1_epi8(5);
5509 let b = _mm256_set1_epi8(3);
5510 let r = _mm256_xor_si256(a, b);
5511 assert_eq_m256i(r, _mm256_set1_epi8(6));
5512 }
5513
5514 #[simd_test(enable = "avx2")]
5515 const fn test_mm256_alignr_epi8() {
5516 #[rustfmt::skip]
5517 let a = _mm256_setr_epi8(
5518 1, 2, 3, 4, 5, 6, 7, 8,
5519 9, 10, 11, 12, 13, 14, 15, 16,
5520 17, 18, 19, 20, 21, 22, 23, 24,
5521 25, 26, 27, 28, 29, 30, 31, 32,
5522 );
5523 #[rustfmt::skip]
5524 let b = _mm256_setr_epi8(
5525 -1, -2, -3, -4, -5, -6, -7, -8,
5526 -9, -10, -11, -12, -13, -14, -15, -16,
5527 -17, -18, -19, -20, -21, -22, -23, -24,
5528 -25, -26, -27, -28, -29, -30, -31, -32,
5529 );
5530 let r = _mm256_alignr_epi8::<33>(a, b);
5531 assert_eq_m256i(r, _mm256_set1_epi8(0));
5532
5533 let r = _mm256_alignr_epi8::<17>(a, b);
5534 #[rustfmt::skip]
5535 let expected = _mm256_setr_epi8(
5536 2, 3, 4, 5, 6, 7, 8, 9,
5537 10, 11, 12, 13, 14, 15, 16, 0,
5538 18, 19, 20, 21, 22, 23, 24, 25,
5539 26, 27, 28, 29, 30, 31, 32, 0,
5540 );
5541 assert_eq_m256i(r, expected);
5542
5543 let r = _mm256_alignr_epi8::<4>(a, b);
5544 #[rustfmt::skip]
5545 let expected = _mm256_setr_epi8(
5546 -5, -6, -7, -8, -9, -10, -11, -12,
5547 -13, -14, -15, -16, 1, 2, 3, 4,
5548 -21, -22, -23, -24, -25, -26, -27, -28,
5549 -29, -30, -31, -32, 17, 18, 19, 20,
5550 );
5551 assert_eq_m256i(r, expected);
5552
5553 let r = _mm256_alignr_epi8::<15>(a, b);
5554 #[rustfmt::skip]
5555 let expected = _mm256_setr_epi8(
5556 -16, 1, 2, 3, 4, 5, 6, 7,
5557 8, 9, 10, 11, 12, 13, 14, 15,
5558 -32, 17, 18, 19, 20, 21, 22, 23,
5559 24, 25, 26, 27, 28, 29, 30, 31,
5560 );
5561 assert_eq_m256i(r, expected);
5562
5563 let r = _mm256_alignr_epi8::<0>(a, b);
5564 assert_eq_m256i(r, b);
5565
5566 let r = _mm256_alignr_epi8::<16>(a, b);
5567 assert_eq_m256i(r, a);
5568 }
5569
5570 #[simd_test(enable = "avx2")]
5571 fn test_mm256_shuffle_epi8() {
5572 #[rustfmt::skip]
5573 let a = _mm256_setr_epi8(
5574 1, 2, 3, 4, 5, 6, 7, 8,
5575 9, 10, 11, 12, 13, 14, 15, 16,
5576 17, 18, 19, 20, 21, 22, 23, 24,
5577 25, 26, 27, 28, 29, 30, 31, 32,
5578 );
5579 #[rustfmt::skip]
5580 let b = _mm256_setr_epi8(
5581 4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
5582 12, 5, 5, 10, 4, 1, 8, 0,
5583 4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
5584 12, 5, 5, 10, 4, 1, 8, 0,
5585 );
5586 #[rustfmt::skip]
5587 let expected = _mm256_setr_epi8(
5588 5, 0, 5, 4, 9, 13, 7, 4,
5589 13, 6, 6, 11, 5, 2, 9, 1,
5590 21, 0, 21, 20, 25, 29, 23, 20,
5591 29, 22, 22, 27, 21, 18, 25, 17,
5592 );
5593 let r = _mm256_shuffle_epi8(a, b);
5594 assert_eq_m256i(r, expected);
5595 }
5596
5597 #[simd_test(enable = "avx2")]
5598 fn test_mm256_permutevar8x32_epi32() {
5599 let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800);
5600 let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
5601 let expected = _mm256_setr_epi32(600, 100, 600, 200, 800, 700, 400, 500);
5602 let r = _mm256_permutevar8x32_epi32(a, b);
5603 assert_eq_m256i(r, expected);
5604 }
5605
5606 #[simd_test(enable = "avx2")]
5607 const fn test_mm256_permute4x64_epi64() {
5608 let a = _mm256_setr_epi64x(100, 200, 300, 400);
5609 let expected = _mm256_setr_epi64x(400, 100, 200, 100);
5610 let r = _mm256_permute4x64_epi64::<0b00010011>(a);
5611 assert_eq_m256i(r, expected);
5612 }
5613
5614 #[simd_test(enable = "avx2")]
5615 const fn test_mm256_permute2x128_si256() {
5616 let a = _mm256_setr_epi64x(100, 200, 500, 600);
5617 let b = _mm256_setr_epi64x(300, 400, 700, 800);
5618 let r = _mm256_permute2x128_si256::<0b00_01_00_11>(a, b);
5619 let e = _mm256_setr_epi64x(700, 800, 500, 600);
5620 assert_eq_m256i(r, e);
5621 }
5622
5623 #[simd_test(enable = "avx2")]
5624 const fn test_mm256_permute4x64_pd() {
5625 let a = _mm256_setr_pd(1., 2., 3., 4.);
5626 let r = _mm256_permute4x64_pd::<0b00_01_00_11>(a);
5627 let e = _mm256_setr_pd(4., 1., 2., 1.);
5628 assert_eq_m256d(r, e);
5629 }
5630
5631 #[simd_test(enable = "avx2")]
5632 fn test_mm256_permutevar8x32_ps() {
5633 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
5634 let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
5635 let r = _mm256_permutevar8x32_ps(a, b);
5636 let e = _mm256_setr_ps(6., 1., 6., 2., 8., 7., 4., 5.);
5637 assert_eq_m256(r, e);
5638 }
5639
5640 #[simd_test(enable = "avx2")]
5641 fn test_mm_i32gather_epi32() {
5642 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5643 let r = unsafe { _mm_i32gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)) };
5645 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
5646 }
5647
5648 #[simd_test(enable = "avx2")]
5649 fn test_mm_mask_i32gather_epi32() {
5650 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5651 let r = unsafe {
5653 _mm_mask_i32gather_epi32::<4>(
5654 _mm_set1_epi32(256),
5655 arr.as_ptr(),
5656 _mm_setr_epi32(0, 16, 64, 96),
5657 _mm_setr_epi32(-1, -1, -1, 0),
5658 )
5659 };
5660 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
5661 }
5662
5663 #[simd_test(enable = "avx2")]
5664 fn test_mm256_i32gather_epi32() {
5665 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5666 let r = unsafe {
5668 _mm256_i32gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4))
5669 };
5670 assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
5671 }
5672
5673 #[simd_test(enable = "avx2")]
5674 fn test_mm256_mask_i32gather_epi32() {
5675 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5676 let r = unsafe {
5678 _mm256_mask_i32gather_epi32::<4>(
5679 _mm256_set1_epi32(256),
5680 arr.as_ptr(),
5681 _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
5682 _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0),
5683 )
5684 };
5685 assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256));
5686 }
5687
5688 #[simd_test(enable = "avx2")]
5689 fn test_mm_i32gather_ps() {
5690 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5691 let r = unsafe { _mm_i32gather_ps::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)) };
5693 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
5694 }
5695
5696 #[simd_test(enable = "avx2")]
5697 fn test_mm_mask_i32gather_ps() {
5698 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5699 let r = unsafe {
5701 _mm_mask_i32gather_ps::<4>(
5702 _mm_set1_ps(256.0),
5703 arr.as_ptr(),
5704 _mm_setr_epi32(0, 16, 64, 96),
5705 _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
5706 )
5707 };
5708 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
5709 }
5710
5711 #[simd_test(enable = "avx2")]
5712 fn test_mm256_i32gather_ps() {
5713 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5714 let r = unsafe {
5716 _mm256_i32gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4))
5717 };
5718 assert_eq_m256(r, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0));
5719 }
5720
5721 #[simd_test(enable = "avx2")]
5722 fn test_mm256_mask_i32gather_ps() {
5723 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5724 let r = unsafe {
5726 _mm256_mask_i32gather_ps::<4>(
5727 _mm256_set1_ps(256.0),
5728 arr.as_ptr(),
5729 _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
5730 _mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0),
5731 )
5732 };
5733 assert_eq_m256(
5734 r,
5735 _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0),
5736 );
5737 }
5738
5739 #[simd_test(enable = "avx2")]
5740 fn test_mm_i32gather_epi64() {
5741 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5742 let r = unsafe { _mm_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0)) };
5744 assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
5745 }
5746
5747 #[simd_test(enable = "avx2")]
5748 fn test_mm_mask_i32gather_epi64() {
5749 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5750 let r = unsafe {
5752 _mm_mask_i32gather_epi64::<8>(
5753 _mm_set1_epi64x(256),
5754 arr.as_ptr(),
5755 _mm_setr_epi32(16, 16, 16, 16),
5756 _mm_setr_epi64x(-1, 0),
5757 )
5758 };
5759 assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
5760 }
5761
5762 #[simd_test(enable = "avx2")]
5763 fn test_mm256_i32gather_epi64() {
5764 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5765 let r = unsafe { _mm256_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)) };
5767 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
5768 }
5769
5770 #[simd_test(enable = "avx2")]
5771 fn test_mm256_mask_i32gather_epi64() {
5772 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5773 let r = unsafe {
5775 _mm256_mask_i32gather_epi64::<8>(
5776 _mm256_set1_epi64x(256),
5777 arr.as_ptr(),
5778 _mm_setr_epi32(0, 16, 64, 96),
5779 _mm256_setr_epi64x(-1, -1, -1, 0),
5780 )
5781 };
5782 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
5783 }
5784
5785 #[simd_test(enable = "avx2")]
5786 fn test_mm_i32gather_pd() {
5787 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5788 let r = unsafe { _mm_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0)) };
5790 assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
5791 }
5792
5793 #[simd_test(enable = "avx2")]
5794 fn test_mm_mask_i32gather_pd() {
5795 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5796 let r = unsafe {
5798 _mm_mask_i32gather_pd::<8>(
5799 _mm_set1_pd(256.0),
5800 arr.as_ptr(),
5801 _mm_setr_epi32(16, 16, 16, 16),
5802 _mm_setr_pd(-1.0, 0.0),
5803 )
5804 };
5805 assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
5806 }
5807
5808 #[simd_test(enable = "avx2")]
5809 fn test_mm256_i32gather_pd() {
5810 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5811 let r = unsafe { _mm256_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)) };
5813 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
5814 }
5815
5816 #[simd_test(enable = "avx2")]
5817 fn test_mm256_mask_i32gather_pd() {
5818 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5819 let r = unsafe {
5821 _mm256_mask_i32gather_pd::<8>(
5822 _mm256_set1_pd(256.0),
5823 arr.as_ptr(),
5824 _mm_setr_epi32(0, 16, 64, 96),
5825 _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
5826 )
5827 };
5828 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
5829 }
5830
5831 #[simd_test(enable = "avx2")]
5832 fn test_mm_i64gather_epi32() {
5833 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5834 let r = unsafe { _mm_i64gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16)) };
5836 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 0, 0));
5837 }
5838
5839 #[simd_test(enable = "avx2")]
5840 fn test_mm_mask_i64gather_epi32() {
5841 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5842 let r = unsafe {
5844 _mm_mask_i64gather_epi32::<4>(
5845 _mm_set1_epi32(256),
5846 arr.as_ptr(),
5847 _mm_setr_epi64x(0, 16),
5848 _mm_setr_epi32(-1, 0, -1, 0),
5849 )
5850 };
5851 assert_eq_m128i(r, _mm_setr_epi32(0, 256, 0, 0));
5852 }
5853
5854 #[simd_test(enable = "avx2")]
5855 fn test_mm256_i64gather_epi32() {
5856 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5857 let r =
5859 unsafe { _mm256_i64gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)) };
5860 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
5861 }
5862
5863 #[simd_test(enable = "avx2")]
5864 fn test_mm256_mask_i64gather_epi32() {
5865 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5866 let r = unsafe {
5868 _mm256_mask_i64gather_epi32::<4>(
5869 _mm_set1_epi32(256),
5870 arr.as_ptr(),
5871 _mm256_setr_epi64x(0, 16, 64, 96),
5872 _mm_setr_epi32(-1, -1, -1, 0),
5873 )
5874 };
5875 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
5876 }
5877
5878 #[simd_test(enable = "avx2")]
5879 fn test_mm_i64gather_ps() {
5880 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5881 let r = unsafe { _mm_i64gather_ps::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16)) };
5883 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 0.0, 0.0));
5884 }
5885
5886 #[simd_test(enable = "avx2")]
5887 fn test_mm_mask_i64gather_ps() {
5888 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5889 let r = unsafe {
5891 _mm_mask_i64gather_ps::<4>(
5892 _mm_set1_ps(256.0),
5893 arr.as_ptr(),
5894 _mm_setr_epi64x(0, 16),
5895 _mm_setr_ps(-1.0, 0.0, -1.0, 0.0),
5896 )
5897 };
5898 assert_eq_m128(r, _mm_setr_ps(0.0, 256.0, 0.0, 0.0));
5899 }
5900
5901 #[simd_test(enable = "avx2")]
5902 fn test_mm256_i64gather_ps() {
5903 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5904 let r =
5906 unsafe { _mm256_i64gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)) };
5907 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
5908 }
5909
5910 #[simd_test(enable = "avx2")]
5911 fn test_mm256_mask_i64gather_ps() {
5912 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5913 let r = unsafe {
5915 _mm256_mask_i64gather_ps::<4>(
5916 _mm_set1_ps(256.0),
5917 arr.as_ptr(),
5918 _mm256_setr_epi64x(0, 16, 64, 96),
5919 _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
5920 )
5921 };
5922 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
5923 }
5924
5925 #[simd_test(enable = "avx2")]
5926 fn test_mm_i64gather_epi64() {
5927 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5928 let r = unsafe { _mm_i64gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16)) };
5930 assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
5931 }
5932
5933 #[simd_test(enable = "avx2")]
5934 fn test_mm_mask_i64gather_epi64() {
5935 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5936 let r = unsafe {
5938 _mm_mask_i64gather_epi64::<8>(
5939 _mm_set1_epi64x(256),
5940 arr.as_ptr(),
5941 _mm_setr_epi64x(16, 16),
5942 _mm_setr_epi64x(-1, 0),
5943 )
5944 };
5945 assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
5946 }
5947
5948 #[simd_test(enable = "avx2")]
5949 fn test_mm256_i64gather_epi64() {
5950 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5951 let r =
5953 unsafe { _mm256_i64gather_epi64::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)) };
5954 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
5955 }
5956
5957 #[simd_test(enable = "avx2")]
5958 fn test_mm256_mask_i64gather_epi64() {
5959 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5960 let r = unsafe {
5962 _mm256_mask_i64gather_epi64::<8>(
5963 _mm256_set1_epi64x(256),
5964 arr.as_ptr(),
5965 _mm256_setr_epi64x(0, 16, 64, 96),
5966 _mm256_setr_epi64x(-1, -1, -1, 0),
5967 )
5968 };
5969 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
5970 }
5971
5972 #[simd_test(enable = "avx2")]
5973 fn test_mm_i64gather_pd() {
5974 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5975 let r = unsafe { _mm_i64gather_pd::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16)) };
5977 assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
5978 }
5979
5980 #[simd_test(enable = "avx2")]
5981 fn test_mm_mask_i64gather_pd() {
5982 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5983 let r = unsafe {
5985 _mm_mask_i64gather_pd::<8>(
5986 _mm_set1_pd(256.0),
5987 arr.as_ptr(),
5988 _mm_setr_epi64x(16, 16),
5989 _mm_setr_pd(-1.0, 0.0),
5990 )
5991 };
5992 assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
5993 }
5994
5995 #[simd_test(enable = "avx2")]
5996 fn test_mm256_i64gather_pd() {
5997 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5998 let r =
6000 unsafe { _mm256_i64gather_pd::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)) };
6001 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
6002 }
6003
6004 #[simd_test(enable = "avx2")]
6005 fn test_mm256_mask_i64gather_pd() {
6006 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
6007 let r = unsafe {
6009 _mm256_mask_i64gather_pd::<8>(
6010 _mm256_set1_pd(256.0),
6011 arr.as_ptr(),
6012 _mm256_setr_epi64x(0, 16, 64, 96),
6013 _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
6014 )
6015 };
6016 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
6017 }
6018
6019 #[simd_test(enable = "avx2")]
6020 const fn test_mm256_extract_epi8() {
6021 #[rustfmt::skip]
6022 let a = _mm256_setr_epi8(
6023 -1, 1, 2, 3, 4, 5, 6, 7,
6024 8, 9, 10, 11, 12, 13, 14, 15,
6025 16, 17, 18, 19, 20, 21, 22, 23,
6026 24, 25, 26, 27, 28, 29, 30, 31
6027 );
6028 let r1 = _mm256_extract_epi8::<0>(a);
6029 let r2 = _mm256_extract_epi8::<3>(a);
6030 assert_eq!(r1, 0xFF);
6031 assert_eq!(r2, 3);
6032 }
6033
6034 #[simd_test(enable = "avx2")]
6035 const fn test_mm256_extract_epi16() {
6036 #[rustfmt::skip]
6037 let a = _mm256_setr_epi16(
6038 -1, 1, 2, 3, 4, 5, 6, 7,
6039 8, 9, 10, 11, 12, 13, 14, 15,
6040 );
6041 let r1 = _mm256_extract_epi16::<0>(a);
6042 let r2 = _mm256_extract_epi16::<3>(a);
6043 assert_eq!(r1, 0xFFFF);
6044 assert_eq!(r2, 3);
6045 }
6046}