use core::hint::unreachable_unchecked;

use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::simd::*;

#[cfg(test)]
use stdarch_test::assert_instr;

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_abs_epi32(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        let r = simd_select::<m32x8, _>(simd_lt(a, i32x8::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_abs_epi16(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        let r = simd_select::<m16x16, _>(simd_lt(a, i16x16::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_abs_epi8(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        let r = simd_select::<m8x32, _>(simd_lt(a, i8x32::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_u8x32(), b.as_u8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_u16x16(), b.as_u16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 7))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 32 {
            return _mm256_setzero_si256();
        }
        let (a, b) = if IMM8 > 16 {
            (_mm256_setzero_si256(), a)
        } else {
            (a, b)
        };

        let a = a.as_i8x32();
        let b = b.as_i8x32();

        if IMM8 == 16 {
            return transmute(a);
        }

        let r: i8x32 = match IMM8 % 16 {
            0 => simd_shuffle!(
                b,
                a,
                [
                    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
                    22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                ],
            ),
            1 => simd_shuffle!(
                b,
                a,
                [
                    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 17, 18, 19, 20, 21, 22,
                    23, 24, 25, 26, 27, 28, 29, 30, 31, 48,
                ],
            ),
            2 => simd_shuffle!(
                b,
                a,
                [
                    2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 18, 19, 20, 21, 22, 23,
                    24, 25, 26, 27, 28, 29, 30, 31, 48, 49,
                ],
            ),
            3 => simd_shuffle!(
                b,
                a,
                [
                    3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 19, 20, 21, 22, 23,
                    24, 25, 26, 27, 28, 29, 30, 31, 48, 49, 50,
                ],
            ),
            4 => simd_shuffle!(
                b,
                a,
                [
                    4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 20, 21, 22, 23, 24,
                    25, 26, 27, 28, 29, 30, 31, 48, 49, 50, 51,
                ],
            ),
            5 => simd_shuffle!(
                b,
                a,
                [
                    5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 21, 22, 23, 24, 25,
                    26, 27, 28, 29, 30, 31, 48, 49, 50, 51, 52,
                ],
            ),
            6 => simd_shuffle!(
                b,
                a,
                [
                    6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 22, 23, 24, 25, 26,
                    27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53,
                ],
            ),
            7 => simd_shuffle!(
                b,
                a,
                [
                    7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 23, 24, 25, 26,
                    27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54,
                ],
            ),
            8 => simd_shuffle!(
                b,
                a,
                [
                    8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 24, 25, 26, 27,
                    28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55,
                ],
            ),
            9 => simd_shuffle!(
                b,
                a,
                [
                    9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 25, 26, 27, 28,
                    29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56,
                ],
            ),
            10 => simd_shuffle!(
                b,
                a,
                [
                    10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 26, 27, 28, 29,
                    30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
                ],
            ),
            11 => simd_shuffle!(
                b,
                a,
                [
                    11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 27, 28, 29, 30,
                    31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
                ],
            ),
            12 => simd_shuffle!(
                b,
                a,
                [
                    12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 28, 29, 30, 31,
                    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
                ],
            ),
            13 => simd_shuffle!(
                b,
                a,
                [
                    13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 29, 30, 31, 48,
                    49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
                ],
            ),
            14 => simd_shuffle!(
                b,
                a,
                [
                    14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 30, 31, 48, 49,
                    50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
                ],
            ),
            15 => simd_shuffle!(
                b,
                a,
                [
                    15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 31, 48, 49, 50,
                    51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
                ],
            ),
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}
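
// Illustrative usage sketch (not part of the original source): within each
// 128-bit lane, `_mm256_alignr_epi8::<N>(a, b)` takes the 32-byte concatenation
// `a:b`, shifts it right by `N` bytes and keeps the low 16 bytes. A minimal,
// hedged example, assuming the caller has verified AVX2 support:
//
//     let lo = _mm256_set1_epi8(1);
//     let hi = _mm256_set1_epi8(2);
//     // Per 128-bit lane: 13 bytes taken from `lo` followed by 3 bytes from `hi`.
//     let r = _mm256_alignr_epi8::<3>(hi, lo);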

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_and(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let all_ones = _mm256_set1_epi8(-1);
        transmute(simd_and(
            simd_xor(a.as_i64x4(), all_ones.as_i64x4()),
            b.as_i64x4(),
        ))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, u32x16>(a.as_u16x16());
        let b = simd_cast::<_, u32x16>(b.as_u16x16());
        let r = simd_shr(simd_add(simd_add(a, b), u32x16::splat(1)), u32x16::splat(1));
        transmute(simd_cast::<_, u16x16>(r))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, u16x32>(a.as_u8x32());
        let b = simd_cast::<_, u16x32>(b.as_u8x32());
        let r = simd_shr(simd_add(simd_add(a, b), u16x32::splat(1)), u16x32::splat(1));
        transmute(simd_cast::<_, u8x32>(r))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM4, 4);
    unsafe {
        let a = a.as_i32x4();
        let b = b.as_i32x4();
        let r: i32x4 = simd_shuffle!(
            a,
            b,
            [
                [0, 4, 0, 4][IMM4 as usize & 0b11],
                [1, 1, 5, 5][IMM4 as usize & 0b11],
                [2, 6, 2, 6][(IMM4 as usize >> 2) & 0b11],
                [3, 3, 7, 7][(IMM4 as usize >> 2) & 0b11],
            ],
        );
        transmute(r)
    }
}
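
// Illustrative sketch (assumption, not from the original source): each bit of
// IMM4 selects between `a` (bit clear) and `b` (bit set) for the corresponding
// 32-bit lane, so IMM4 = 0b0101 blends lanes 0 and 2 from `b`:
//
//     let a = _mm_set_epi32(3, 2, 1, 0);
//     let b = _mm_set_epi32(7, 6, 5, 4);
//     let r = _mm_blend_epi32::<0b0101>(a, b); // == _mm_set_epi32(3, 6, 1, 4)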

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        let r: i32x8 = simd_shuffle!(
            a,
            b,
            [
                [0, 8, 0, 8][IMM8 as usize & 0b11],
                [1, 1, 9, 9][IMM8 as usize & 0b11],
                [2, 10, 2, 10][(IMM8 as usize >> 2) & 0b11],
                [3, 3, 11, 11][(IMM8 as usize >> 2) & 0b11],
                [4, 12, 4, 12][(IMM8 as usize >> 4) & 0b11],
                [5, 5, 13, 13][(IMM8 as usize >> 4) & 0b11],
                [6, 14, 6, 14][(IMM8 as usize >> 6) & 0b11],
                [7, 7, 15, 15][(IMM8 as usize >> 6) & 0b11],
            ],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendw, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();

        let r: i16x16 = simd_shuffle!(
            a,
            b,
            [
                [0, 16, 0, 16][IMM8 as usize & 0b11],
                [1, 1, 17, 17][IMM8 as usize & 0b11],
                [2, 18, 2, 18][(IMM8 as usize >> 2) & 0b11],
                [3, 3, 19, 19][(IMM8 as usize >> 2) & 0b11],
                [4, 20, 4, 20][(IMM8 as usize >> 4) & 0b11],
                [5, 5, 21, 21][(IMM8 as usize >> 4) & 0b11],
                [6, 22, 6, 22][(IMM8 as usize >> 6) & 0b11],
                [7, 7, 23, 23][(IMM8 as usize >> 6) & 0b11],
                [8, 24, 8, 24][IMM8 as usize & 0b11],
                [9, 9, 25, 25][IMM8 as usize & 0b11],
                [10, 26, 10, 26][(IMM8 as usize >> 2) & 0b11],
                [11, 11, 27, 27][(IMM8 as usize >> 2) & 0b11],
                [12, 28, 12, 28][(IMM8 as usize >> 4) & 0b11],
                [13, 13, 29, 29][(IMM8 as usize >> 4) & 0b11],
                [14, 30, 14, 30][(IMM8 as usize >> 6) & 0b11],
                [15, 15, 31, 31][(IMM8 as usize >> 6) & 0b11],
            ],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
    unsafe {
        let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::ZERO);
        transmute(simd_select(mask, b.as_i8x32(), a.as_i8x32()))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 16]);
        transmute::<i8x16, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 32]);
        transmute::<i8x32, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 4]);
        transmute::<i32x4, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 8]);
        transmute::<i32x8, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]);
        transmute::<i64x2, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]);
        transmute::<i64x4, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 2]) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 4]) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
        transmute::<i64x4, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
        transmute::<i64x4, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastss_ps(a: __m128) -> __m128 {
    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 4]) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 8]) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 8]);
        transmute::<i16x8, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 16]);
        transmute::<i16x16, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_cast(a.as_i16x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i16x8();
        let v64: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v64))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_cast(a.as_i32x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_cast(a.as_i8x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i8x16();
        let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i32x8, _>(simd_cast(v64))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i8x16();
        let v32: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v32))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_cast(a.as_u16x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u16x8();
        let v64: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v64))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_cast(a.as_u32x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_cast(a.as_u8x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u8x16();
        let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i32x8, _>(simd_cast(v64))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u8x16();
        let v32: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v32))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(
    all(test, not(target_env = "msvc")),
    assert_instr(vextractf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let a = a.as_i64x4();
        let b = i64x4::ZERO;
        let dst: i64x2 = simd_shuffle!(a, b, [[0, 1], [2, 3]][IMM1 as usize]);
        transmute(dst)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phaddw(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phaddd(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phaddsw(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phsubw(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phsubd(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phsubsw(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi32(-1).as_i32x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}
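
// Illustrative sketch (assumption, not from the original source): gather i32
// elements at byte offsets `index * SCALE`, loading only lanes whose mask has
// the sign bit set and keeping `src` elsewhere. Inside an `unsafe` block, with
// AVX2 verified by the caller:
//
//     let data: [i32; 8] = [10, 11, 12, 13, 14, 15, 16, 17];
//     let idx = _mm_set_epi32(6, 4, 2, 0);
//     let mask = _mm_set_epi32(-1, -1, 0, -1);   // lane 1 keeps `src`
//     let src = _mm_set1_epi32(99);
//     // SCALE = 4 because the offsets are element indices into an i32 slice.
//     let r = _mm_mask_i32gather_epi32::<4>(src, data.as_ptr(), idx, mask);
//     // r == _mm_set_epi32(16, 14, 99, 10)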

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x8::ZERO;
    let neg_one = _mm256_set1_epi32(-1).as_i32x8();
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    let r = vpgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi32<const SCALE: i32>(
    src: __m256i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x8();
    let mask = mask.as_i32x8();
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    let r = vpgatherdd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m128i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdps(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_ps();
    let neg_one = _mm256_set1_ps(-1.0);
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    vpgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_ps<const SCALE: i32>(
    src: __m256,
    slice: *const f32,
    offsets: __m256i,
    mask: __m256,
) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    vpgatherdps(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x2::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi64<const SCALE: i32>(
    src: __m128i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x2();
    let mask = mask.as_i64x2();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x4::ZERO;
    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = vpgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi64<const SCALE: i32>(
    src: __m256i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x4();
    let mask = mask.as_i64x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = vpgatherdq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_pd();
    let neg_one = _mm_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_pd<const SCALE: i32>(
    src: __m128d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m128d,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdpd(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_pd<const SCALE: i32>(
    slice: *const f64,
    offsets: __m128i,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_pd();
    let neg_one = _mm256_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    vpgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_pd<const SCALE: i32>(
    src: __m256d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m256d,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    vpgatherdpd(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m256i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    pgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m128i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    pgatherqps(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    vpgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m256i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    vpgatherqps(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x2::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    let r = pgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_epi64<const SCALE: i32>(
    src: __m128i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x2();
    let mask = mask.as_i64x2();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x4::ZERO;
    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    let r = vpgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_epi64<const SCALE: i32>(
    src: __m256i,
    slice: *const i64,
    offsets: __m256i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x4();
    let mask = mask.as_i64x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_pd();
    let neg_one = _mm_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    pgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_pd<const SCALE: i32>(
    src: __m128d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m128d,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    pgatherqpd(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_pd<const SCALE: i32>(
    slice: *const f64,
    offsets: __m256i,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_pd();
    let neg_one = _mm256_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    vpgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_pd<const SCALE: i32>(
    src: __m256d,
    slice: *const f64,
    offsets: __m256i,
    mask: __m256d,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    vpgatherqpd(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(
    all(test, not(target_env = "msvc")),
    assert_instr(vinsertf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let a = a.as_i64x4();
        let b = _mm256_castsi128_si256(b).as_i64x4();
        let dst: i64x4 = simd_shuffle!(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]);
        transmute(dst)
    }
}
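
// Illustrative sketch (assumption, not from the original source):
// `_mm256_extracti128_si256` and `_mm256_inserti128_si256` move whole 128-bit
// halves, with IMM1 selecting the low (0) or high (1) half:
//
//     let v = _mm256_set_epi64x(3, 2, 1, 0);           // lanes {0, 1, 2, 3}
//     let hi = _mm256_extracti128_si256::<1>(v);       // lanes {2, 3}
//     let w = _mm256_inserti128_si256::<0>(v, hi);     // lanes {2, 3, 2, 3}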

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmaddwd(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_u8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
    transmute(maskloadd(mem_addr as *const i8, mask.as_i32x4()))
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
    transmute(maskloadd256(mem_addr as *const i8, mask.as_i32x8()))
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
    transmute(maskloadq(mem_addr as *const i8, mask.as_i64x2()))
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
    transmute(maskloadq256(mem_addr as *const i8, mask.as_i64x4()))
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
    maskstored(mem_addr as *mut i8, mask.as_i32x4(), a.as_i32x4())
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
    maskstored256(mem_addr as *mut i8, mask.as_i32x8(), a.as_i32x8())
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
    maskstoreq(mem_addr as *mut i8, mask.as_i64x2(), a.as_i64x2())
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
    maskstoreq256(mem_addr as *mut i8, mask.as_i64x4(), a.as_i64x4())
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();
        transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u16x16();
        let b = b.as_u16x16();
        transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u32x8();
        let b = b.as_u32x8();
        transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();
        transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u16x16();
        let b = b.as_u16x16();
        transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u32x8();
        let b = b.as_u32x8();
        transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_movemask_epi8(a: __m256i) -> i32 {
    unsafe {
        let z = i8x32::ZERO;
        let m: i8x32 = simd_lt(a.as_i8x32(), z);
        simd_bitmask::<_, u32>(m) as i32
    }
}
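
// Illustrative sketch (assumption, not from the original source): the result
// packs the sign bit of each of the 32 bytes, so combining a byte compare with
// the movemask yields a 32-bit match bitmap. `chunk` is a hypothetical
// `&[u8; 32]`:
//
//     let haystack = _mm256_loadu_si256(chunk.as_ptr() as *const __m256i);
//     let eq = _mm256_cmpeq_epi8(haystack, _mm256_set1_epi8(b'\n' as i8));
//     let hits = _mm256_movemask_epi8(eq) as u32; // bit i set => chunk[i] == b'\n'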
2138
2139#[inline]
2149#[target_feature(enable = "avx2")]
2150#[cfg_attr(test, assert_instr(vmpsadbw, IMM8 = 0))]
2151#[rustc_legacy_const_generics(2)]
2152#[stable(feature = "simd_x86", since = "1.27.0")]
2153pub fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2154 static_assert_uimm_bits!(IMM8, 8);
2155 unsafe { transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8)) }
2156}
2157
2158#[inline]
2165#[target_feature(enable = "avx2")]
2166#[cfg_attr(test, assert_instr(vpmuldq))]
2167#[stable(feature = "simd_x86", since = "1.27.0")]
2168pub fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
2169 unsafe {
2170 let a = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4()));
2171 let b = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4()));
2172 transmute(simd_mul(a, b))
2173 }
2174}
2175
2176#[inline]
2183#[target_feature(enable = "avx2")]
2184#[cfg_attr(test, assert_instr(vpmuludq))]
2185#[stable(feature = "simd_x86", since = "1.27.0")]
2186pub fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
2187 unsafe {
2188 let a = a.as_u64x4();
2189 let b = b.as_u64x4();
2190 let mask = u64x4::splat(u32::MAX.into());
2191 transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
2192 }
2193}
2194
2195#[inline]
2201#[target_feature(enable = "avx2")]
2202#[cfg_attr(test, assert_instr(vpmulhw))]
2203#[stable(feature = "simd_x86", since = "1.27.0")]
2204pub fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
2205 unsafe {
2206 let a = simd_cast::<_, i32x16>(a.as_i16x16());
2207 let b = simd_cast::<_, i32x16>(b.as_i16x16());
2208 let r = simd_shr(simd_mul(a, b), i32x16::splat(16));
2209 transmute(simd_cast::<i32x16, i16x16>(r))
2210 }
2211}
2212
2213#[inline]
2219#[target_feature(enable = "avx2")]
2220#[cfg_attr(test, assert_instr(vpmulhuw))]
2221#[stable(feature = "simd_x86", since = "1.27.0")]
2222pub fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
2223 unsafe {
2224 let a = simd_cast::<_, u32x16>(a.as_u16x16());
2225 let b = simd_cast::<_, u32x16>(b.as_u16x16());
2226 let r = simd_shr(simd_mul(a, b), u32x16::splat(16));
2227 transmute(simd_cast::<u32x16, u16x16>(r))
2228 }
2229}
2230
2231#[inline]
2237#[target_feature(enable = "avx2")]
2238#[cfg_attr(test, assert_instr(vpmullw))]
2239#[stable(feature = "simd_x86", since = "1.27.0")]
2240pub fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
2241 unsafe { transmute(simd_mul(a.as_i16x16(), b.as_i16x16())) }
2242}
2243
2244#[inline]
2250#[target_feature(enable = "avx2")]
2251#[cfg_attr(test, assert_instr(vpmulld))]
2252#[stable(feature = "simd_x86", since = "1.27.0")]
2253pub fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i {
2254 unsafe { transmute(simd_mul(a.as_i32x8(), b.as_i32x8())) }
2255}
2256
2257#[inline]
2264#[target_feature(enable = "avx2")]
2265#[cfg_attr(test, assert_instr(vpmulhrsw))]
2266#[stable(feature = "simd_x86", since = "1.27.0")]
2267pub fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i {
2268 unsafe { transmute(pmulhrsw(a.as_i16x16(), b.as_i16x16())) }
2269}
2270
2271#[inline]
2276#[target_feature(enable = "avx2")]
2277#[cfg_attr(test, assert_instr(vorps))]
2278#[stable(feature = "simd_x86", since = "1.27.0")]
2279pub fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
2280 unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
2281}
2282
2283#[inline]
2288#[target_feature(enable = "avx2")]
2289#[cfg_attr(test, assert_instr(vpacksswb))]
2290#[stable(feature = "simd_x86", since = "1.27.0")]
2291pub fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
2292 unsafe { transmute(packsswb(a.as_i16x16(), b.as_i16x16())) }
2293}
2294
2295#[inline]
2300#[target_feature(enable = "avx2")]
2301#[cfg_attr(test, assert_instr(vpackssdw))]
2302#[stable(feature = "simd_x86", since = "1.27.0")]
2303pub fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
2304 unsafe { transmute(packssdw(a.as_i32x8(), b.as_i32x8())) }
2305}
2306
2307#[inline]
2312#[target_feature(enable = "avx2")]
2313#[cfg_attr(test, assert_instr(vpackuswb))]
2314#[stable(feature = "simd_x86", since = "1.27.0")]
2315pub fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
2316 unsafe { transmute(packuswb(a.as_i16x16(), b.as_i16x16())) }
2317}
2318
2319#[inline]
2324#[target_feature(enable = "avx2")]
2325#[cfg_attr(test, assert_instr(vpackusdw))]
2326#[stable(feature = "simd_x86", since = "1.27.0")]
2327pub fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
2328 unsafe { transmute(packusdw(a.as_i32x8(), b.as_i32x8())) }
2329}
2330
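/// Permutes packed 32-bit integers from `a` across lanes according to the
/// corresponding 32-bit indices in `b`.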
2331#[inline]
2338#[target_feature(enable = "avx2")]
2339#[cfg_attr(test, assert_instr(vpermps))]
2340#[stable(feature = "simd_x86", since = "1.27.0")]
2341pub fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
2342 unsafe { transmute(permd(a.as_u32x8(), b.as_u32x8())) }
2343}
2344
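/// Permutes 64-bit integers from `a` across lanes using the control mask
/// `IMM8`.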
2345#[inline]
2349#[target_feature(enable = "avx2")]
2350#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 9))]
2351#[rustc_legacy_const_generics(1)]
2352#[stable(feature = "simd_x86", since = "1.27.0")]
2353pub fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2354 static_assert_uimm_bits!(IMM8, 8);
2355 unsafe {
2356 let zero = i64x4::ZERO;
2357 let r: i64x4 = simd_shuffle!(
2358 a.as_i64x4(),
2359 zero,
2360 [
2361 IMM8 as u32 & 0b11,
2362 (IMM8 as u32 >> 2) & 0b11,
2363 (IMM8 as u32 >> 4) & 0b11,
2364 (IMM8 as u32 >> 6) & 0b11,
2365 ],
2366 );
2367 transmute(r)
2368 }
2369}
2370
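/// Shuffles 128-bit lanes of integer data selected from `a` and `b` using the
/// control mask `IMM8`.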
2371#[inline]
2375#[target_feature(enable = "avx2")]
2376#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 9))]
2377#[rustc_legacy_const_generics(2)]
2378#[stable(feature = "simd_x86", since = "1.27.0")]
2379pub fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2380 static_assert_uimm_bits!(IMM8, 8);
2381 unsafe { transmute(vperm2i128(a.as_i64x4(), b.as_i64x4(), IMM8 as i8)) }
2382}
2383
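/// Permutes 64-bit floating-point elements in `a` across lanes using the
/// control mask `IMM8`.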
2384#[inline]
2389#[target_feature(enable = "avx2")]
2390#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 1))]
2391#[rustc_legacy_const_generics(1)]
2392#[stable(feature = "simd_x86", since = "1.27.0")]
2393pub fn _mm256_permute4x64_pd<const IMM8: i32>(a: __m256d) -> __m256d {
2394 static_assert_uimm_bits!(IMM8, 8);
2395 unsafe {
2396 simd_shuffle!(
2397 a,
2398 _mm256_undefined_pd(),
2399 [
2400 IMM8 as u32 & 0b11,
2401 (IMM8 as u32 >> 2) & 0b11,
2402 (IMM8 as u32 >> 4) & 0b11,
2403 (IMM8 as u32 >> 6) & 0b11,
2404 ],
2405 )
2406 }
2407}
2408
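/// Permutes single-precision floating-point elements in `a` across lanes
/// according to the corresponding 32-bit indices in `idx`.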
2409#[inline]
2414#[target_feature(enable = "avx2")]
2415#[cfg_attr(test, assert_instr(vpermps))]
2416#[stable(feature = "simd_x86", since = "1.27.0")]
2417pub fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 {
2418 unsafe { permps(a, idx.as_i32x8()) }
2419}
2420
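/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
/// and `b`, then horizontally sums each consecutive group of 8 differences,
/// storing each unsigned 16-bit sum in the low 16 bits of a 64-bit element of
/// the result.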
2421#[inline]
2428#[target_feature(enable = "avx2")]
2429#[cfg_attr(test, assert_instr(vpsadbw))]
2430#[stable(feature = "simd_x86", since = "1.27.0")]
2431pub fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i {
2432 unsafe { transmute(psadbw(a.as_u8x32(), b.as_u8x32())) }
2433}
2434
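/// Shuffles bytes from `a` according to the contents of `b`. Within each
/// 128-bit lane, the low 4 bits of each byte of `b` select the source byte,
/// and a set most significant bit in `b` zeroes the corresponding result
/// byte.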
2435#[inline]
2466#[target_feature(enable = "avx2")]
2467#[cfg_attr(test, assert_instr(vpshufb))]
2468#[stable(feature = "simd_x86", since = "1.27.0")]
2469pub fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
2470 unsafe { transmute(pshufb(a.as_u8x32(), b.as_u8x32())) }
2471}
2472
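/// Shuffles 32-bit integers within each 128-bit lane of `a` using the control
/// mask `MASK`.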
2473#[inline]
2504#[target_feature(enable = "avx2")]
2505#[cfg_attr(test, assert_instr(vshufps, MASK = 9))]
2506#[rustc_legacy_const_generics(1)]
2507#[stable(feature = "simd_x86", since = "1.27.0")]
2508pub fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
2509 static_assert_uimm_bits!(MASK, 8);
2510 unsafe {
2511 let r: i32x8 = simd_shuffle!(
2512 a.as_i32x8(),
2513 a.as_i32x8(),
2514 [
2515 MASK as u32 & 0b11,
2516 (MASK as u32 >> 2) & 0b11,
2517 (MASK as u32 >> 4) & 0b11,
2518 (MASK as u32 >> 6) & 0b11,
2519 (MASK as u32 & 0b11) + 4,
2520 ((MASK as u32 >> 2) & 0b11) + 4,
2521 ((MASK as u32 >> 4) & 0b11) + 4,
2522 ((MASK as u32 >> 6) & 0b11) + 4,
2523 ],
2524 );
2525 transmute(r)
2526 }
2527}
2528
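/// Shuffles 16-bit integers in the high 64 bits of each 128-bit lane of `a`
/// using the control mask `IMM8`; the low 64 bits of each lane are copied
/// unchanged.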
2529#[inline]
2535#[target_feature(enable = "avx2")]
2536#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 9))]
2537#[rustc_legacy_const_generics(1)]
2538#[stable(feature = "simd_x86", since = "1.27.0")]
2539pub fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2540 static_assert_uimm_bits!(IMM8, 8);
2541 unsafe {
2542 let a = a.as_i16x16();
2543 let r: i16x16 = simd_shuffle!(
2544 a,
2545 a,
2546 [
2547 0,
2548 1,
2549 2,
2550 3,
2551 4 + (IMM8 as u32 & 0b11),
2552 4 + ((IMM8 as u32 >> 2) & 0b11),
2553 4 + ((IMM8 as u32 >> 4) & 0b11),
2554 4 + ((IMM8 as u32 >> 6) & 0b11),
2555 8,
2556 9,
2557 10,
2558 11,
2559 12 + (IMM8 as u32 & 0b11),
2560 12 + ((IMM8 as u32 >> 2) & 0b11),
2561 12 + ((IMM8 as u32 >> 4) & 0b11),
2562 12 + ((IMM8 as u32 >> 6) & 0b11),
2563 ],
2564 );
2565 transmute(r)
2566 }
2567}
2568
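/// Shuffles 16-bit integers in the low 64 bits of each 128-bit lane of `a`
/// using the control mask `IMM8`; the high 64 bits of each lane are copied
/// unchanged.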
2569#[inline]
2575#[target_feature(enable = "avx2")]
2576#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 9))]
2577#[rustc_legacy_const_generics(1)]
2578#[stable(feature = "simd_x86", since = "1.27.0")]
2579pub fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2580 static_assert_uimm_bits!(IMM8, 8);
2581 unsafe {
2582 let a = a.as_i16x16();
2583 let r: i16x16 = simd_shuffle!(
2584 a,
2585 a,
2586 [
2587 0 + (IMM8 as u32 & 0b11),
2588 0 + ((IMM8 as u32 >> 2) & 0b11),
2589 0 + ((IMM8 as u32 >> 4) & 0b11),
2590 0 + ((IMM8 as u32 >> 6) & 0b11),
2591 4,
2592 5,
2593 6,
2594 7,
2595 8 + (IMM8 as u32 & 0b11),
2596 8 + ((IMM8 as u32 >> 2) & 0b11),
2597 8 + ((IMM8 as u32 >> 4) & 0b11),
2598 8 + ((IMM8 as u32 >> 6) & 0b11),
2599 12,
2600 13,
2601 14,
2602 15,
2603 ],
2604 );
2605 transmute(r)
2606 }
2607}
2608
2609#[inline]
2615#[target_feature(enable = "avx2")]
2616#[cfg_attr(test, assert_instr(vpsignw))]
2617#[stable(feature = "simd_x86", since = "1.27.0")]
2618pub fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i {
2619 unsafe { transmute(psignw(a.as_i16x16(), b.as_i16x16())) }
2620}
2621
2622#[inline]
2628#[target_feature(enable = "avx2")]
2629#[cfg_attr(test, assert_instr(vpsignd))]
2630#[stable(feature = "simd_x86", since = "1.27.0")]
2631pub fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i {
2632 unsafe { transmute(psignd(a.as_i32x8(), b.as_i32x8())) }
2633}
2634
2635#[inline]
2641#[target_feature(enable = "avx2")]
2642#[cfg_attr(test, assert_instr(vpsignb))]
2643#[stable(feature = "simd_x86", since = "1.27.0")]
2644pub fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i {
2645 unsafe { transmute(psignb(a.as_i8x32(), b.as_i8x32())) }
2646}
2647
2648#[inline]
2653#[target_feature(enable = "avx2")]
2654#[cfg_attr(test, assert_instr(vpsllw))]
2655#[stable(feature = "simd_x86", since = "1.27.0")]
2656pub fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i {
2657 unsafe { transmute(psllw(a.as_i16x16(), count.as_i16x8())) }
2658}
2659
2660#[inline]
2665#[target_feature(enable = "avx2")]
2666#[cfg_attr(test, assert_instr(vpslld))]
2667#[stable(feature = "simd_x86", since = "1.27.0")]
2668pub fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i {
2669 unsafe { transmute(pslld(a.as_i32x8(), count.as_i32x4())) }
2670}
2671
2672#[inline]
2677#[target_feature(enable = "avx2")]
2678#[cfg_attr(test, assert_instr(vpsllq))]
2679#[stable(feature = "simd_x86", since = "1.27.0")]
2680pub fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i {
2681 unsafe { transmute(psllq(a.as_i64x4(), count.as_i64x2())) }
2682}
2683
2684#[inline]
2689#[target_feature(enable = "avx2")]
2690#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 7))]
2691#[rustc_legacy_const_generics(1)]
2692#[stable(feature = "simd_x86", since = "1.27.0")]
2693pub fn _mm256_slli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2694 static_assert_uimm_bits!(IMM8, 8);
2695 unsafe {
2696 if IMM8 >= 16 {
2697 _mm256_setzero_si256()
2698 } else {
2699 transmute(simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
2700 }
2701 }
2702}
2703
2704#[inline]
2709#[target_feature(enable = "avx2")]
2710#[cfg_attr(test, assert_instr(vpslld, IMM8 = 7))]
2711#[rustc_legacy_const_generics(1)]
2712#[stable(feature = "simd_x86", since = "1.27.0")]
2713pub fn _mm256_slli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2714 unsafe {
2715 static_assert_uimm_bits!(IMM8, 8);
2716 if IMM8 >= 32 {
2717 _mm256_setzero_si256()
2718 } else {
2719 transmute(simd_shl(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
2720 }
2721 }
2722}
2723
2724#[inline]
2729#[target_feature(enable = "avx2")]
2730#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 7))]
2731#[rustc_legacy_const_generics(1)]
2732#[stable(feature = "simd_x86", since = "1.27.0")]
2733pub fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2734 unsafe {
2735 static_assert_uimm_bits!(IMM8, 8);
2736 if IMM8 >= 64 {
2737 _mm256_setzero_si256()
2738 } else {
2739 transmute(simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
2740 }
2741 }
2742}
2743
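/// Shifts each 128-bit lane in `a` left by `IMM8` bytes while shifting in
/// zeros.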
2744#[inline]
2748#[target_feature(enable = "avx2")]
2749#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
2750#[rustc_legacy_const_generics(1)]
2751#[stable(feature = "simd_x86", since = "1.27.0")]
2752pub fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
2753 static_assert_uimm_bits!(IMM8, 8);
2754 _mm256_bslli_epi128::<IMM8>(a)
2755}
2756
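/// Shifts each 128-bit lane in `a` left by `IMM8` bytes while shifting in
/// zeros. Shift counts of 16 or more produce all zeros.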
2757#[inline]
2761#[target_feature(enable = "avx2")]
2762#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
2763#[rustc_legacy_const_generics(1)]
2764#[stable(feature = "simd_x86", since = "1.27.0")]
2765pub fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
2766 static_assert_uimm_bits!(IMM8, 8);
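    // Index helper for the byte shuffle below: result byte `i` takes byte
    // `i - shift` of `a` (index `32 + (i - shift)`, since `a` is the second
    // shuffle operand) when that source byte stays within the same 128-bit
    // lane, and otherwise takes a byte of the all-zero first operand.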
2767 const fn mask(shift: i32, i: u32) -> u32 {
2768 let shift = shift as u32 & 0xff;
2769 if shift > 15 || i % 16 < shift {
2770 0
2771 } else {
2772 32 + (i - shift)
2773 }
2774 }
2775 unsafe {
2776 let a = a.as_i8x32();
2777 let r: i8x32 = simd_shuffle!(
2778 i8x32::ZERO,
2779 a,
2780 [
2781 mask(IMM8, 0),
2782 mask(IMM8, 1),
2783 mask(IMM8, 2),
2784 mask(IMM8, 3),
2785 mask(IMM8, 4),
2786 mask(IMM8, 5),
2787 mask(IMM8, 6),
2788 mask(IMM8, 7),
2789 mask(IMM8, 8),
2790 mask(IMM8, 9),
2791 mask(IMM8, 10),
2792 mask(IMM8, 11),
2793 mask(IMM8, 12),
2794 mask(IMM8, 13),
2795 mask(IMM8, 14),
2796 mask(IMM8, 15),
2797 mask(IMM8, 16),
2798 mask(IMM8, 17),
2799 mask(IMM8, 18),
2800 mask(IMM8, 19),
2801 mask(IMM8, 20),
2802 mask(IMM8, 21),
2803 mask(IMM8, 22),
2804 mask(IMM8, 23),
2805 mask(IMM8, 24),
2806 mask(IMM8, 25),
2807 mask(IMM8, 26),
2808 mask(IMM8, 27),
2809 mask(IMM8, 28),
2810 mask(IMM8, 29),
2811 mask(IMM8, 30),
2812 mask(IMM8, 31),
2813 ],
2814 );
2815 transmute(r)
2816 }
2817}
2818
2819#[inline]
2825#[target_feature(enable = "avx2")]
2826#[cfg_attr(test, assert_instr(vpsllvd))]
2827#[stable(feature = "simd_x86", since = "1.27.0")]
2828pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
2829 unsafe { transmute(psllvd(a.as_i32x4(), count.as_i32x4())) }
2830}
2831
2832#[inline]
2838#[target_feature(enable = "avx2")]
2839#[cfg_attr(test, assert_instr(vpsllvd))]
2840#[stable(feature = "simd_x86", since = "1.27.0")]
2841pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
2842 unsafe { transmute(psllvd256(a.as_i32x8(), count.as_i32x8())) }
2843}
2844
2845#[inline]
2851#[target_feature(enable = "avx2")]
2852#[cfg_attr(test, assert_instr(vpsllvq))]
2853#[stable(feature = "simd_x86", since = "1.27.0")]
2854pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
2855 unsafe { transmute(psllvq(a.as_i64x2(), count.as_i64x2())) }
2856}
2857
2858#[inline]
2864#[target_feature(enable = "avx2")]
2865#[cfg_attr(test, assert_instr(vpsllvq))]
2866#[stable(feature = "simd_x86", since = "1.27.0")]
2867pub fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
2868 unsafe { transmute(psllvq256(a.as_i64x4(), count.as_i64x4())) }
2869}
2870
2871#[inline]
2876#[target_feature(enable = "avx2")]
2877#[cfg_attr(test, assert_instr(vpsraw))]
2878#[stable(feature = "simd_x86", since = "1.27.0")]
2879pub fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i {
2880 unsafe { transmute(psraw(a.as_i16x16(), count.as_i16x8())) }
2881}
2882
2883#[inline]
2888#[target_feature(enable = "avx2")]
2889#[cfg_attr(test, assert_instr(vpsrad))]
2890#[stable(feature = "simd_x86", since = "1.27.0")]
2891pub fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i {
2892 unsafe { transmute(psrad(a.as_i32x8(), count.as_i32x4())) }
2893}
2894
2895#[inline]
2900#[target_feature(enable = "avx2")]
2901#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 7))]
2902#[rustc_legacy_const_generics(1)]
2903#[stable(feature = "simd_x86", since = "1.27.0")]
2904pub fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2905 static_assert_uimm_bits!(IMM8, 8);
2906 unsafe { transmute(simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16))) }
2907}
2908
2909#[inline]
2914#[target_feature(enable = "avx2")]
2915#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 7))]
2916#[rustc_legacy_const_generics(1)]
2917#[stable(feature = "simd_x86", since = "1.27.0")]
2918pub fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2919 static_assert_uimm_bits!(IMM8, 8);
2920 unsafe { transmute(simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31)))) }
2921}
2922
2923#[inline]
2928#[target_feature(enable = "avx2")]
2929#[cfg_attr(test, assert_instr(vpsravd))]
2930#[stable(feature = "simd_x86", since = "1.27.0")]
2931pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
2932 unsafe { transmute(psravd(a.as_i32x4(), count.as_i32x4())) }
2933}
2934
2935#[inline]
2940#[target_feature(enable = "avx2")]
2941#[cfg_attr(test, assert_instr(vpsravd))]
2942#[stable(feature = "simd_x86", since = "1.27.0")]
2943pub fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
2944 unsafe { transmute(psravd256(a.as_i32x8(), count.as_i32x8())) }
2945}
2946
2947#[inline]
2951#[target_feature(enable = "avx2")]
2952#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
2953#[rustc_legacy_const_generics(1)]
2954#[stable(feature = "simd_x86", since = "1.27.0")]
2955pub fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
2956 static_assert_uimm_bits!(IMM8, 8);
2957 _mm256_bsrli_epi128::<IMM8>(a)
2958}
2959
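/// Shifts each 128-bit lane in `a` right by `IMM8` bytes while shifting in
/// zeros. Shift counts of 16 or more produce all zeros.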
2960#[inline]
2964#[target_feature(enable = "avx2")]
2965#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
2966#[rustc_legacy_const_generics(1)]
2967#[stable(feature = "simd_x86", since = "1.27.0")]
2968pub fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
2969 static_assert_uimm_bits!(IMM8, 8);
2970 unsafe {
2971 let a = a.as_i8x32();
2972 let zero = i8x32::ZERO;
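        // Each arm shifts both 128-bit lanes right by IMM8 bytes; indices of
        // 32 and above select bytes from the all-zero second operand.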
2973 let r: i8x32 = match IMM8 {
2974 0 => simd_shuffle!(
2975 a,
2976 zero,
2977 [
2978 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
2979 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2980 ],
2981 ),
2982 1 => simd_shuffle!(
2983 a,
2984 zero,
2985 [
2986 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 17, 18, 19, 20, 21, 22,
2987 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
2988 ],
2989 ),
2990 2 => simd_shuffle!(
2991 a,
2992 zero,
2993 [
2994 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 18, 19, 20, 21, 22, 23,
2995 24, 25, 26, 27, 28, 29, 30, 31, 32, 32,
2996 ],
2997 ),
2998 3 => simd_shuffle!(
2999 a,
3000 zero,
3001 [
3002 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 19, 20, 21, 22, 23,
3003 24, 25, 26, 27, 28, 29, 30, 31, 32, 32, 32,
3004 ],
3005 ),
3006 4 => simd_shuffle!(
3007 a,
3008 zero,
3009 [
3010 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 20, 21, 22, 23, 24,
3011 25, 26, 27, 28, 29, 30, 31, 32, 32, 32, 32,
3012 ],
3013 ),
3014 5 => simd_shuffle!(
3015 a,
3016 zero,
3017 [
3018 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 21, 22, 23, 24, 25,
3019 26, 27, 28, 29, 30, 31, 32, 32, 32, 32, 32,
3020 ],
3021 ),
3022 6 => simd_shuffle!(
3023 a,
3024 zero,
3025 [
3026 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 22, 23, 24, 25, 26,
3027 27, 28, 29, 30, 31, 32, 32, 32, 32, 32, 32,
3028 ],
3029 ),
3030 7 => simd_shuffle!(
3031 a,
3032 zero,
3033 [
3034 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 23, 24, 25, 26,
3035 27, 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, 32,
3036 ],
3037 ),
3038 8 => simd_shuffle!(
3039 a,
3040 zero,
3041 [
3042 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 24, 25, 26, 27,
3043 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, 32,
3044 ],
3045 ),
3046 9 => simd_shuffle!(
3047 a,
3048 zero,
3049 [
3050 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 25, 26, 27, 28,
3051 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
3052 ],
3053 ),
3054 10 => simd_shuffle!(
3055 a,
3056 zero,
3057 [
3058 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 26, 27, 28, 29,
3059 30, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
3060 ],
3061 ),
3062 11 => simd_shuffle!(
3063 a,
3064 zero,
3065 [
3066 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 27, 28, 29, 30,
3067 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
3068 ],
3069 ),
3070 12 => simd_shuffle!(
3071 a,
3072 zero,
3073 [
3074 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 28, 29, 30, 31,
3075 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
3076 ],
3077 ),
3078 13 => simd_shuffle!(
3079 a,
3080 zero,
3081 [
3082 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 29, 30, 31, 32,
3083 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
3084 ],
3085 ),
3086 14 => simd_shuffle!(
3087 a,
3088 zero,
3089 [
3090 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 30, 31, 32, 32,
3091 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
3092 ],
3093 ),
3094 15 => simd_shuffle!(
3095 a,
3096 zero,
3097 [
3098 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32,
3099 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
3100 ],
3101 ),
3102 _ => zero,
3103 };
3104 transmute(r)
3105 }
3106}
3107
3108#[inline]
3113#[target_feature(enable = "avx2")]
3114#[cfg_attr(test, assert_instr(vpsrlw))]
3115#[stable(feature = "simd_x86", since = "1.27.0")]
3116pub fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i {
3117 unsafe { transmute(psrlw(a.as_i16x16(), count.as_i16x8())) }
3118}
3119
3120#[inline]
3125#[target_feature(enable = "avx2")]
3126#[cfg_attr(test, assert_instr(vpsrld))]
3127#[stable(feature = "simd_x86", since = "1.27.0")]
3128pub fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i {
3129 unsafe { transmute(psrld(a.as_i32x8(), count.as_i32x4())) }
3130}
3131
3132#[inline]
3137#[target_feature(enable = "avx2")]
3138#[cfg_attr(test, assert_instr(vpsrlq))]
3139#[stable(feature = "simd_x86", since = "1.27.0")]
3140pub fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i {
3141 unsafe { transmute(psrlq(a.as_i64x4(), count.as_i64x2())) }
3142}
3143
3144#[inline]
3149#[target_feature(enable = "avx2")]
3150#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 7))]
3151#[rustc_legacy_const_generics(1)]
3152#[stable(feature = "simd_x86", since = "1.27.0")]
3153pub fn _mm256_srli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
3154 static_assert_uimm_bits!(IMM8, 8);
3155 unsafe {
3156 if IMM8 >= 16 {
3157 _mm256_setzero_si256()
3158 } else {
3159 transmute(simd_shr(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
3160 }
3161 }
3162}
3163
3164#[inline]
3169#[target_feature(enable = "avx2")]
3170#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 7))]
3171#[rustc_legacy_const_generics(1)]
3172#[stable(feature = "simd_x86", since = "1.27.0")]
3173pub fn _mm256_srli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
3174 static_assert_uimm_bits!(IMM8, 8);
3175 unsafe {
3176 if IMM8 >= 32 {
3177 _mm256_setzero_si256()
3178 } else {
3179 transmute(simd_shr(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
3180 }
3181 }
3182}
3183
3184#[inline]
3189#[target_feature(enable = "avx2")]
3190#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 7))]
3191#[rustc_legacy_const_generics(1)]
3192#[stable(feature = "simd_x86", since = "1.27.0")]
3193pub fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
3194 static_assert_uimm_bits!(IMM8, 8);
3195 unsafe {
3196 if IMM8 >= 64 {
3197 _mm256_setzero_si256()
3198 } else {
3199 transmute(simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
3200 }
3201 }
3202}
3203
3204#[inline]
3209#[target_feature(enable = "avx2")]
3210#[cfg_attr(test, assert_instr(vpsrlvd))]
3211#[stable(feature = "simd_x86", since = "1.27.0")]
3212pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
3213 unsafe { transmute(psrlvd(a.as_i32x4(), count.as_i32x4())) }
3214}
3215
3216#[inline]
3221#[target_feature(enable = "avx2")]
3222#[cfg_attr(test, assert_instr(vpsrlvd))]
3223#[stable(feature = "simd_x86", since = "1.27.0")]
3224pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
3225 unsafe { transmute(psrlvd256(a.as_i32x8(), count.as_i32x8())) }
3226}
3227
3228#[inline]
3233#[target_feature(enable = "avx2")]
3234#[cfg_attr(test, assert_instr(vpsrlvq))]
3235#[stable(feature = "simd_x86", since = "1.27.0")]
3236pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
3237 unsafe { transmute(psrlvq(a.as_i64x2(), count.as_i64x2())) }
3238}
3239
3240#[inline]
3245#[target_feature(enable = "avx2")]
3246#[cfg_attr(test, assert_instr(vpsrlvq))]
3247#[stable(feature = "simd_x86", since = "1.27.0")]
3248pub fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
3249 unsafe { transmute(psrlvq256(a.as_i64x4(), count.as_i64x4())) }
3250}
3251
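/// Loads 256 bits of integer data from memory using a non-temporal hint to
/// minimize cache pollution.
///
/// # Safety
///
/// `mem_addr` must be valid for a 32-byte read and aligned to a 32-byte
/// boundary.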
3252#[inline]
3258#[target_feature(enable = "avx2")]
3259#[cfg_attr(test, assert_instr(vmovntdqa))]
3260#[stable(feature = "simd_x86_updates", since = "1.82.0")]
3261pub unsafe fn _mm256_stream_load_si256(mem_addr: *const __m256i) -> __m256i {
3262 let dst: __m256i;
3263 crate::arch::asm!(
3264 vpl!("vmovntdqa {a}"),
3265 a = out(ymm_reg) dst,
3266 p = in(reg) mem_addr,
3267 options(pure, readonly, nostack, preserves_flags),
3268 );
3269 dst
3270}
3271
3272#[inline]
3276#[target_feature(enable = "avx2")]
3277#[cfg_attr(test, assert_instr(vpsubw))]
3278#[stable(feature = "simd_x86", since = "1.27.0")]
3279pub fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
3280 unsafe { transmute(simd_sub(a.as_i16x16(), b.as_i16x16())) }
3281}
3282
3283#[inline]
3287#[target_feature(enable = "avx2")]
3288#[cfg_attr(test, assert_instr(vpsubd))]
3289#[stable(feature = "simd_x86", since = "1.27.0")]
3290pub fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
3291 unsafe { transmute(simd_sub(a.as_i32x8(), b.as_i32x8())) }
3292}
3293
3294#[inline]
3298#[target_feature(enable = "avx2")]
3299#[cfg_attr(test, assert_instr(vpsubq))]
3300#[stable(feature = "simd_x86", since = "1.27.0")]
3301pub fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
3302 unsafe { transmute(simd_sub(a.as_i64x4(), b.as_i64x4())) }
3303}
3304
3305#[inline]
3309#[target_feature(enable = "avx2")]
3310#[cfg_attr(test, assert_instr(vpsubb))]
3311#[stable(feature = "simd_x86", since = "1.27.0")]
3312pub fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
3313 unsafe { transmute(simd_sub(a.as_i8x32(), b.as_i8x32())) }
3314}
3315
3316#[inline]
3321#[target_feature(enable = "avx2")]
3322#[cfg_attr(test, assert_instr(vpsubsw))]
3323#[stable(feature = "simd_x86", since = "1.27.0")]
3324pub fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
3325 unsafe { transmute(simd_saturating_sub(a.as_i16x16(), b.as_i16x16())) }
3326}
3327
3328#[inline]
3333#[target_feature(enable = "avx2")]
3334#[cfg_attr(test, assert_instr(vpsubsb))]
3335#[stable(feature = "simd_x86", since = "1.27.0")]
3336pub fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
3337 unsafe { transmute(simd_saturating_sub(a.as_i8x32(), b.as_i8x32())) }
3338}
3339
3340#[inline]
3345#[target_feature(enable = "avx2")]
3346#[cfg_attr(test, assert_instr(vpsubusw))]
3347#[stable(feature = "simd_x86", since = "1.27.0")]
3348pub fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
3349 unsafe { transmute(simd_saturating_sub(a.as_u16x16(), b.as_u16x16())) }
3350}
3351
3352#[inline]
3357#[target_feature(enable = "avx2")]
3358#[cfg_attr(test, assert_instr(vpsubusb))]
3359#[stable(feature = "simd_x86", since = "1.27.0")]
3360pub fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
3361 unsafe { transmute(simd_saturating_sub(a.as_u8x32(), b.as_u8x32())) }
3362}
3363
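/// Unpacks and interleaves 8-bit integers from the high half of each 128-bit
/// lane of `a` and `b`.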
3364#[inline]
3404#[target_feature(enable = "avx2")]
3405#[cfg_attr(test, assert_instr(vpunpckhbw))]
3406#[stable(feature = "simd_x86", since = "1.27.0")]
3407pub fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
3408 unsafe {
3409 #[rustfmt::skip]
3410 let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
3411 8, 40, 9, 41, 10, 42, 11, 43,
3412 12, 44, 13, 45, 14, 46, 15, 47,
3413 24, 56, 25, 57, 26, 58, 27, 59,
3414 28, 60, 29, 61, 30, 62, 31, 63,
3415 ]);
3416 transmute(r)
3417 }
3418}
3419
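/// Unpacks and interleaves 8-bit integers from the low half of each 128-bit
/// lane of `a` and `b`.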
3420#[inline]
3459#[target_feature(enable = "avx2")]
3460#[cfg_attr(test, assert_instr(vpunpcklbw))]
3461#[stable(feature = "simd_x86", since = "1.27.0")]
3462pub fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
3463 unsafe {
3464 #[rustfmt::skip]
3465 let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
3466 0, 32, 1, 33, 2, 34, 3, 35,
3467 4, 36, 5, 37, 6, 38, 7, 39,
3468 16, 48, 17, 49, 18, 50, 19, 51,
3469 20, 52, 21, 53, 22, 54, 23, 55,
3470 ]);
3471 transmute(r)
3472 }
3473}
3474
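/// Unpacks and interleaves 16-bit integers from the high half of each 128-bit
/// lane of `a` and `b`.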
3475#[inline]
3510#[target_feature(enable = "avx2")]
3511#[cfg_attr(test, assert_instr(vpunpckhwd))]
3512#[stable(feature = "simd_x86", since = "1.27.0")]
3513pub fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
3514 unsafe {
3515 let r: i16x16 = simd_shuffle!(
3516 a.as_i16x16(),
3517 b.as_i16x16(),
3518 [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31],
3519 );
3520 transmute(r)
3521 }
3522}
3523
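/// Unpacks and interleaves 16-bit integers from the low half of each 128-bit
/// lane of `a` and `b`.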
3524#[inline]
3560#[target_feature(enable = "avx2")]
3561#[cfg_attr(test, assert_instr(vpunpcklwd))]
3562#[stable(feature = "simd_x86", since = "1.27.0")]
3563pub fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
3564 unsafe {
3565 let r: i16x16 = simd_shuffle!(
3566 a.as_i16x16(),
3567 b.as_i16x16(),
3568 [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27],
3569 );
3570 transmute(r)
3571 }
3572}
3573
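/// Unpacks and interleaves 32-bit integers from the high half of each 128-bit
/// lane of `a` and `b`.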
3574#[inline]
3603#[target_feature(enable = "avx2")]
3604#[cfg_attr(test, assert_instr(vunpckhps))]
3605#[stable(feature = "simd_x86", since = "1.27.0")]
3606pub fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
3607 unsafe {
3608 let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]);
3609 transmute(r)
3610 }
3611}
3612
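/// Unpacks and interleaves 32-bit integers from the low half of each 128-bit
/// lane of `a` and `b`.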
3613#[inline]
3642#[target_feature(enable = "avx2")]
3643#[cfg_attr(test, assert_instr(vunpcklps))]
3644#[stable(feature = "simd_x86", since = "1.27.0")]
3645pub fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
3646 unsafe {
3647 let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
3648 transmute(r)
3649 }
3650}
3651
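/// Unpacks and interleaves 64-bit integers from the high half of each 128-bit
/// lane of `a` and `b`.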
3652#[inline]
3681#[target_feature(enable = "avx2")]
3682#[cfg_attr(test, assert_instr(vunpckhpd))]
3683#[stable(feature = "simd_x86", since = "1.27.0")]
3684pub fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
3685 unsafe {
3686 let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]);
3687 transmute(r)
3688 }
3689}
3690
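/// Unpacks and interleaves 64-bit integers from the low half of each 128-bit
/// lane of `a` and `b`.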
3691#[inline]
3720#[target_feature(enable = "avx2")]
3721#[cfg_attr(test, assert_instr(vunpcklpd))]
3722#[stable(feature = "simd_x86", since = "1.27.0")]
3723pub fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
3724 unsafe {
3725 let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]);
3726 transmute(r)
3727 }
3728}
3729
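/// Computes the bitwise XOR of the 256 bits (representing integer data) in
/// `a` and `b`.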
3730#[inline]
3735#[target_feature(enable = "avx2")]
3736#[cfg_attr(test, assert_instr(vxorps))]
3737#[stable(feature = "simd_x86", since = "1.27.0")]
3738pub fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
3739 unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
3740}
3741
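/// Extracts an 8-bit integer from `a`, selected with `INDEX`, and returns it
/// zero-extended to a 32-bit integer.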
3742#[inline]
3749#[target_feature(enable = "avx2")]
3750#[rustc_legacy_const_generics(1)]
3752#[stable(feature = "simd_x86", since = "1.27.0")]
3753pub fn _mm256_extract_epi8<const INDEX: i32>(a: __m256i) -> i32 {
3754 static_assert_uimm_bits!(INDEX, 5);
3755 unsafe { simd_extract!(a.as_u8x32(), INDEX as u32, u8) as i32 }
3756}
3757
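/// Extracts a 16-bit integer from `a`, selected with `INDEX`, and returns it
/// zero-extended to a 32-bit integer.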
3758#[inline]
3765#[target_feature(enable = "avx2")]
3766#[rustc_legacy_const_generics(1)]
3768#[stable(feature = "simd_x86", since = "1.27.0")]
3769pub fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
3770 static_assert_uimm_bits!(INDEX, 4);
3771 unsafe { simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32 }
3772}
3773
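// LLVM intrinsic declarations backing the functions above.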
3774#[allow(improper_ctypes)]
3775unsafe extern "C" {
3776 #[link_name = "llvm.x86.avx2.phadd.w"]
3777 fn phaddw(a: i16x16, b: i16x16) -> i16x16;
3778 #[link_name = "llvm.x86.avx2.phadd.d"]
3779 fn phaddd(a: i32x8, b: i32x8) -> i32x8;
3780 #[link_name = "llvm.x86.avx2.phadd.sw"]
3781 fn phaddsw(a: i16x16, b: i16x16) -> i16x16;
3782 #[link_name = "llvm.x86.avx2.phsub.w"]
3783 fn phsubw(a: i16x16, b: i16x16) -> i16x16;
3784 #[link_name = "llvm.x86.avx2.phsub.d"]
3785 fn phsubd(a: i32x8, b: i32x8) -> i32x8;
3786 #[link_name = "llvm.x86.avx2.phsub.sw"]
3787 fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
3788 #[link_name = "llvm.x86.avx2.pmadd.wd"]
3789 fn pmaddwd(a: i16x16, b: i16x16) -> i32x8;
3790 #[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
3791 fn pmaddubsw(a: u8x32, b: u8x32) -> i16x16;
3792 #[link_name = "llvm.x86.avx2.maskload.d"]
3793 fn maskloadd(mem_addr: *const i8, mask: i32x4) -> i32x4;
3794 #[link_name = "llvm.x86.avx2.maskload.d.256"]
3795 fn maskloadd256(mem_addr: *const i8, mask: i32x8) -> i32x8;
3796 #[link_name = "llvm.x86.avx2.maskload.q"]
3797 fn maskloadq(mem_addr: *const i8, mask: i64x2) -> i64x2;
3798 #[link_name = "llvm.x86.avx2.maskload.q.256"]
3799 fn maskloadq256(mem_addr: *const i8, mask: i64x4) -> i64x4;
3800 #[link_name = "llvm.x86.avx2.maskstore.d"]
3801 fn maskstored(mem_addr: *mut i8, mask: i32x4, a: i32x4);
3802 #[link_name = "llvm.x86.avx2.maskstore.d.256"]
3803 fn maskstored256(mem_addr: *mut i8, mask: i32x8, a: i32x8);
3804 #[link_name = "llvm.x86.avx2.maskstore.q"]
3805 fn maskstoreq(mem_addr: *mut i8, mask: i64x2, a: i64x2);
3806 #[link_name = "llvm.x86.avx2.maskstore.q.256"]
3807 fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
3808 #[link_name = "llvm.x86.avx2.mpsadbw"]
3809 fn mpsadbw(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
3810 #[link_name = "llvm.x86.avx2.pmul.hr.sw"]
3811 fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
3812 #[link_name = "llvm.x86.avx2.packsswb"]
3813 fn packsswb(a: i16x16, b: i16x16) -> i8x32;
3814 #[link_name = "llvm.x86.avx2.packssdw"]
3815 fn packssdw(a: i32x8, b: i32x8) -> i16x16;
3816 #[link_name = "llvm.x86.avx2.packuswb"]
3817 fn packuswb(a: i16x16, b: i16x16) -> u8x32;
3818 #[link_name = "llvm.x86.avx2.packusdw"]
3819 fn packusdw(a: i32x8, b: i32x8) -> u16x16;
3820 #[link_name = "llvm.x86.avx2.psad.bw"]
3821 fn psadbw(a: u8x32, b: u8x32) -> u64x4;
3822 #[link_name = "llvm.x86.avx2.psign.b"]
3823 fn psignb(a: i8x32, b: i8x32) -> i8x32;
3824 #[link_name = "llvm.x86.avx2.psign.w"]
3825 fn psignw(a: i16x16, b: i16x16) -> i16x16;
3826 #[link_name = "llvm.x86.avx2.psign.d"]
3827 fn psignd(a: i32x8, b: i32x8) -> i32x8;
3828 #[link_name = "llvm.x86.avx2.psll.w"]
3829 fn psllw(a: i16x16, count: i16x8) -> i16x16;
3830 #[link_name = "llvm.x86.avx2.psll.d"]
3831 fn pslld(a: i32x8, count: i32x4) -> i32x8;
3832 #[link_name = "llvm.x86.avx2.psll.q"]
3833 fn psllq(a: i64x4, count: i64x2) -> i64x4;
3834 #[link_name = "llvm.x86.avx2.psllv.d"]
3835 fn psllvd(a: i32x4, count: i32x4) -> i32x4;
3836 #[link_name = "llvm.x86.avx2.psllv.d.256"]
3837 fn psllvd256(a: i32x8, count: i32x8) -> i32x8;
3838 #[link_name = "llvm.x86.avx2.psllv.q"]
3839 fn psllvq(a: i64x2, count: i64x2) -> i64x2;
3840 #[link_name = "llvm.x86.avx2.psllv.q.256"]
3841 fn psllvq256(a: i64x4, count: i64x4) -> i64x4;
3842 #[link_name = "llvm.x86.avx2.psra.w"]
3843 fn psraw(a: i16x16, count: i16x8) -> i16x16;
3844 #[link_name = "llvm.x86.avx2.psra.d"]
3845 fn psrad(a: i32x8, count: i32x4) -> i32x8;
3846 #[link_name = "llvm.x86.avx2.psrav.d"]
3847 fn psravd(a: i32x4, count: i32x4) -> i32x4;
3848 #[link_name = "llvm.x86.avx2.psrav.d.256"]
3849 fn psravd256(a: i32x8, count: i32x8) -> i32x8;
3850 #[link_name = "llvm.x86.avx2.psrl.w"]
3851 fn psrlw(a: i16x16, count: i16x8) -> i16x16;
3852 #[link_name = "llvm.x86.avx2.psrl.d"]
3853 fn psrld(a: i32x8, count: i32x4) -> i32x8;
3854 #[link_name = "llvm.x86.avx2.psrl.q"]
3855 fn psrlq(a: i64x4, count: i64x2) -> i64x4;
3856 #[link_name = "llvm.x86.avx2.psrlv.d"]
3857 fn psrlvd(a: i32x4, count: i32x4) -> i32x4;
3858 #[link_name = "llvm.x86.avx2.psrlv.d.256"]
3859 fn psrlvd256(a: i32x8, count: i32x8) -> i32x8;
3860 #[link_name = "llvm.x86.avx2.psrlv.q"]
3861 fn psrlvq(a: i64x2, count: i64x2) -> i64x2;
3862 #[link_name = "llvm.x86.avx2.psrlv.q.256"]
3863 fn psrlvq256(a: i64x4, count: i64x4) -> i64x4;
3864 #[link_name = "llvm.x86.avx2.pshuf.b"]
3865 fn pshufb(a: u8x32, b: u8x32) -> u8x32;
3866 #[link_name = "llvm.x86.avx2.permd"]
3867 fn permd(a: u32x8, b: u32x8) -> u32x8;
3868 #[link_name = "llvm.x86.avx2.permps"]
3869 fn permps(a: __m256, b: i32x8) -> __m256;
3870 #[link_name = "llvm.x86.avx2.vperm2i128"]
3871 fn vperm2i128(a: i64x4, b: i64x4, imm8: i8) -> i64x4;
3872 #[link_name = "llvm.x86.avx2.gather.d.d"]
3873 fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4;
3874 #[link_name = "llvm.x86.avx2.gather.d.d.256"]
3875 fn vpgatherdd(src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8;
3876 #[link_name = "llvm.x86.avx2.gather.d.q"]
3877 fn pgatherdq(src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2;
3878 #[link_name = "llvm.x86.avx2.gather.d.q.256"]
3879 fn vpgatherdq(src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4;
3880 #[link_name = "llvm.x86.avx2.gather.q.d"]
3881 fn pgatherqd(src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4;
3882 #[link_name = "llvm.x86.avx2.gather.q.d.256"]
3883 fn vpgatherqd(src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4;
3884 #[link_name = "llvm.x86.avx2.gather.q.q"]
3885 fn pgatherqq(src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2;
3886 #[link_name = "llvm.x86.avx2.gather.q.q.256"]
3887 fn vpgatherqq(src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4;
3888 #[link_name = "llvm.x86.avx2.gather.d.pd"]
3889 fn pgatherdpd(
3890 src: __m128d,
3891 slice: *const i8,
3892 offsets: i32x4,
3893 mask: __m128d,
3894 scale: i8,
3895 ) -> __m128d;
3896 #[link_name = "llvm.x86.avx2.gather.d.pd.256"]
3897 fn vpgatherdpd(
3898 src: __m256d,
3899 slice: *const i8,
3900 offsets: i32x4,
3901 mask: __m256d,
3902 scale: i8,
3903 ) -> __m256d;
3904 #[link_name = "llvm.x86.avx2.gather.q.pd"]
3905 fn pgatherqpd(
3906 src: __m128d,
3907 slice: *const i8,
3908 offsets: i64x2,
3909 mask: __m128d,
3910 scale: i8,
3911 ) -> __m128d;
3912 #[link_name = "llvm.x86.avx2.gather.q.pd.256"]
3913 fn vpgatherqpd(
3914 src: __m256d,
3915 slice: *const i8,
3916 offsets: i64x4,
3917 mask: __m256d,
3918 scale: i8,
3919 ) -> __m256d;
3920 #[link_name = "llvm.x86.avx2.gather.d.ps"]
3921 fn pgatherdps(src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8)
3922 -> __m128;
3923 #[link_name = "llvm.x86.avx2.gather.d.ps.256"]
3924 fn vpgatherdps(
3925 src: __m256,
3926 slice: *const i8,
3927 offsets: i32x8,
3928 mask: __m256,
3929 scale: i8,
3930 ) -> __m256;
3931 #[link_name = "llvm.x86.avx2.gather.q.ps"]
3932 fn pgatherqps(src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8)
3933 -> __m128;
3934 #[link_name = "llvm.x86.avx2.gather.q.ps.256"]
3935 fn vpgatherqps(
3936 src: __m128,
3937 slice: *const i8,
3938 offsets: i64x4,
3939 mask: __m128,
3940 scale: i8,
3941 ) -> __m128;
3942}
3943
3944#[cfg(test)]
3945mod tests {
3946
3947 use stdarch_test::simd_test;
3948
3949 use crate::core_arch::x86::*;
3950
3951 #[simd_test(enable = "avx2")]
3952 unsafe fn test_mm256_abs_epi32() {
3953 #[rustfmt::skip]
3954 let a = _mm256_setr_epi32(
3955 0, 1, -1, i32::MAX,
3956 i32::MIN, 100, -100, -32,
3957 );
3958 let r = _mm256_abs_epi32(a);
3959 #[rustfmt::skip]
3960 let e = _mm256_setr_epi32(
3961 0, 1, 1, i32::MAX,
3962 i32::MAX.wrapping_add(1), 100, 100, 32,
3963 );
3964 assert_eq_m256i(r, e);
3965 }
3966
3967 #[simd_test(enable = "avx2")]
3968 unsafe fn test_mm256_abs_epi16() {
3969 #[rustfmt::skip]
3970 let a = _mm256_setr_epi16(
3971 0, 1, -1, 2, -2, 3, -3, 4,
3972 -4, 5, -5, i16::MAX, i16::MIN, 100, -100, -32,
3973 );
3974 let r = _mm256_abs_epi16(a);
3975 #[rustfmt::skip]
3976 let e = _mm256_setr_epi16(
3977 0, 1, 1, 2, 2, 3, 3, 4,
3978 4, 5, 5, i16::MAX, i16::MAX.wrapping_add(1), 100, 100, 32,
3979 );
3980 assert_eq_m256i(r, e);
3981 }
3982
3983 #[simd_test(enable = "avx2")]
3984 unsafe fn test_mm256_abs_epi8() {
3985 #[rustfmt::skip]
3986 let a = _mm256_setr_epi8(
3987 0, 1, -1, 2, -2, 3, -3, 4,
3988 -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
3989 0, 1, -1, 2, -2, 3, -3, 4,
3990 -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
3991 );
3992 let r = _mm256_abs_epi8(a);
3993 #[rustfmt::skip]
3994 let e = _mm256_setr_epi8(
3995 0, 1, 1, 2, 2, 3, 3, 4,
3996 4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
3997 0, 1, 1, 2, 2, 3, 3, 4,
3998 4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
3999 );
4000 assert_eq_m256i(r, e);
4001 }
4002
4003 #[simd_test(enable = "avx2")]
4004 unsafe fn test_mm256_add_epi64() {
4005 let a = _mm256_setr_epi64x(-10, 0, 100, 1_000_000_000);
4006 let b = _mm256_setr_epi64x(-1, 0, 1, 2);
4007 let r = _mm256_add_epi64(a, b);
4008 let e = _mm256_setr_epi64x(-11, 0, 101, 1_000_000_002);
4009 assert_eq_m256i(r, e);
4010 }
4011
4012 #[simd_test(enable = "avx2")]
4013 unsafe fn test_mm256_add_epi32() {
4014 let a = _mm256_setr_epi32(-1, 0, 1, 2, 3, 4, 5, 6);
4015 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4016 let r = _mm256_add_epi32(a, b);
4017 let e = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
4018 assert_eq_m256i(r, e);
4019 }
4020
4021 #[simd_test(enable = "avx2")]
4022 unsafe fn test_mm256_add_epi16() {
4023 #[rustfmt::skip]
4024 let a = _mm256_setr_epi16(
4025 0, 1, 2, 3, 4, 5, 6, 7,
4026 8, 9, 10, 11, 12, 13, 14, 15,
4027 );
4028 #[rustfmt::skip]
4029 let b = _mm256_setr_epi16(
4030 0, 1, 2, 3, 4, 5, 6, 7,
4031 8, 9, 10, 11, 12, 13, 14, 15,
4032 );
4033 let r = _mm256_add_epi16(a, b);
4034 #[rustfmt::skip]
4035 let e = _mm256_setr_epi16(
4036 0, 2, 4, 6, 8, 10, 12, 14,
4037 16, 18, 20, 22, 24, 26, 28, 30,
4038 );
4039 assert_eq_m256i(r, e);
4040 }
4041
4042 #[simd_test(enable = "avx2")]
4043 unsafe fn test_mm256_add_epi8() {
4044 #[rustfmt::skip]
4045 let a = _mm256_setr_epi8(
4046 0, 1, 2, 3, 4, 5, 6, 7,
4047 8, 9, 10, 11, 12, 13, 14, 15,
4048 16, 17, 18, 19, 20, 21, 22, 23,
4049 24, 25, 26, 27, 28, 29, 30, 31,
4050 );
4051 #[rustfmt::skip]
4052 let b = _mm256_setr_epi8(
4053 0, 1, 2, 3, 4, 5, 6, 7,
4054 8, 9, 10, 11, 12, 13, 14, 15,
4055 16, 17, 18, 19, 20, 21, 22, 23,
4056 24, 25, 26, 27, 28, 29, 30, 31,
4057 );
4058 let r = _mm256_add_epi8(a, b);
4059 #[rustfmt::skip]
4060 let e = _mm256_setr_epi8(
4061 0, 2, 4, 6, 8, 10, 12, 14,
4062 16, 18, 20, 22, 24, 26, 28, 30,
4063 32, 34, 36, 38, 40, 42, 44, 46,
4064 48, 50, 52, 54, 56, 58, 60, 62,
4065 );
4066 assert_eq_m256i(r, e);
4067 }
4068
4069 #[simd_test(enable = "avx2")]
4070 unsafe fn test_mm256_adds_epi8() {
4071 #[rustfmt::skip]
4072 let a = _mm256_setr_epi8(
4073 0, 1, 2, 3, 4, 5, 6, 7,
4074 8, 9, 10, 11, 12, 13, 14, 15,
4075 16, 17, 18, 19, 20, 21, 22, 23,
4076 24, 25, 26, 27, 28, 29, 30, 31,
4077 );
4078 #[rustfmt::skip]
4079 let b = _mm256_setr_epi8(
4080 32, 33, 34, 35, 36, 37, 38, 39,
4081 40, 41, 42, 43, 44, 45, 46, 47,
4082 48, 49, 50, 51, 52, 53, 54, 55,
4083 56, 57, 58, 59, 60, 61, 62, 63,
4084 );
4085 let r = _mm256_adds_epi8(a, b);
4086 #[rustfmt::skip]
4087 let e = _mm256_setr_epi8(
4088 32, 34, 36, 38, 40, 42, 44, 46,
4089 48, 50, 52, 54, 56, 58, 60, 62,
4090 64, 66, 68, 70, 72, 74, 76, 78,
4091 80, 82, 84, 86, 88, 90, 92, 94,
4092 );
4093 assert_eq_m256i(r, e);
4094 }
4095
4096 #[simd_test(enable = "avx2")]
4097 unsafe fn test_mm256_adds_epi8_saturate_positive() {
4098 let a = _mm256_set1_epi8(0x7F);
4099 let b = _mm256_set1_epi8(1);
4100 let r = _mm256_adds_epi8(a, b);
4101 assert_eq_m256i(r, a);
4102 }
4103
4104 #[simd_test(enable = "avx2")]
4105 unsafe fn test_mm256_adds_epi8_saturate_negative() {
4106 let a = _mm256_set1_epi8(-0x80);
4107 let b = _mm256_set1_epi8(-1);
4108 let r = _mm256_adds_epi8(a, b);
4109 assert_eq_m256i(r, a);
4110 }
4111
4112 #[simd_test(enable = "avx2")]
4113 unsafe fn test_mm256_adds_epi16() {
4114 #[rustfmt::skip]
4115 let a = _mm256_setr_epi16(
4116 0, 1, 2, 3, 4, 5, 6, 7,
4117 8, 9, 10, 11, 12, 13, 14, 15,
4118 );
4119 #[rustfmt::skip]
4120 let b = _mm256_setr_epi16(
4121 32, 33, 34, 35, 36, 37, 38, 39,
4122 40, 41, 42, 43, 44, 45, 46, 47,
4123 );
4124 let r = _mm256_adds_epi16(a, b);
4125 #[rustfmt::skip]
4126 let e = _mm256_setr_epi16(
4127 32, 34, 36, 38, 40, 42, 44, 46,
4128 48, 50, 52, 54, 56, 58, 60, 62,
4129 );
4130
4131 assert_eq_m256i(r, e);
4132 }
4133
4134 #[simd_test(enable = "avx2")]
4135 unsafe fn test_mm256_adds_epi16_saturate_positive() {
4136 let a = _mm256_set1_epi16(0x7FFF);
4137 let b = _mm256_set1_epi16(1);
4138 let r = _mm256_adds_epi16(a, b);
4139 assert_eq_m256i(r, a);
4140 }
4141
4142 #[simd_test(enable = "avx2")]
4143 unsafe fn test_mm256_adds_epi16_saturate_negative() {
4144 let a = _mm256_set1_epi16(-0x8000);
4145 let b = _mm256_set1_epi16(-1);
4146 let r = _mm256_adds_epi16(a, b);
4147 assert_eq_m256i(r, a);
4148 }
4149
4150 #[simd_test(enable = "avx2")]
4151 unsafe fn test_mm256_adds_epu8() {
4152 #[rustfmt::skip]
4153 let a = _mm256_setr_epi8(
4154 0, 1, 2, 3, 4, 5, 6, 7,
4155 8, 9, 10, 11, 12, 13, 14, 15,
4156 16, 17, 18, 19, 20, 21, 22, 23,
4157 24, 25, 26, 27, 28, 29, 30, 31,
4158 );
4159 #[rustfmt::skip]
4160 let b = _mm256_setr_epi8(
4161 32, 33, 34, 35, 36, 37, 38, 39,
4162 40, 41, 42, 43, 44, 45, 46, 47,
4163 48, 49, 50, 51, 52, 53, 54, 55,
4164 56, 57, 58, 59, 60, 61, 62, 63,
4165 );
4166 let r = _mm256_adds_epu8(a, b);
4167 #[rustfmt::skip]
4168 let e = _mm256_setr_epi8(
4169 32, 34, 36, 38, 40, 42, 44, 46,
4170 48, 50, 52, 54, 56, 58, 60, 62,
4171 64, 66, 68, 70, 72, 74, 76, 78,
4172 80, 82, 84, 86, 88, 90, 92, 94,
4173 );
4174 assert_eq_m256i(r, e);
4175 }
4176
4177 #[simd_test(enable = "avx2")]
4178 unsafe fn test_mm256_adds_epu8_saturate() {
4179 let a = _mm256_set1_epi8(!0);
4180 let b = _mm256_set1_epi8(1);
4181 let r = _mm256_adds_epu8(a, b);
4182 assert_eq_m256i(r, a);
4183 }
4184
4185 #[simd_test(enable = "avx2")]
4186 unsafe fn test_mm256_adds_epu16() {
4187 #[rustfmt::skip]
4188 let a = _mm256_setr_epi16(
4189 0, 1, 2, 3, 4, 5, 6, 7,
4190 8, 9, 10, 11, 12, 13, 14, 15,
4191 );
4192 #[rustfmt::skip]
4193 let b = _mm256_setr_epi16(
4194 32, 33, 34, 35, 36, 37, 38, 39,
4195 40, 41, 42, 43, 44, 45, 46, 47,
4196 );
4197 let r = _mm256_adds_epu16(a, b);
4198 #[rustfmt::skip]
4199 let e = _mm256_setr_epi16(
4200 32, 34, 36, 38, 40, 42, 44, 46,
4201 48, 50, 52, 54, 56, 58, 60, 62,
4202 );
4203
4204 assert_eq_m256i(r, e);
4205 }
4206
4207 #[simd_test(enable = "avx2")]
4208 unsafe fn test_mm256_adds_epu16_saturate() {
4209 let a = _mm256_set1_epi16(!0);
4210 let b = _mm256_set1_epi16(1);
4211 let r = _mm256_adds_epu16(a, b);
4212 assert_eq_m256i(r, a);
4213 }
4214
4215 #[simd_test(enable = "avx2")]
4216 unsafe fn test_mm256_and_si256() {
4217 let a = _mm256_set1_epi8(5);
4218 let b = _mm256_set1_epi8(3);
4219 let got = _mm256_and_si256(a, b);
4220 assert_eq_m256i(got, _mm256_set1_epi8(1));
4221 }
4222
4223 #[simd_test(enable = "avx2")]
4224 unsafe fn test_mm256_andnot_si256() {
4225 let a = _mm256_set1_epi8(5);
4226 let b = _mm256_set1_epi8(3);
4227 let got = _mm256_andnot_si256(a, b);
4228 assert_eq_m256i(got, _mm256_set1_epi8(2));
4229 }
4230
4231 #[simd_test(enable = "avx2")]
4232 unsafe fn test_mm256_avg_epu8() {
4233 let (a, b) = (_mm256_set1_epi8(3), _mm256_set1_epi8(9));
4234 let r = _mm256_avg_epu8(a, b);
4235 assert_eq_m256i(r, _mm256_set1_epi8(6));
4236 }
4237
4238 #[simd_test(enable = "avx2")]
4239 unsafe fn test_mm256_avg_epu16() {
4240 let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4241 let r = _mm256_avg_epu16(a, b);
4242 assert_eq_m256i(r, _mm256_set1_epi16(6));
4243 }
4244
4245 #[simd_test(enable = "avx2")]
4246 unsafe fn test_mm_blend_epi32() {
4247 let (a, b) = (_mm_set1_epi32(3), _mm_set1_epi32(9));
4248 let e = _mm_setr_epi32(9, 3, 3, 3);
4249 let r = _mm_blend_epi32::<0x01>(a, b);
4250 assert_eq_m128i(r, e);
4251
4252 let r = _mm_blend_epi32::<0x0E>(b, a);
4253 assert_eq_m128i(r, e);
4254 }
4255
4256 #[simd_test(enable = "avx2")]
4257 unsafe fn test_mm256_blend_epi32() {
4258 let (a, b) = (_mm256_set1_epi32(3), _mm256_set1_epi32(9));
4259 let e = _mm256_setr_epi32(9, 3, 3, 3, 3, 3, 3, 3);
4260 let r = _mm256_blend_epi32::<0x01>(a, b);
4261 assert_eq_m256i(r, e);
4262
4263 let e = _mm256_setr_epi32(3, 9, 3, 3, 3, 3, 3, 9);
4264 let r = _mm256_blend_epi32::<0x82>(a, b);
4265 assert_eq_m256i(r, e);
4266
4267 let e = _mm256_setr_epi32(3, 3, 9, 9, 9, 9, 9, 3);
4268 let r = _mm256_blend_epi32::<0x7C>(a, b);
4269 assert_eq_m256i(r, e);
4270 }
4271
4272 #[simd_test(enable = "avx2")]
4273 unsafe fn test_mm256_blend_epi16() {
4274 let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4275 let e = _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3);
4276 let r = _mm256_blend_epi16::<0x01>(a, b);
4277 assert_eq_m256i(r, e);
4278
4279 let r = _mm256_blend_epi16::<0xFE>(b, a);
4280 assert_eq_m256i(r, e);
4281 }
4282
4283 #[simd_test(enable = "avx2")]
4284 unsafe fn test_mm256_blendv_epi8() {
4285 let (a, b) = (_mm256_set1_epi8(4), _mm256_set1_epi8(2));
4286 let mask = _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), -1);
4287 let e = _mm256_insert_epi8::<2>(_mm256_set1_epi8(4), 2);
4288 let r = _mm256_blendv_epi8(a, b, mask);
4289 assert_eq_m256i(r, e);
4290 }
4291
4292 #[simd_test(enable = "avx2")]
4293 unsafe fn test_mm_broadcastb_epi8() {
4294 let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
4295 let res = _mm_broadcastb_epi8(a);
4296 assert_eq_m128i(res, _mm_set1_epi8(0x2a));
4297 }
4298
4299 #[simd_test(enable = "avx2")]
4300 unsafe fn test_mm256_broadcastb_epi8() {
4301 let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
4302 let res = _mm256_broadcastb_epi8(a);
4303 assert_eq_m256i(res, _mm256_set1_epi8(0x2a));
4304 }
4305
4306 #[simd_test(enable = "avx2")]
4307 unsafe fn test_mm_broadcastd_epi32() {
4308 let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
4309 let res = _mm_broadcastd_epi32(a);
4310 assert_eq_m128i(res, _mm_set1_epi32(0x2a));
4311 }
4312
4313 #[simd_test(enable = "avx2")]
4314 unsafe fn test_mm256_broadcastd_epi32() {
4315 let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
4316 let res = _mm256_broadcastd_epi32(a);
4317 assert_eq_m256i(res, _mm256_set1_epi32(0x2a));
4318 }
4319
4320 #[simd_test(enable = "avx2")]
4321 unsafe fn test_mm_broadcastq_epi64() {
4322 let a = _mm_setr_epi64x(0x1ffffffff, 0);
4323 let res = _mm_broadcastq_epi64(a);
4324 assert_eq_m128i(res, _mm_set1_epi64x(0x1ffffffff));
4325 }
4326
4327 #[simd_test(enable = "avx2")]
4328 unsafe fn test_mm256_broadcastq_epi64() {
4329 let a = _mm_setr_epi64x(0x1ffffffff, 0);
4330 let res = _mm256_broadcastq_epi64(a);
4331 assert_eq_m256i(res, _mm256_set1_epi64x(0x1ffffffff));
4332 }
4333
4334 #[simd_test(enable = "avx2")]
4335 unsafe fn test_mm_broadcastsd_pd() {
4336 let a = _mm_setr_pd(6.88, 3.44);
4337 let res = _mm_broadcastsd_pd(a);
4338 assert_eq_m128d(res, _mm_set1_pd(6.88));
4339 }
4340
4341 #[simd_test(enable = "avx2")]
4342 unsafe fn test_mm256_broadcastsd_pd() {
4343 let a = _mm_setr_pd(6.88, 3.44);
4344 let res = _mm256_broadcastsd_pd(a);
4345 assert_eq_m256d(res, _mm256_set1_pd(6.88f64));
4346 }
4347
4348 #[simd_test(enable = "avx2")]
4349 unsafe fn test_mm_broadcastsi128_si256() {
4350 let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
4351 let res = _mm_broadcastsi128_si256(a);
4352 let retval = _mm256_setr_epi64x(
4353 0x0987654321012334,
4354 0x5678909876543210,
4355 0x0987654321012334,
4356 0x5678909876543210,
4357 );
4358 assert_eq_m256i(res, retval);
4359 }
4360
4361 #[simd_test(enable = "avx2")]
4362 unsafe fn test_mm256_broadcastsi128_si256() {
4363 let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
4364 let res = _mm256_broadcastsi128_si256(a);
4365 let retval = _mm256_setr_epi64x(
4366 0x0987654321012334,
4367 0x5678909876543210,
4368 0x0987654321012334,
4369 0x5678909876543210,
4370 );
4371 assert_eq_m256i(res, retval);
4372 }
4373
4374 #[simd_test(enable = "avx2")]
4375 unsafe fn test_mm_broadcastss_ps() {
4376 let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
4377 let res = _mm_broadcastss_ps(a);
4378 assert_eq_m128(res, _mm_set1_ps(6.88));
4379 }
4380
4381 #[simd_test(enable = "avx2")]
4382 unsafe fn test_mm256_broadcastss_ps() {
4383 let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
4384 let res = _mm256_broadcastss_ps(a);
4385 assert_eq_m256(res, _mm256_set1_ps(6.88));
4386 }
4387
4388 #[simd_test(enable = "avx2")]
4389 unsafe fn test_mm_broadcastw_epi16() {
4390 let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
4391 let res = _mm_broadcastw_epi16(a);
4392 assert_eq_m128i(res, _mm_set1_epi16(0x22b));
4393 }
4394
4395 #[simd_test(enable = "avx2")]
4396 unsafe fn test_mm256_broadcastw_epi16() {
4397 let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
4398 let res = _mm256_broadcastw_epi16(a);
4399 assert_eq_m256i(res, _mm256_set1_epi16(0x22b));
4400 }
4401
4402 #[simd_test(enable = "avx2")]
4403 unsafe fn test_mm256_cmpeq_epi8() {
4404 #[rustfmt::skip]
4405 let a = _mm256_setr_epi8(
4406 0, 1, 2, 3, 4, 5, 6, 7,
4407 8, 9, 10, 11, 12, 13, 14, 15,
4408 16, 17, 18, 19, 20, 21, 22, 23,
4409 24, 25, 26, 27, 28, 29, 30, 31,
4410 );
4411 #[rustfmt::skip]
4412 let b = _mm256_setr_epi8(
4413 31, 30, 2, 28, 27, 26, 25, 24,
4414 23, 22, 21, 20, 19, 18, 17, 16,
4415 15, 14, 13, 12, 11, 10, 9, 8,
4416 7, 6, 5, 4, 3, 2, 1, 0,
4417 );
4418 let r = _mm256_cmpeq_epi8(a, b);
4419 assert_eq_m256i(r, _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), !0));
4420 }
4421
4422 #[simd_test(enable = "avx2")]
4423 unsafe fn test_mm256_cmpeq_epi16() {
4424 #[rustfmt::skip]
4425 let a = _mm256_setr_epi16(
4426 0, 1, 2, 3, 4, 5, 6, 7,
4427 8, 9, 10, 11, 12, 13, 14, 15,
4428 );
4429 #[rustfmt::skip]
4430 let b = _mm256_setr_epi16(
4431 15, 14, 2, 12, 11, 10, 9, 8,
4432 7, 6, 5, 4, 3, 2, 1, 0,
4433 );
4434 let r = _mm256_cmpeq_epi16(a, b);
4435 assert_eq_m256i(r, _mm256_insert_epi16::<2>(_mm256_set1_epi16(0), !0));
4436 }
4437
4438 #[simd_test(enable = "avx2")]
4439 unsafe fn test_mm256_cmpeq_epi32() {
4440 let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4441 let b = _mm256_setr_epi32(7, 6, 2, 4, 3, 2, 1, 0);
4442 let r = _mm256_cmpeq_epi32(a, b);
4443 let e = _mm256_set1_epi32(0);
4444 let e = _mm256_insert_epi32::<2>(e, !0);
4445 assert_eq_m256i(r, e);
4446 }
4447
4448 #[simd_test(enable = "avx2")]
4449 unsafe fn test_mm256_cmpeq_epi64() {
4450 let a = _mm256_setr_epi64x(0, 1, 2, 3);
4451 let b = _mm256_setr_epi64x(3, 2, 2, 0);
4452 let r = _mm256_cmpeq_epi64(a, b);
4453 assert_eq_m256i(r, _mm256_insert_epi64::<2>(_mm256_set1_epi64x(0), !0));
4454 }
4455
4456 #[simd_test(enable = "avx2")]
4457 unsafe fn test_mm256_cmpgt_epi8() {
4458 let a = _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), 5);
4459 let b = _mm256_set1_epi8(0);
4460 let r = _mm256_cmpgt_epi8(a, b);
4461 assert_eq_m256i(r, _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), !0));
4462 }
4463
4464 #[simd_test(enable = "avx2")]
4465 unsafe fn test_mm256_cmpgt_epi16() {
4466 let a = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 5);
4467 let b = _mm256_set1_epi16(0);
4468 let r = _mm256_cmpgt_epi16(a, b);
4469 assert_eq_m256i(r, _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), !0));
4470 }
4471
4472 #[simd_test(enable = "avx2")]
4473 unsafe fn test_mm256_cmpgt_epi32() {
4474 let a = _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), 5);
4475 let b = _mm256_set1_epi32(0);
4476 let r = _mm256_cmpgt_epi32(a, b);
4477 assert_eq_m256i(r, _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), !0));
4478 }
4479
4480 #[simd_test(enable = "avx2")]
4481 unsafe fn test_mm256_cmpgt_epi64() {
4482 let a = _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), 5);
4483 let b = _mm256_set1_epi64x(0);
4484 let r = _mm256_cmpgt_epi64(a, b);
4485 assert_eq_m256i(r, _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), !0));
4486 }
4487
4488 #[simd_test(enable = "avx2")]
4489 unsafe fn test_mm256_cvtepi8_epi16() {
4490 #[rustfmt::skip]
4491 let a = _mm_setr_epi8(
4492 0, 0, -1, 1, -2, 2, -3, 3,
4493 -4, 4, -5, 5, -6, 6, -7, 7,
4494 );
4495 #[rustfmt::skip]
4496 let r = _mm256_setr_epi16(
4497 0, 0, -1, 1, -2, 2, -3, 3,
4498 -4, 4, -5, 5, -6, 6, -7, 7,
4499 );
4500 assert_eq_m256i(r, _mm256_cvtepi8_epi16(a));
4501 }
4502
4503 #[simd_test(enable = "avx2")]
4504 unsafe fn test_mm256_cvtepi8_epi32() {
4505 #[rustfmt::skip]
4506 let a = _mm_setr_epi8(
4507 0, 0, -1, 1, -2, 2, -3, 3,
4508 -4, 4, -5, 5, -6, 6, -7, 7,
4509 );
4510 let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
4511 assert_eq_m256i(r, _mm256_cvtepi8_epi32(a));
4512 }
4513
4514 #[simd_test(enable = "avx2")]
4515 unsafe fn test_mm256_cvtepi8_epi64() {
4516 #[rustfmt::skip]
4517 let a = _mm_setr_epi8(
4518 0, 0, -1, 1, -2, 2, -3, 3,
4519 -4, 4, -5, 5, -6, 6, -7, 7,
4520 );
4521 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4522 assert_eq_m256i(r, _mm256_cvtepi8_epi64(a));
4523 }
4524
4525 #[simd_test(enable = "avx2")]
4526 unsafe fn test_mm256_cvtepi16_epi32() {
4527 let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
4528 let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
4529 assert_eq_m256i(r, _mm256_cvtepi16_epi32(a));
4530 }
4531
4532 #[simd_test(enable = "avx2")]
4533 unsafe fn test_mm256_cvtepi16_epi64() {
4534 let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
4535 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4536 assert_eq_m256i(r, _mm256_cvtepi16_epi64(a));
4537 }
4538
4539 #[simd_test(enable = "avx2")]
4540 unsafe fn test_mm256_cvtepi32_epi64() {
4541 let a = _mm_setr_epi32(0, 0, -1, 1);
4542 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4543 assert_eq_m256i(r, _mm256_cvtepi32_epi64(a));
4544 }
4545
4546 #[simd_test(enable = "avx2")]
4547 unsafe fn test_mm256_cvtepu16_epi32() {
4548 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4549 let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4550 assert_eq_m256i(r, _mm256_cvtepu16_epi32(a));
4551 }
4552
4553 #[simd_test(enable = "avx2")]
4554 unsafe fn test_mm256_cvtepu16_epi64() {
4555 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4556 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4557 assert_eq_m256i(r, _mm256_cvtepu16_epi64(a));
4558 }
4559
4560 #[simd_test(enable = "avx2")]
4561 unsafe fn test_mm256_cvtepu32_epi64() {
4562 let a = _mm_setr_epi32(0, 1, 2, 3);
4563 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4564 assert_eq_m256i(r, _mm256_cvtepu32_epi64(a));
4565 }
4566
4567 #[simd_test(enable = "avx2")]
4568 unsafe fn test_mm256_cvtepu8_epi16() {
4569 #[rustfmt::skip]
4570 let a = _mm_setr_epi8(
4571 0, 1, 2, 3, 4, 5, 6, 7,
4572 8, 9, 10, 11, 12, 13, 14, 15,
4573 );
4574 #[rustfmt::skip]
4575 let r = _mm256_setr_epi16(
4576 0, 1, 2, 3, 4, 5, 6, 7,
4577 8, 9, 10, 11, 12, 13, 14, 15,
4578 );
4579 assert_eq_m256i(r, _mm256_cvtepu8_epi16(a));
4580 }
4581
4582 #[simd_test(enable = "avx2")]
4583 unsafe fn test_mm256_cvtepu8_epi32() {
4584 #[rustfmt::skip]
4585 let a = _mm_setr_epi8(
4586 0, 1, 2, 3, 4, 5, 6, 7,
4587 8, 9, 10, 11, 12, 13, 14, 15,
4588 );
4589 let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4590 assert_eq_m256i(r, _mm256_cvtepu8_epi32(a));
4591 }
4592
4593 #[simd_test(enable = "avx2")]
4594 unsafe fn test_mm256_cvtepu8_epi64() {
4595 #[rustfmt::skip]
4596 let a = _mm_setr_epi8(
4597 0, 1, 2, 3, 4, 5, 6, 7,
4598 8, 9, 10, 11, 12, 13, 14, 15,
4599 );
4600 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4601 assert_eq_m256i(r, _mm256_cvtepu8_epi64(a));
4602 }
4603
4604 #[simd_test(enable = "avx2")]
4605 unsafe fn test_mm256_extracti128_si256() {
4606 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4607 let r = _mm256_extracti128_si256::<1>(a);
4608 let e = _mm_setr_epi64x(3, 4);
4609 assert_eq_m128i(r, e);
4610 }
4611
4612 #[simd_test(enable = "avx2")]
4613 unsafe fn test_mm256_hadd_epi16() {
4614 let a = _mm256_set1_epi16(2);
4615 let b = _mm256_set1_epi16(4);
4616 let r = _mm256_hadd_epi16(a, b);
4617 let e = _mm256_setr_epi16(4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8);
4618 assert_eq_m256i(r, e);
4619 }
4620
4621 #[simd_test(enable = "avx2")]
4622 unsafe fn test_mm256_hadd_epi32() {
4623 let a = _mm256_set1_epi32(2);
4624 let b = _mm256_set1_epi32(4);
4625 let r = _mm256_hadd_epi32(a, b);
4626 let e = _mm256_setr_epi32(4, 4, 8, 8, 4, 4, 8, 8);
4627 assert_eq_m256i(r, e);
4628 }
4629
4630 #[simd_test(enable = "avx2")]
4631 unsafe fn test_mm256_hadds_epi16() {
4632 let a = _mm256_set1_epi16(2);
4633 let a = _mm256_insert_epi16::<0>(a, 0x7fff);
4634 let a = _mm256_insert_epi16::<1>(a, 1);
4635 let b = _mm256_set1_epi16(4);
4636 let r = _mm256_hadds_epi16(a, b);
4637 #[rustfmt::skip]
4638 let e = _mm256_setr_epi16(
4639 0x7FFF, 4, 4, 4, 8, 8, 8, 8,
4640 4, 4, 4, 4, 8, 8, 8, 8,
4641 );
4642 assert_eq_m256i(r, e);
4643 }
4644
4645 #[simd_test(enable = "avx2")]
4646 unsafe fn test_mm256_hsub_epi16() {
4647 let a = _mm256_set1_epi16(2);
4648 let b = _mm256_set1_epi16(4);
4649 let r = _mm256_hsub_epi16(a, b);
4650 let e = _mm256_set1_epi16(0);
4651 assert_eq_m256i(r, e);
4652 }
4653
4654 #[simd_test(enable = "avx2")]
4655 unsafe fn test_mm256_hsub_epi32() {
4656 let a = _mm256_set1_epi32(2);
4657 let b = _mm256_set1_epi32(4);
4658 let r = _mm256_hsub_epi32(a, b);
4659 let e = _mm256_set1_epi32(0);
4660 assert_eq_m256i(r, e);
4661 }
4662
4663 #[simd_test(enable = "avx2")]
4664 unsafe fn test_mm256_hsubs_epi16() {
4665 let a = _mm256_set1_epi16(2);
4666 let a = _mm256_insert_epi16::<0>(a, 0x7fff);
4667 let a = _mm256_insert_epi16::<1>(a, -1);
4668 let b = _mm256_set1_epi16(4);
4669 let r = _mm256_hsubs_epi16(a, b);
4670 let e = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 0x7FFF);
4671 assert_eq_m256i(r, e);
4672 }
4673
4674 #[simd_test(enable = "avx2")]
4675 unsafe fn test_mm256_madd_epi16() {
4676 let a = _mm256_set1_epi16(2);
4677 let b = _mm256_set1_epi16(4);
4678 let r = _mm256_madd_epi16(a, b);
4679 let e = _mm256_set1_epi32(16);
4680 assert_eq_m256i(r, e);
4681 }
4682
4683 #[simd_test(enable = "avx2")]
4684 unsafe fn test_mm256_inserti128_si256() {
4685 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4686 let b = _mm_setr_epi64x(7, 8);
4687 let r = _mm256_inserti128_si256::<1>(a, b);
4688 let e = _mm256_setr_epi64x(1, 2, 7, 8);
4689 assert_eq_m256i(r, e);
4690 }
4691
4692 #[simd_test(enable = "avx2")]
4693 unsafe fn test_mm256_maddubs_epi16() {
4694 let a = _mm256_set1_epi8(2);
4695 let b = _mm256_set1_epi8(4);
4696 let r = _mm256_maddubs_epi16(a, b);
4697 let e = _mm256_set1_epi16(16);
4698 assert_eq_m256i(r, e);
4699 }
4700
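// Masked loads read only the elements whose mask element has its sign bit set;
// all other result elements are zeroed and their memory is not touched.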
4701 #[simd_test(enable = "avx2")]
4702 unsafe fn test_mm_maskload_epi32() {
4703 let nums = [1, 2, 3, 4];
4704 let a = &nums as *const i32;
4705 let mask = _mm_setr_epi32(-1, 0, 0, -1);
4706 let r = _mm_maskload_epi32(a, mask);
4707 let e = _mm_setr_epi32(1, 0, 0, 4);
4708 assert_eq_m128i(r, e);
4709 }
4710
4711 #[simd_test(enable = "avx2")]
4712 unsafe fn test_mm256_maskload_epi32() {
4713 let nums = [1, 2, 3, 4, 5, 6, 7, 8];
4714 let a = &nums as *const i32;
4715 let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
4716 let r = _mm256_maskload_epi32(a, mask);
4717 let e = _mm256_setr_epi32(1, 0, 0, 4, 0, 6, 7, 0);
4718 assert_eq_m256i(r, e);
4719 }
4720
4721 #[simd_test(enable = "avx2")]
4722 unsafe fn test_mm_maskload_epi64() {
4723 let nums = [1_i64, 2_i64];
4724 let a = &nums as *const i64;
4725 let mask = _mm_setr_epi64x(0, -1);
4726 let r = _mm_maskload_epi64(a, mask);
4727 let e = _mm_setr_epi64x(0, 2);
4728 assert_eq_m128i(r, e);
4729 }
4730
4731 #[simd_test(enable = "avx2")]
4732 unsafe fn test_mm256_maskload_epi64() {
4733 let nums = [1_i64, 2_i64, 3_i64, 4_i64];
4734 let a = &nums as *const i64;
4735 let mask = _mm256_setr_epi64x(0, -1, -1, 0);
4736 let r = _mm256_maskload_epi64(a, mask);
4737 let e = _mm256_setr_epi64x(0, 2, 3, 0);
4738 assert_eq_m256i(r, e);
4739 }
4740
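// Masked stores are the mirror image: only elements whose mask sign bit is set
// are written back, so the untouched array slots keep their previous values.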
4741 #[simd_test(enable = "avx2")]
4742 unsafe fn test_mm_maskstore_epi32() {
4743 let a = _mm_setr_epi32(1, 2, 3, 4);
4744 let mut arr = [-1, -1, -1, -1];
4745 let mask = _mm_setr_epi32(-1, 0, 0, -1);
4746 _mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
4747 let e = [1, -1, -1, 4];
4748 assert_eq!(arr, e);
4749 }
4750
4751 #[simd_test(enable = "avx2")]
4752 unsafe fn test_mm256_maskstore_epi32() {
4753 let a = _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8);
4754 let mut arr = [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1];
4755 let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
4756 _mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
4757 let e = [1, -1, -1, 42, -1, 6, 7, -1];
4758 assert_eq!(arr, e);
4759 }
4760
4761 #[simd_test(enable = "avx2")]
4762 unsafe fn test_mm_maskstore_epi64() {
4763 let a = _mm_setr_epi64x(1_i64, 2_i64);
4764 let mut arr = [-1_i64, -1_i64];
4765 let mask = _mm_setr_epi64x(0, -1);
4766 _mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
4767 let e = [-1, 2];
4768 assert_eq!(arr, e);
4769 }
4770
4771 #[simd_test(enable = "avx2")]
4772 unsafe fn test_mm256_maskstore_epi64() {
4773 let a = _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64);
4774 let mut arr = [-1_i64, -1_i64, -1_i64, -1_i64];
4775 let mask = _mm256_setr_epi64x(0, -1, -1, 0);
4776 _mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
4777 let e = [-1, 2, 3, -1];
4778 assert_eq!(arr, e);
4779 }
4780
4781 #[simd_test(enable = "avx2")]
4782 unsafe fn test_mm256_max_epi16() {
4783 let a = _mm256_set1_epi16(2);
4784 let b = _mm256_set1_epi16(4);
4785 let r = _mm256_max_epi16(a, b);
4786 assert_eq_m256i(r, b);
4787 }
4788
4789 #[simd_test(enable = "avx2")]
4790 unsafe fn test_mm256_max_epi32() {
4791 let a = _mm256_set1_epi32(2);
4792 let b = _mm256_set1_epi32(4);
4793 let r = _mm256_max_epi32(a, b);
4794 assert_eq_m256i(r, b);
4795 }
4796
4797 #[simd_test(enable = "avx2")]
4798 unsafe fn test_mm256_max_epi8() {
4799 let a = _mm256_set1_epi8(2);
4800 let b = _mm256_set1_epi8(4);
4801 let r = _mm256_max_epi8(a, b);
4802 assert_eq_m256i(r, b);
4803 }
4804
4805 #[simd_test(enable = "avx2")]
4806 unsafe fn test_mm256_max_epu16() {
4807 let a = _mm256_set1_epi16(2);
4808 let b = _mm256_set1_epi16(4);
4809 let r = _mm256_max_epu16(a, b);
4810 assert_eq_m256i(r, b);
4811 }
4812
4813 #[simd_test(enable = "avx2")]
4814 unsafe fn test_mm256_max_epu32() {
4815 let a = _mm256_set1_epi32(2);
4816 let b = _mm256_set1_epi32(4);
4817 let r = _mm256_max_epu32(a, b);
4818 assert_eq_m256i(r, b);
4819 }
4820
4821 #[simd_test(enable = "avx2")]
4822 unsafe fn test_mm256_max_epu8() {
4823 let a = _mm256_set1_epi8(2);
4824 let b = _mm256_set1_epi8(4);
4825 let r = _mm256_max_epu8(a, b);
4826 assert_eq_m256i(r, b);
4827 }
4828
4829 #[simd_test(enable = "avx2")]
4830 unsafe fn test_mm256_min_epi16() {
4831 let a = _mm256_set1_epi16(2);
4832 let b = _mm256_set1_epi16(4);
4833 let r = _mm256_min_epi16(a, b);
4834 assert_eq_m256i(r, a);
4835 }
4836
4837 #[simd_test(enable = "avx2")]
4838 unsafe fn test_mm256_min_epi32() {
4839 let a = _mm256_set1_epi32(2);
4840 let b = _mm256_set1_epi32(4);
4841 let r = _mm256_min_epi32(a, b);
4842 assert_eq_m256i(r, a);
4843 }
4844
4845 #[simd_test(enable = "avx2")]
4846 unsafe fn test_mm256_min_epi8() {
4847 let a = _mm256_set1_epi8(2);
4848 let b = _mm256_set1_epi8(4);
4849 let r = _mm256_min_epi8(a, b);
4850 assert_eq_m256i(r, a);
4851 }
4852
4853 #[simd_test(enable = "avx2")]
4854 unsafe fn test_mm256_min_epu16() {
4855 let a = _mm256_set1_epi16(2);
4856 let b = _mm256_set1_epi16(4);
4857 let r = _mm256_min_epu16(a, b);
4858 assert_eq_m256i(r, a);
4859 }
4860
4861 #[simd_test(enable = "avx2")]
4862 unsafe fn test_mm256_min_epu32() {
4863 let a = _mm256_set1_epi32(2);
4864 let b = _mm256_set1_epi32(4);
4865 let r = _mm256_min_epu32(a, b);
4866 assert_eq_m256i(r, a);
4867 }
4868
4869 #[simd_test(enable = "avx2")]
4870 unsafe fn test_mm256_min_epu8() {
4871 let a = _mm256_set1_epi8(2);
4872 let b = _mm256_set1_epi8(4);
4873 let r = _mm256_min_epu8(a, b);
4874 assert_eq_m256i(r, a);
4875 }
4876
4877 #[simd_test(enable = "avx2")]
4878 unsafe fn test_mm256_movemask_epi8() {
4879 let a = _mm256_set1_epi8(-1);
4880 let r = _mm256_movemask_epi8(a);
4881 let e = -1;
4882 assert_eq!(r, e);
4883 }
4884
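// Each 16-bit result of _mm256_mpsadbw_epu8 is a sum of absolute differences
// over four byte pairs, so with all-2 and all-4 inputs every result is
// 4 * |2 - 4| = 8, independent of the control immediate.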
4885 #[simd_test(enable = "avx2")]
4886 unsafe fn test_mm256_mpsadbw_epu8() {
4887 let a = _mm256_set1_epi8(2);
4888 let b = _mm256_set1_epi8(4);
4889 let r = _mm256_mpsadbw_epu8::<0>(a, b);
4890 let e = _mm256_set1_epi16(8);
4891 assert_eq_m256i(r, e);
4892 }
4893
4894 #[simd_test(enable = "avx2")]
4895 unsafe fn test_mm256_mul_epi32() {
4896 let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
4897 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4898 let r = _mm256_mul_epi32(a, b);
4899 let e = _mm256_setr_epi64x(0, 0, 10, 14);
4900 assert_eq_m256i(r, e);
4901 }
4902
4903 #[simd_test(enable = "avx2")]
4904 unsafe fn test_mm256_mul_epu32() {
4905 let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
4906 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4907 let r = _mm256_mul_epu32(a, b);
4908 let e = _mm256_setr_epi64x(0, 0, 10, 14);
4909 assert_eq_m256i(r, e);
4910 }
4911
4912 #[simd_test(enable = "avx2")]
4913 unsafe fn test_mm256_mulhi_epi16() {
4914 let a = _mm256_set1_epi16(6535);
4915 let b = _mm256_set1_epi16(6535);
4916 let r = _mm256_mulhi_epi16(a, b);
4917 let e = _mm256_set1_epi16(651);
4918 assert_eq_m256i(r, e);
4919 }
4920
4921 #[simd_test(enable = "avx2")]
4922 unsafe fn test_mm256_mulhi_epu16() {
4923 let a = _mm256_set1_epi16(6535);
4924 let b = _mm256_set1_epi16(6535);
4925 let r = _mm256_mulhi_epu16(a, b);
4926 let e = _mm256_set1_epi16(651);
4927 assert_eq_m256i(r, e);
4928 }
4929
4930 #[simd_test(enable = "avx2")]
4931 unsafe fn test_mm256_mullo_epi16() {
4932 let a = _mm256_set1_epi16(2);
4933 let b = _mm256_set1_epi16(4);
4934 let r = _mm256_mullo_epi16(a, b);
4935 let e = _mm256_set1_epi16(8);
4936 assert_eq_m256i(r, e);
4937 }
4938
4939 #[simd_test(enable = "avx2")]
4940 unsafe fn test_mm256_mullo_epi32() {
4941 let a = _mm256_set1_epi32(2);
4942 let b = _mm256_set1_epi32(4);
4943 let r = _mm256_mullo_epi32(a, b);
4944 let e = _mm256_set1_epi32(8);
4945 assert_eq_m256i(r, e);
4946 }
4947
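// _mm256_mulhrs_epi16 (vpmulhrsw) forms the 32-bit product of each 16-bit pair,
// shifts it right by 14, adds 1 and shifts right once more, i.e. it returns
// a * b / 2^15 rounded to nearest: 16384 * 16384 / 32768 gives exactly 8192.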
4948 #[simd_test(enable = "avx2")]
4949 unsafe fn test_mm256_mulhrs_epi16() {
4950 let a = _mm256_set1_epi16(16384);
4951 let b = _mm256_set1_epi16(16384);
4952 let r = _mm256_mulhrs_epi16(a, b);
4953 let e = _mm256_set1_epi16(8192);
4954 assert_eq_m256i(r, e);
4955 }
4956
4957 #[simd_test(enable = "avx2")]
4958 unsafe fn test_mm256_or_si256() {
4959 let a = _mm256_set1_epi8(-1);
4960 let b = _mm256_set1_epi8(0);
4961 let r = _mm256_or_si256(a, b);
4962 assert_eq_m256i(r, a);
4963 }
4964
4965 #[simd_test(enable = "avx2")]
4966 unsafe fn test_mm256_packs_epi16() {
4967 let a = _mm256_set1_epi16(2);
4968 let b = _mm256_set1_epi16(4);
4969 let r = _mm256_packs_epi16(a, b);
4970 #[rustfmt::skip]
4971 let e = _mm256_setr_epi8(
4972 2, 2, 2, 2, 2, 2, 2, 2,
4973 4, 4, 4, 4, 4, 4, 4, 4,
4974 2, 2, 2, 2, 2, 2, 2, 2,
4975 4, 4, 4, 4, 4, 4, 4, 4,
4976 );
4977
4978 assert_eq_m256i(r, e);
4979 }
4980
4981 #[simd_test(enable = "avx2")]
4982 unsafe fn test_mm256_packs_epi32() {
4983 let a = _mm256_set1_epi32(2);
4984 let b = _mm256_set1_epi32(4);
4985 let r = _mm256_packs_epi32(a, b);
4986 let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
4987
4988 assert_eq_m256i(r, e);
4989 }
4990
4991 #[simd_test(enable = "avx2")]
4992 unsafe fn test_mm256_packus_epi16() {
4993 let a = _mm256_set1_epi16(2);
4994 let b = _mm256_set1_epi16(4);
4995 let r = _mm256_packus_epi16(a, b);
4996 #[rustfmt::skip]
4997 let e = _mm256_setr_epi8(
4998 2, 2, 2, 2, 2, 2, 2, 2,
4999 4, 4, 4, 4, 4, 4, 4, 4,
5000 2, 2, 2, 2, 2, 2, 2, 2,
5001 4, 4, 4, 4, 4, 4, 4, 4,
5002 );
5003
5004 assert_eq_m256i(r, e);
5005 }
5006
5007 #[simd_test(enable = "avx2")]
5008 unsafe fn test_mm256_packus_epi32() {
5009 let a = _mm256_set1_epi32(2);
5010 let b = _mm256_set1_epi32(4);
5011 let r = _mm256_packus_epi32(a, b);
5012 let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
5013
5014 assert_eq_m256i(r, e);
5015 }
5016
5017 #[simd_test(enable = "avx2")]
5018 unsafe fn test_mm256_sad_epu8() {
5019 let a = _mm256_set1_epi8(2);
5020 let b = _mm256_set1_epi8(4);
5021 let r = _mm256_sad_epu8(a, b);
5022 let e = _mm256_set1_epi64x(16);
5023 assert_eq_m256i(r, e);
5024 }
5025
5026 #[simd_test(enable = "avx2")]
5027 unsafe fn test_mm256_shufflehi_epi16() {
5028 #[rustfmt::skip]
5029 let a = _mm256_setr_epi16(
5030 0, 1, 2, 3, 11, 22, 33, 44,
5031 4, 5, 6, 7, 55, 66, 77, 88,
5032 );
5033 #[rustfmt::skip]
5034 let e = _mm256_setr_epi16(
5035 0, 1, 2, 3, 44, 22, 22, 11,
5036 4, 5, 6, 7, 88, 66, 66, 55,
5037 );
5038 let r = _mm256_shufflehi_epi16::<0b00_01_01_11>(a);
5039 assert_eq_m256i(r, e);
5040 }
5041
5042 #[simd_test(enable = "avx2")]
5043 unsafe fn test_mm256_shufflelo_epi16() {
5044 #[rustfmt::skip]
5045 let a = _mm256_setr_epi16(
5046 11, 22, 33, 44, 0, 1, 2, 3,
5047 55, 66, 77, 88, 4, 5, 6, 7,
5048 );
5049 #[rustfmt::skip]
5050 let e = _mm256_setr_epi16(
5051 44, 22, 22, 11, 0, 1, 2, 3,
5052 88, 66, 66, 55, 4, 5, 6, 7,
5053 );
5054 let r = _mm256_shufflelo_epi16::<0b00_01_01_11>(a);
5055 assert_eq_m256i(r, e);
5056 }
5057
5058 #[simd_test(enable = "avx2")]
5059 unsafe fn test_mm256_sign_epi16() {
5060 let a = _mm256_set1_epi16(2);
5061 let b = _mm256_set1_epi16(-1);
5062 let r = _mm256_sign_epi16(a, b);
5063 let e = _mm256_set1_epi16(-2);
5064 assert_eq_m256i(r, e);
5065 }
5066
5067 #[simd_test(enable = "avx2")]
5068 unsafe fn test_mm256_sign_epi32() {
5069 let a = _mm256_set1_epi32(2);
5070 let b = _mm256_set1_epi32(-1);
5071 let r = _mm256_sign_epi32(a, b);
5072 let e = _mm256_set1_epi32(-2);
5073 assert_eq_m256i(r, e);
5074 }
5075
5076 #[simd_test(enable = "avx2")]
5077 unsafe fn test_mm256_sign_epi8() {
5078 let a = _mm256_set1_epi8(2);
5079 let b = _mm256_set1_epi8(-1);
5080 let r = _mm256_sign_epi8(a, b);
5081 let e = _mm256_set1_epi8(-2);
5082 assert_eq_m256i(r, e);
5083 }
5084
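// For the vector-count shifts (_mm256_sll_* / _mm256_srl_* / _mm256_sra_*) the
// shift amount is the unsigned value held in the low 64 bits of the __m128i
// count; logical shifts by at least the element width produce zero, while
// arithmetic right shifts fill with the sign bit.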
5085 #[simd_test(enable = "avx2")]
5086 unsafe fn test_mm256_sll_epi16() {
5087 let a = _mm256_set1_epi16(0xFF);
5088 let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
5089 let r = _mm256_sll_epi16(a, b);
5090 assert_eq_m256i(r, _mm256_set1_epi16(0xFF0));
5091 }
5092
5093 #[simd_test(enable = "avx2")]
5094 unsafe fn test_mm256_sll_epi32() {
5095 let a = _mm256_set1_epi32(0xFFFF);
5096 let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
5097 let r = _mm256_sll_epi32(a, b);
5098 assert_eq_m256i(r, _mm256_set1_epi32(0xFFFF0));
5099 }
5100
5101 #[simd_test(enable = "avx2")]
5102 unsafe fn test_mm256_sll_epi64() {
5103 let a = _mm256_set1_epi64x(0xFFFFFFFF);
5104 let b = _mm_insert_epi64::<0>(_mm_set1_epi64x(0), 4);
5105 let r = _mm256_sll_epi64(a, b);
5106 assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF0));
5107 }
5108
5109 #[simd_test(enable = "avx2")]
5110 unsafe fn test_mm256_slli_epi16() {
5111 assert_eq_m256i(
5112 _mm256_slli_epi16::<4>(_mm256_set1_epi16(0xFF)),
5113 _mm256_set1_epi16(0xFF0),
5114 );
5115 }
5116
5117 #[simd_test(enable = "avx2")]
5118 unsafe fn test_mm256_slli_epi32() {
5119 assert_eq_m256i(
5120 _mm256_slli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
5121 _mm256_set1_epi32(0xFFFF0),
5122 );
5123 }
5124
5125 #[simd_test(enable = "avx2")]
5126 unsafe fn test_mm256_slli_epi64() {
5127 assert_eq_m256i(
5128 _mm256_slli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
5129 _mm256_set1_epi64x(0xFFFFFFFF0),
5130 );
5131 }
5132
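// The whole-register byte shifts (_mm256_slli_si256 / _mm256_srli_si256) shift
// each 128-bit lane independently by IMM8 bytes, shifting in zeros; an IMM8
// greater than 15 clears the lane entirely.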
5133 #[simd_test(enable = "avx2")]
5134 unsafe fn test_mm256_slli_si256() {
5135 let a = _mm256_set1_epi64x(0xFFFFFFFF);
5136 let r = _mm256_slli_si256::<3>(a);
5137 assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF000000));
5138 }
5139
5140 #[simd_test(enable = "avx2")]
5141 unsafe fn test_mm_sllv_epi32() {
5142 let a = _mm_set1_epi32(2);
5143 let b = _mm_set1_epi32(1);
5144 let r = _mm_sllv_epi32(a, b);
5145 let e = _mm_set1_epi32(4);
5146 assert_eq_m128i(r, e);
5147 }
5148
5149 #[simd_test(enable = "avx2")]
5150 unsafe fn test_mm256_sllv_epi32() {
5151 let a = _mm256_set1_epi32(2);
5152 let b = _mm256_set1_epi32(1);
5153 let r = _mm256_sllv_epi32(a, b);
5154 let e = _mm256_set1_epi32(4);
5155 assert_eq_m256i(r, e);
5156 }
5157
5158 #[simd_test(enable = "avx2")]
5159 unsafe fn test_mm_sllv_epi64() {
5160 let a = _mm_set1_epi64x(2);
5161 let b = _mm_set1_epi64x(1);
5162 let r = _mm_sllv_epi64(a, b);
5163 let e = _mm_set1_epi64x(4);
5164 assert_eq_m128i(r, e);
5165 }
5166
5167 #[simd_test(enable = "avx2")]
5168 unsafe fn test_mm256_sllv_epi64() {
5169 let a = _mm256_set1_epi64x(2);
5170 let b = _mm256_set1_epi64x(1);
5171 let r = _mm256_sllv_epi64(a, b);
5172 let e = _mm256_set1_epi64x(4);
5173 assert_eq_m256i(r, e);
5174 }
5175
5176 #[simd_test(enable = "avx2")]
5177 unsafe fn test_mm256_sra_epi16() {
5178 let a = _mm256_set1_epi16(-1);
5179 let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
5180 let r = _mm256_sra_epi16(a, b);
5181 assert_eq_m256i(r, _mm256_set1_epi16(-1));
5182 }
5183
5184 #[simd_test(enable = "avx2")]
5185 unsafe fn test_mm256_sra_epi32() {
5186 let a = _mm256_set1_epi32(-1);
5187 let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 1);
5188 let r = _mm256_sra_epi32(a, b);
5189 assert_eq_m256i(r, _mm256_set1_epi32(-1));
5190 }
5191
5192 #[simd_test(enable = "avx2")]
5193 unsafe fn test_mm256_srai_epi16() {
5194 assert_eq_m256i(
5195 _mm256_srai_epi16::<1>(_mm256_set1_epi16(-1)),
5196 _mm256_set1_epi16(-1),
5197 );
5198 }
5199
5200 #[simd_test(enable = "avx2")]
5201 unsafe fn test_mm256_srai_epi32() {
5202 assert_eq_m256i(
5203 _mm256_srai_epi32::<1>(_mm256_set1_epi32(-1)),
5204 _mm256_set1_epi32(-1),
5205 );
5206 }
5207
5208 #[simd_test(enable = "avx2")]
5209 unsafe fn test_mm_srav_epi32() {
5210 let a = _mm_set1_epi32(4);
5211 let count = _mm_set1_epi32(1);
5212 let r = _mm_srav_epi32(a, count);
5213 let e = _mm_set1_epi32(2);
5214 assert_eq_m128i(r, e);
5215 }
5216
5217 #[simd_test(enable = "avx2")]
5218 unsafe fn test_mm256_srav_epi32() {
5219 let a = _mm256_set1_epi32(4);
5220 let count = _mm256_set1_epi32(1);
5221 let r = _mm256_srav_epi32(a, count);
5222 let e = _mm256_set1_epi32(2);
5223 assert_eq_m256i(r, e);
5224 }
5225
5226 #[simd_test(enable = "avx2")]
5227 unsafe fn test_mm256_srli_si256() {
5228 #[rustfmt::skip]
5229 let a = _mm256_setr_epi8(
5230 1, 2, 3, 4, 5, 6, 7, 8,
5231 9, 10, 11, 12, 13, 14, 15, 16,
5232 17, 18, 19, 20, 21, 22, 23, 24,
5233 25, 26, 27, 28, 29, 30, 31, 32,
5234 );
5235 let r = _mm256_srli_si256::<3>(a);
5236 #[rustfmt::skip]
5237 let e = _mm256_setr_epi8(
5238 4, 5, 6, 7, 8, 9, 10, 11,
5239 12, 13, 14, 15, 16, 0, 0, 0,
5240 20, 21, 22, 23, 24, 25, 26, 27,
5241 28, 29, 30, 31, 32, 0, 0, 0,
5242 );
5243 assert_eq_m256i(r, e);
5244 }
5245
5246 #[simd_test(enable = "avx2")]
5247 unsafe fn test_mm256_srl_epi16() {
5248 let a = _mm256_set1_epi16(0xFF);
5249 let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
5250 let r = _mm256_srl_epi16(a, b);
5251 assert_eq_m256i(r, _mm256_set1_epi16(0xF));
5252 }
5253
5254 #[simd_test(enable = "avx2")]
5255 unsafe fn test_mm256_srl_epi32() {
5256 let a = _mm256_set1_epi32(0xFFFF);
5257 let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
5258 let r = _mm256_srl_epi32(a, b);
5259 assert_eq_m256i(r, _mm256_set1_epi32(0xFFF));
5260 }
5261
5262 #[simd_test(enable = "avx2")]
5263 unsafe fn test_mm256_srl_epi64() {
5264 let a = _mm256_set1_epi64x(0xFFFFFFFF);
5265 let b = _mm_setr_epi64x(4, 0);
5266 let r = _mm256_srl_epi64(a, b);
5267 assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFF));
5268 }
5269
5270 #[simd_test(enable = "avx2")]
5271 unsafe fn test_mm256_srli_epi16() {
5272 assert_eq_m256i(
5273 _mm256_srli_epi16::<4>(_mm256_set1_epi16(0xFF)),
5274 _mm256_set1_epi16(0xF),
5275 );
5276 }
5277
5278 #[simd_test(enable = "avx2")]
5279 unsafe fn test_mm256_srli_epi32() {
5280 assert_eq_m256i(
5281 _mm256_srli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
5282 _mm256_set1_epi32(0xFFF),
5283 );
5284 }
5285
5286 #[simd_test(enable = "avx2")]
5287 unsafe fn test_mm256_srli_epi64() {
5288 assert_eq_m256i(
5289 _mm256_srli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
5290 _mm256_set1_epi64x(0xFFFFFFF),
5291 );
5292 }
5293
5294 #[simd_test(enable = "avx2")]
5295 unsafe fn test_mm_srlv_epi32() {
5296 let a = _mm_set1_epi32(2);
5297 let count = _mm_set1_epi32(1);
5298 let r = _mm_srlv_epi32(a, count);
5299 let e = _mm_set1_epi32(1);
5300 assert_eq_m128i(r, e);
5301 }
5302
5303 #[simd_test(enable = "avx2")]
5304 unsafe fn test_mm256_srlv_epi32() {
5305 let a = _mm256_set1_epi32(2);
5306 let count = _mm256_set1_epi32(1);
5307 let r = _mm256_srlv_epi32(a, count);
5308 let e = _mm256_set1_epi32(1);
5309 assert_eq_m256i(r, e);
5310 }
5311
5312 #[simd_test(enable = "avx2")]
5313 unsafe fn test_mm_srlv_epi64() {
5314 let a = _mm_set1_epi64x(2);
5315 let count = _mm_set1_epi64x(1);
5316 let r = _mm_srlv_epi64(a, count);
5317 let e = _mm_set1_epi64x(1);
5318 assert_eq_m128i(r, e);
5319 }
5320
5321 #[simd_test(enable = "avx2")]
5322 unsafe fn test_mm256_srlv_epi64() {
5323 let a = _mm256_set1_epi64x(2);
5324 let count = _mm256_set1_epi64x(1);
5325 let r = _mm256_srlv_epi64(a, count);
5326 let e = _mm256_set1_epi64x(1);
5327 assert_eq_m256i(r, e);
5328 }
5329
5330 #[simd_test(enable = "avx2")]
5331 unsafe fn test_mm256_stream_load_si256() {
5332 let a = _mm256_set_epi64x(5, 6, 7, 8);
5333 let r = _mm256_stream_load_si256(core::ptr::addr_of!(a) as *const _);
5334 assert_eq_m256i(a, r);
5335 }
5336
5337 #[simd_test(enable = "avx2")]
5338 unsafe fn test_mm256_sub_epi16() {
5339 let a = _mm256_set1_epi16(4);
5340 let b = _mm256_set1_epi16(2);
5341 let r = _mm256_sub_epi16(a, b);
5342 assert_eq_m256i(r, b);
5343 }
5344
5345 #[simd_test(enable = "avx2")]
5346 unsafe fn test_mm256_sub_epi32() {
5347 let a = _mm256_set1_epi32(4);
5348 let b = _mm256_set1_epi32(2);
5349 let r = _mm256_sub_epi32(a, b);
5350 assert_eq_m256i(r, b);
5351 }
5352
5353 #[simd_test(enable = "avx2")]
5354 unsafe fn test_mm256_sub_epi64() {
5355 let a = _mm256_set1_epi64x(4);
5356 let b = _mm256_set1_epi64x(2);
5357 let r = _mm256_sub_epi64(a, b);
5358 assert_eq_m256i(r, b);
5359 }
5360
5361 #[simd_test(enable = "avx2")]
5362 unsafe fn test_mm256_sub_epi8() {
5363 let a = _mm256_set1_epi8(4);
5364 let b = _mm256_set1_epi8(2);
5365 let r = _mm256_sub_epi8(a, b);
5366 assert_eq_m256i(r, b);
5367 }
5368
5369 #[simd_test(enable = "avx2")]
5370 unsafe fn test_mm256_subs_epi16() {
5371 let a = _mm256_set1_epi16(4);
5372 let b = _mm256_set1_epi16(2);
5373 let r = _mm256_subs_epi16(a, b);
5374 assert_eq_m256i(r, b);
5375 }
5376
5377 #[simd_test(enable = "avx2")]
5378 unsafe fn test_mm256_subs_epi8() {
5379 let a = _mm256_set1_epi8(4);
5380 let b = _mm256_set1_epi8(2);
5381 let r = _mm256_subs_epi8(a, b);
5382 assert_eq_m256i(r, b);
5383 }
5384
5385 #[simd_test(enable = "avx2")]
5386 unsafe fn test_mm256_subs_epu16() {
5387 let a = _mm256_set1_epi16(4);
5388 let b = _mm256_set1_epi16(2);
5389 let r = _mm256_subs_epu16(a, b);
5390 assert_eq_m256i(r, b);
5391 }
5392
5393 #[simd_test(enable = "avx2")]
5394 unsafe fn test_mm256_subs_epu8() {
5395 let a = _mm256_set1_epi8(4);
5396 let b = _mm256_set1_epi8(2);
5397 let r = _mm256_subs_epu8(a, b);
5398 assert_eq_m256i(r, b);
5399 }
5400
5401 #[simd_test(enable = "avx2")]
5402 unsafe fn test_mm256_xor_si256() {
5403 let a = _mm256_set1_epi8(5);
5404 let b = _mm256_set1_epi8(3);
5405 let r = _mm256_xor_si256(a, b);
5406 assert_eq_m256i(r, _mm256_set1_epi8(6));
5407 }
5408
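// _mm256_alignr_epi8 works on each 128-bit lane separately: it concatenates the
// lane of `a` (high) with the lane of `b` (low), shifts the 32-byte value right
// by IMM8 bytes and keeps the low 16 bytes. IMM8 == 0 therefore returns `b`,
// IMM8 == 16 returns `a`, and IMM8 >= 32 returns zero.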
5409 #[simd_test(enable = "avx2")]
5410 unsafe fn test_mm256_alignr_epi8() {
5411 #[rustfmt::skip]
5412 let a = _mm256_setr_epi8(
5413 1, 2, 3, 4, 5, 6, 7, 8,
5414 9, 10, 11, 12, 13, 14, 15, 16,
5415 17, 18, 19, 20, 21, 22, 23, 24,
5416 25, 26, 27, 28, 29, 30, 31, 32,
5417 );
5418 #[rustfmt::skip]
5419 let b = _mm256_setr_epi8(
5420 -1, -2, -3, -4, -5, -6, -7, -8,
5421 -9, -10, -11, -12, -13, -14, -15, -16,
5422 -17, -18, -19, -20, -21, -22, -23, -24,
5423 -25, -26, -27, -28, -29, -30, -31, -32,
5424 );
5425 let r = _mm256_alignr_epi8::<33>(a, b);
5426 assert_eq_m256i(r, _mm256_set1_epi8(0));
5427
5428 let r = _mm256_alignr_epi8::<17>(a, b);
5429 #[rustfmt::skip]
5430 let expected = _mm256_setr_epi8(
5431 2, 3, 4, 5, 6, 7, 8, 9,
5432 10, 11, 12, 13, 14, 15, 16, 0,
5433 18, 19, 20, 21, 22, 23, 24, 25,
5434 26, 27, 28, 29, 30, 31, 32, 0,
5435 );
5436 assert_eq_m256i(r, expected);
5437
5438 let r = _mm256_alignr_epi8::<4>(a, b);
5439 #[rustfmt::skip]
5440 let expected = _mm256_setr_epi8(
5441 -5, -6, -7, -8, -9, -10, -11, -12,
5442 -13, -14, -15, -16, 1, 2, 3, 4,
5443 -21, -22, -23, -24, -25, -26, -27, -28,
5444 -29, -30, -31, -32, 17, 18, 19, 20,
5445 );
5446 assert_eq_m256i(r, expected);
5447
5448 let r = _mm256_alignr_epi8::<15>(a, b);
5449 #[rustfmt::skip]
5450 let expected = _mm256_setr_epi8(
5451 -16, 1, 2, 3, 4, 5, 6, 7,
5452 8, 9, 10, 11, 12, 13, 14, 15,
5453 -32, 17, 18, 19, 20, 21, 22, 23,
5454 24, 25, 26, 27, 28, 29, 30, 31,
5455 );
5456 assert_eq_m256i(r, expected);
5457
5458 let r = _mm256_alignr_epi8::<0>(a, b);
5459 assert_eq_m256i(r, b);
5460
5461 let r = _mm256_alignr_epi8::<16>(a, b);
5462 assert_eq_m256i(r, a);
5463 }
5464
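// _mm256_shuffle_epi8 also stays within 128-bit lanes: for each control byte in
// `b`, a set sign bit zeroes the output byte, otherwise the low four bits pick
// a byte from the same lane of `a`.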
5465 #[simd_test(enable = "avx2")]
5466 unsafe fn test_mm256_shuffle_epi8() {
5467 #[rustfmt::skip]
5468 let a = _mm256_setr_epi8(
5469 1, 2, 3, 4, 5, 6, 7, 8,
5470 9, 10, 11, 12, 13, 14, 15, 16,
5471 17, 18, 19, 20, 21, 22, 23, 24,
5472 25, 26, 27, 28, 29, 30, 31, 32,
5473 );
5474 #[rustfmt::skip]
5475 let b = _mm256_setr_epi8(
5476 4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
5477 12, 5, 5, 10, 4, 1, 8, 0,
5478 4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
5479 12, 5, 5, 10, 4, 1, 8, 0,
5480 );
5481 #[rustfmt::skip]
5482 let expected = _mm256_setr_epi8(
5483 5, 0, 5, 4, 9, 13, 7, 4,
5484 13, 6, 6, 11, 5, 2, 9, 1,
5485 21, 0, 21, 20, 25, 29, 23, 20,
5486 29, 22, 22, 27, 21, 18, 25, 17,
5487 );
5488 let r = _mm256_shuffle_epi8(a, b);
5489 assert_eq_m256i(r, expected);
5490 }
5491
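// _mm256_permutevar8x32_epi32 (vpermd), by contrast, crosses the lane boundary:
// the low three bits of each index in `b` select any of the eight 32-bit
// elements of `a`.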
5492 #[simd_test(enable = "avx2")]
5493 unsafe fn test_mm256_permutevar8x32_epi32() {
5494 let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800);
5495 let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
5496 let expected = _mm256_setr_epi32(600, 100, 600, 200, 800, 700, 400, 500);
5497 let r = _mm256_permutevar8x32_epi32(a, b);
5498 assert_eq_m256i(r, expected);
5499 }
5500
5501 #[simd_test(enable = "avx2")]
5502 unsafe fn test_mm256_permute4x64_epi64() {
5503 let a = _mm256_setr_epi64x(100, 200, 300, 400);
5504 let expected = _mm256_setr_epi64x(400, 100, 200, 100);
5505 let r = _mm256_permute4x64_epi64::<0b00010011>(a);
5506 assert_eq_m256i(r, expected);
5507 }
5508
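// For _mm256_permute2x128_si256, bits [1:0] of the immediate choose which
// 128-bit half of `a`/`b` becomes the low half of the result (0-1 from `a`,
// 2-3 from `b`), bits [5:4] do the same for the high half, and bits 3 or 7
// would zero the corresponding half instead.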
5509 #[simd_test(enable = "avx2")]
5510 unsafe fn test_mm256_permute2x128_si256() {
5511 let a = _mm256_setr_epi64x(100, 200, 500, 600);
5512 let b = _mm256_setr_epi64x(300, 400, 700, 800);
5513 let r = _mm256_permute2x128_si256::<0b00_01_00_11>(a, b);
5514 let e = _mm256_setr_epi64x(700, 800, 500, 600);
5515 assert_eq_m256i(r, e);
5516 }
5517
5518 #[simd_test(enable = "avx2")]
5519 unsafe fn test_mm256_permute4x64_pd() {
5520 let a = _mm256_setr_pd(1., 2., 3., 4.);
5521 let r = _mm256_permute4x64_pd::<0b00_01_00_11>(a);
5522 let e = _mm256_setr_pd(4., 1., 2., 1.);
5523 assert_eq_m256d(r, e);
5524 }
5525
5526 #[simd_test(enable = "avx2")]
5527 unsafe fn test_mm256_permutevar8x32_ps() {
5528 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
5529 let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
5530 let r = _mm256_permutevar8x32_ps(a, b);
5531 let e = _mm256_setr_ps(6., 1., 6., 2., 8., 7., 4., 5.);
5532 assert_eq_m256(r, e);
5533 }
5534
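// In the gather tests the SCALE const parameter (1, 2, 4 or 8) is the byte
// multiplier applied to each index, so index `i` with SCALE = 4 reads the i32
// at byte offset 4 * i. For the masked variants, elements whose mask sign bit
// is clear are copied from the `src` vector and their memory is not accessed.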
5535 #[simd_test(enable = "avx2")]
5536 unsafe fn test_mm_i32gather_epi32() {
5537 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5538 let r = _mm_i32gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5540 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
5541 }
5542
5543 #[simd_test(enable = "avx2")]
5544 unsafe fn test_mm_mask_i32gather_epi32() {
5545 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5546 let r = _mm_mask_i32gather_epi32::<4>(
5548 _mm_set1_epi32(256),
5549 arr.as_ptr(),
5550 _mm_setr_epi32(0, 16, 64, 96),
5551 _mm_setr_epi32(-1, -1, -1, 0),
5552 );
5553 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
5554 }
5555
5556 #[simd_test(enable = "avx2")]
5557 unsafe fn test_mm256_i32gather_epi32() {
5558 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5559 let r =
5561 _mm256_i32gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
5562 assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
5563 }
5564
5565 #[simd_test(enable = "avx2")]
5566 unsafe fn test_mm256_mask_i32gather_epi32() {
5567 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5568 let r = _mm256_mask_i32gather_epi32::<4>(
5570 _mm256_set1_epi32(256),
5571 arr.as_ptr(),
5572 _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
5573 _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0),
5574 );
5575 assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256));
5576 }
5577
5578 #[simd_test(enable = "avx2")]
5579 unsafe fn test_mm_i32gather_ps() {
5580 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5581 let r = _mm_i32gather_ps::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5583 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
5584 }
5585
5586 #[simd_test(enable = "avx2")]
5587 unsafe fn test_mm_mask_i32gather_ps() {
5588 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5589 let r = _mm_mask_i32gather_ps::<4>(
5591 _mm_set1_ps(256.0),
5592 arr.as_ptr(),
5593 _mm_setr_epi32(0, 16, 64, 96),
5594 _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
5595 );
5596 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
5597 }
5598
5599 #[simd_test(enable = "avx2")]
5600 unsafe fn test_mm256_i32gather_ps() {
5601 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5602 let r =
5604 _mm256_i32gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
5605 assert_eq_m256(r, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0));
5606 }
5607
5608 #[simd_test(enable = "avx2")]
5609 unsafe fn test_mm256_mask_i32gather_ps() {
5610 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5611 let r = _mm256_mask_i32gather_ps::<4>(
5613 _mm256_set1_ps(256.0),
5614 arr.as_ptr(),
5615 _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
5616 _mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0),
5617 );
5618 assert_eq_m256(
5619 r,
5620 _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0),
5621 );
5622 }
5623
5624 #[simd_test(enable = "avx2")]
5625 unsafe fn test_mm_i32gather_epi64() {
5626 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5627 let r = _mm_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
5629 assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
5630 }
5631
5632 #[simd_test(enable = "avx2")]
5633 unsafe fn test_mm_mask_i32gather_epi64() {
5634 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5635 let r = _mm_mask_i32gather_epi64::<8>(
5637 _mm_set1_epi64x(256),
5638 arr.as_ptr(),
5639 _mm_setr_epi32(16, 16, 16, 16),
5640 _mm_setr_epi64x(-1, 0),
5641 );
5642 assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
5643 }
5644
5645 #[simd_test(enable = "avx2")]
5646 unsafe fn test_mm256_i32gather_epi64() {
5647 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5648 let r = _mm256_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5650 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
5651 }
5652
5653 #[simd_test(enable = "avx2")]
5654 unsafe fn test_mm256_mask_i32gather_epi64() {
5655 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5656 let r = _mm256_mask_i32gather_epi64::<8>(
5658 _mm256_set1_epi64x(256),
5659 arr.as_ptr(),
5660 _mm_setr_epi32(0, 16, 64, 96),
5661 _mm256_setr_epi64x(-1, -1, -1, 0),
5662 );
5663 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
5664 }
5665
5666 #[simd_test(enable = "avx2")]
5667 unsafe fn test_mm_i32gather_pd() {
5668 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5669 let r = _mm_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
5671 assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
5672 }
5673
5674 #[simd_test(enable = "avx2")]
5675 unsafe fn test_mm_mask_i32gather_pd() {
5676 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5677 let r = _mm_mask_i32gather_pd::<8>(
5679 _mm_set1_pd(256.0),
5680 arr.as_ptr(),
5681 _mm_setr_epi32(16, 16, 16, 16),
5682 _mm_setr_pd(-1.0, 0.0),
5683 );
5684 assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
5685 }
5686
5687 #[simd_test(enable = "avx2")]
5688 unsafe fn test_mm256_i32gather_pd() {
5689 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5690 let r = _mm256_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5692 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
5693 }
5694
5695 #[simd_test(enable = "avx2")]
5696 unsafe fn test_mm256_mask_i32gather_pd() {
5697 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5698 let r = _mm256_mask_i32gather_pd::<8>(
5700 _mm256_set1_pd(256.0),
5701 arr.as_ptr(),
5702 _mm_setr_epi32(0, 16, 64, 96),
5703 _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
5704 );
5705 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
5706 }
5707
5708 #[simd_test(enable = "avx2")]
5709 unsafe fn test_mm_i64gather_epi32() {
5710 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5711 let r = _mm_i64gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5713 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 0, 0));
5714 }
5715
5716 #[simd_test(enable = "avx2")]
5717 unsafe fn test_mm_mask_i64gather_epi32() {
5718 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5719 let r = _mm_mask_i64gather_epi32::<4>(
5721 _mm_set1_epi32(256),
5722 arr.as_ptr(),
5723 _mm_setr_epi64x(0, 16),
5724 _mm_setr_epi32(-1, 0, -1, 0),
5725 );
5726 assert_eq_m128i(r, _mm_setr_epi32(0, 256, 0, 0));
5727 }
5728
5729 #[simd_test(enable = "avx2")]
5730 unsafe fn test_mm256_i64gather_epi32() {
5731 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5732 let r = _mm256_i64gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5734 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
5735 }
5736
5737 #[simd_test(enable = "avx2")]
5738 unsafe fn test_mm256_mask_i64gather_epi32() {
5739 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5740 let r = _mm256_mask_i64gather_epi32::<4>(
5742 _mm_set1_epi32(256),
5743 arr.as_ptr(),
5744 _mm256_setr_epi64x(0, 16, 64, 96),
5745 _mm_setr_epi32(-1, -1, -1, 0),
5746 );
5747 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
5748 }
5749
5750 #[simd_test(enable = "avx2")]
5751 unsafe fn test_mm_i64gather_ps() {
5752 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5753 let r = _mm_i64gather_ps::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5755 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 0.0, 0.0));
5756 }
5757
5758 #[simd_test(enable = "avx2")]
5759 unsafe fn test_mm_mask_i64gather_ps() {
5760 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5761 let r = _mm_mask_i64gather_ps::<4>(
5763 _mm_set1_ps(256.0),
5764 arr.as_ptr(),
5765 _mm_setr_epi64x(0, 16),
5766 _mm_setr_ps(-1.0, 0.0, -1.0, 0.0),
5767 );
5768 assert_eq_m128(r, _mm_setr_ps(0.0, 256.0, 0.0, 0.0));
5769 }
5770
5771 #[simd_test(enable = "avx2")]
5772 unsafe fn test_mm256_i64gather_ps() {
5773 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5774 let r = _mm256_i64gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5776 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
5777 }
5778
5779 #[simd_test(enable = "avx2")]
5780 unsafe fn test_mm256_mask_i64gather_ps() {
5781 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5782 let r = _mm256_mask_i64gather_ps::<4>(
5784 _mm_set1_ps(256.0),
5785 arr.as_ptr(),
5786 _mm256_setr_epi64x(0, 16, 64, 96),
5787 _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
5788 );
5789 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
5790 }
5791
5792 #[simd_test(enable = "avx2")]
5793 unsafe fn test_mm_i64gather_epi64() {
5794 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5795 let r = _mm_i64gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5797 assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
5798 }
5799
5800 #[simd_test(enable = "avx2")]
5801 unsafe fn test_mm_mask_i64gather_epi64() {
5802 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5803 let r = _mm_mask_i64gather_epi64::<8>(
5805 _mm_set1_epi64x(256),
5806 arr.as_ptr(),
5807 _mm_setr_epi64x(16, 16),
5808 _mm_setr_epi64x(-1, 0),
5809 );
5810 assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
5811 }
5812
5813 #[simd_test(enable = "avx2")]
5814 unsafe fn test_mm256_i64gather_epi64() {
5815 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5816 let r = _mm256_i64gather_epi64::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5818 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
5819 }
5820
5821 #[simd_test(enable = "avx2")]
5822 unsafe fn test_mm256_mask_i64gather_epi64() {
5823 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5824 let r = _mm256_mask_i64gather_epi64::<8>(
5826 _mm256_set1_epi64x(256),
5827 arr.as_ptr(),
5828 _mm256_setr_epi64x(0, 16, 64, 96),
5829 _mm256_setr_epi64x(-1, -1, -1, 0),
5830 );
5831 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
5832 }
5833
5834 #[simd_test(enable = "avx2")]
5835 unsafe fn test_mm_i64gather_pd() {
5836 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5837 let r = _mm_i64gather_pd::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5839 assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
5840 }
5841
5842 #[simd_test(enable = "avx2")]
5843 unsafe fn test_mm_mask_i64gather_pd() {
5844 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5845 let r = _mm_mask_i64gather_pd::<8>(
5847 _mm_set1_pd(256.0),
5848 arr.as_ptr(),
5849 _mm_setr_epi64x(16, 16),
5850 _mm_setr_pd(-1.0, 0.0),
5851 );
5852 assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
5853 }
5854
5855 #[simd_test(enable = "avx2")]
5856 unsafe fn test_mm256_i64gather_pd() {
5857 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5858 let r = _mm256_i64gather_pd::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5860 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
5861 }
5862
5863 #[simd_test(enable = "avx2")]
5864 unsafe fn test_mm256_mask_i64gather_pd() {
5865 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5866 let r = _mm256_mask_i64gather_pd::<8>(
5868 _mm256_set1_pd(256.0),
5869 arr.as_ptr(),
5870 _mm256_setr_epi64x(0, 16, 64, 96),
5871 _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
5872 );
5873 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
5874 }
5875
5876 #[simd_test(enable = "avx")]
5877 unsafe fn test_mm256_extract_epi8() {
5878 #[rustfmt::skip]
5879 let a = _mm256_setr_epi8(
5880 -1, 1, 2, 3, 4, 5, 6, 7,
5881 8, 9, 10, 11, 12, 13, 14, 15,
5882 16, 17, 18, 19, 20, 21, 22, 23,
5883 24, 25, 26, 27, 28, 29, 30, 31
5884 );
5885 let r1 = _mm256_extract_epi8::<0>(a);
5886 let r2 = _mm256_extract_epi8::<3>(a);
5887 assert_eq!(r1, 0xFF);
5888 assert_eq!(r2, 3);
5889 }
5890
5891 #[simd_test(enable = "avx2")]
5892 unsafe fn test_mm256_extract_epi16() {
5893 #[rustfmt::skip]
5894 let a = _mm256_setr_epi16(
5895 -1, 1, 2, 3, 4, 5, 6, 7,
5896 8, 9, 10, 11, 12, 13, 14, 15,
5897 );
5898 let r1 = _mm256_extract_epi16::<0>(a);
5899 let r2 = _mm256_extract_epi16::<3>(a);
5900 assert_eq!(r1, 0xFFFF);
5901 assert_eq!(r2, 3);
5902 }
5903}