use core::hint::unreachable_unchecked;

use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::simd::*;

#[cfg(test)]
use stdarch_test::assert_instr;

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_abs_epi32(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        let r = simd_select::<m32x8, _>(simd_lt(a, i32x8::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

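// Illustrative usage sketch (editor's addition, not part of the original
// source): on a CPU where AVX2 support has already been verified (e.g. via
// `is_x86_feature_detected!("avx2")` in std code), the absolute-value
// intrinsic above could be exercised roughly like this:
//
//     unsafe {
//         let v = _mm256_setr_epi32(-1, 2, -3, 4, -5, 6, -7, 8);
//         let abs = _mm256_abs_epi32(v);
//         // abs now holds [1, 2, 3, 4, 5, 6, 7, 8]
//     }
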
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_abs_epi16(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        let r = simd_select::<m16x16, _>(simd_lt(a, i16x16::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_abs_epi8(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        let r = simd_select::<m8x32, _>(simd_lt(a, i8x32::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_u8x32(), b.as_u8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_u16x16(), b.as_u16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 7))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 32 {
            return _mm256_setzero_si256();
        }
        let (a, b) = if IMM8 > 16 {
            (_mm256_setzero_si256(), a)
        } else {
            (a, b)
        };

        let a = a.as_i8x32();
        let b = b.as_i8x32();

        if IMM8 == 16 {
            return transmute(a);
        }

        let r: i8x32 = match IMM8 % 16 {
            0 => simd_shuffle!(
                b,
                a,
                [
                    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
                    22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                ],
            ),
            1 => simd_shuffle!(
                b,
                a,
                [
                    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 17, 18, 19, 20, 21, 22,
                    23, 24, 25, 26, 27, 28, 29, 30, 31, 48,
                ],
            ),
            2 => simd_shuffle!(
                b,
                a,
                [
                    2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 18, 19, 20, 21, 22, 23,
                    24, 25, 26, 27, 28, 29, 30, 31, 48, 49,
                ],
            ),
            3 => simd_shuffle!(
                b,
                a,
                [
                    3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 19, 20, 21, 22, 23,
                    24, 25, 26, 27, 28, 29, 30, 31, 48, 49, 50,
                ],
            ),
            4 => simd_shuffle!(
                b,
                a,
                [
                    4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 20, 21, 22, 23, 24,
                    25, 26, 27, 28, 29, 30, 31, 48, 49, 50, 51,
                ],
            ),
            5 => simd_shuffle!(
                b,
                a,
                [
                    5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 21, 22, 23, 24, 25,
                    26, 27, 28, 29, 30, 31, 48, 49, 50, 51, 52,
                ],
            ),
            6 => simd_shuffle!(
                b,
                a,
                [
                    6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 22, 23, 24, 25, 26,
                    27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53,
                ],
            ),
            7 => simd_shuffle!(
                b,
                a,
                [
                    7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 23, 24, 25, 26,
                    27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54,
                ],
            ),
            8 => simd_shuffle!(
                b,
                a,
                [
                    8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 24, 25, 26, 27,
                    28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55,
                ],
            ),
            9 => simd_shuffle!(
                b,
                a,
                [
                    9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 25, 26, 27, 28,
                    29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56,
                ],
            ),
            10 => simd_shuffle!(
                b,
                a,
                [
                    10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 26, 27, 28, 29,
                    30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
                ],
            ),
            11 => simd_shuffle!(
                b,
                a,
                [
                    11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 27, 28, 29, 30,
                    31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
                ],
            ),
            12 => simd_shuffle!(
                b,
                a,
                [
                    12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 28, 29, 30, 31,
                    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
                ],
            ),
            13 => simd_shuffle!(
                b,
                a,
                [
                    13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 29, 30, 31, 48,
                    49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
                ],
            ),
            14 => simd_shuffle!(
                b,
                a,
                [
                    14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 30, 31, 48, 49,
                    50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
                ],
            ),
            15 => simd_shuffle!(
                b,
                a,
                [
                    15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 31, 48, 49, 50,
                    51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
                ],
            ),
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}

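// Illustrative sketch (editor's addition, not part of the original source):
// `_mm256_alignr_epi8` concatenates each 128-bit lane of `a` with the
// corresponding lane of `b` and shifts the 32-byte intermediate right by
// `IMM8` bytes, per lane. Assuming AVX2 is available and `v` is some
// `__m256i`, passing the same vector twice rotates each lane by one byte:
//
//     unsafe {
//         // Each 16-byte lane of `rotated` is [v[1], ..., v[15], v[0]].
//         let rotated = _mm256_alignr_epi8::<1>(v, v);
//     }
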
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_and(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let all_ones = _mm256_set1_epi8(-1);
        transmute(simd_and(
            simd_xor(a.as_i64x4(), all_ones.as_i64x4()),
            b.as_i64x4(),
        ))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, u32x16>(a.as_u16x16());
        let b = simd_cast::<_, u32x16>(b.as_u16x16());
        let r = simd_shr(simd_add(simd_add(a, b), u32x16::splat(1)), u32x16::splat(1));
        transmute(simd_cast::<_, u16x16>(r))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, u16x32>(a.as_u8x32());
        let b = simd_cast::<_, u16x32>(b.as_u8x32());
        let r = simd_shr(simd_add(simd_add(a, b), u16x32::splat(1)), u16x32::splat(1));
        transmute(simd_cast::<_, u8x32>(r))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM4, 4);
    unsafe {
        let a = a.as_i32x4();
        let b = b.as_i32x4();
        let r: i32x4 = simd_shuffle!(
            a,
            b,
            [
                [0, 4, 0, 4][IMM4 as usize & 0b11],
                [1, 1, 5, 5][IMM4 as usize & 0b11],
                [2, 6, 2, 6][(IMM4 as usize >> 2) & 0b11],
                [3, 3, 7, 7][(IMM4 as usize >> 2) & 0b11],
            ],
        );
        transmute(r)
    }
}

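// Illustrative sketch (editor's addition, not part of the original source):
// in the blend intrinsics, each of the low bits of the immediate selects
// between `a` (bit clear) and `b` (bit set) for the corresponding element.
// Assuming AVX2 support and two `__m128i` values `a` and `b`:
//
//     unsafe {
//         // 0b1001: take elements 0 and 3 from `b`, elements 1 and 2 from `a`.
//         let r = _mm_blend_epi32::<0b1001>(a, b);
//     }
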
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        let r: i32x8 = simd_shuffle!(
            a,
            b,
            [
                [0, 8, 0, 8][IMM8 as usize & 0b11],
                [1, 1, 9, 9][IMM8 as usize & 0b11],
                [2, 10, 2, 10][(IMM8 as usize >> 2) & 0b11],
                [3, 3, 11, 11][(IMM8 as usize >> 2) & 0b11],
                [4, 12, 4, 12][(IMM8 as usize >> 4) & 0b11],
                [5, 5, 13, 13][(IMM8 as usize >> 4) & 0b11],
                [6, 14, 6, 14][(IMM8 as usize >> 6) & 0b11],
                [7, 7, 15, 15][(IMM8 as usize >> 6) & 0b11],
            ],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendw, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();

        let r: i16x16 = simd_shuffle!(
            a,
            b,
            [
                [0, 16, 0, 16][IMM8 as usize & 0b11],
                [1, 1, 17, 17][IMM8 as usize & 0b11],
                [2, 18, 2, 18][(IMM8 as usize >> 2) & 0b11],
                [3, 3, 19, 19][(IMM8 as usize >> 2) & 0b11],
                [4, 20, 4, 20][(IMM8 as usize >> 4) & 0b11],
                [5, 5, 21, 21][(IMM8 as usize >> 4) & 0b11],
                [6, 22, 6, 22][(IMM8 as usize >> 6) & 0b11],
                [7, 7, 23, 23][(IMM8 as usize >> 6) & 0b11],
                [8, 24, 8, 24][IMM8 as usize & 0b11],
                [9, 9, 25, 25][IMM8 as usize & 0b11],
                [10, 26, 10, 26][(IMM8 as usize >> 2) & 0b11],
                [11, 11, 27, 27][(IMM8 as usize >> 2) & 0b11],
                [12, 28, 12, 28][(IMM8 as usize >> 4) & 0b11],
                [13, 13, 29, 29][(IMM8 as usize >> 4) & 0b11],
                [14, 30, 14, 30][(IMM8 as usize >> 6) & 0b11],
                [15, 15, 31, 31][(IMM8 as usize >> 6) & 0b11],
            ],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
    unsafe {
        let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::ZERO);
        transmute(simd_select(mask, b.as_i8x32(), a.as_i8x32()))
    }
}

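// Illustrative sketch (editor's addition, not part of the original source):
// unlike the immediate blends above, `_mm256_blendv_epi8` selects per byte at
// run time using the sign bit of each byte in `mask` (MSB set picks `b`,
// clear picks `a`). Assuming AVX2 and two `__m256i` values `a` and `b`, it
// combines naturally with a comparison:
//
//     unsafe {
//         // Wherever a byte of `a` is greater than the matching byte of `b`
//         // (signed), take the byte from `b` instead: a signed per-byte min.
//         let mask = _mm256_cmpgt_epi8(a, b);
//         let min = _mm256_blendv_epi8(a, b, mask);
//     }
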
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 16]);
        transmute::<i8x16, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 32]);
        transmute::<i8x32, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 4]);
        transmute::<i32x4, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 8]);
        transmute::<i32x8, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]);
        transmute::<i64x2, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]);
        transmute::<i64x4, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 2]) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 4]) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
        transmute::<i64x4, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
        transmute::<i64x4, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastss_ps(a: __m128) -> __m128 {
    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 4]) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 8]) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 8]);
        transmute::<i16x8, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 16]);
        transmute::<i16x16, _>(ret)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_cast(a.as_i16x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i16x8();
        let v64: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v64))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_cast(a.as_i32x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_cast(a.as_i8x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i8x16();
        let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i32x8, _>(simd_cast(v64))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i8x16();
        let v32: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v32))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_cast(a.as_u16x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u16x8();
        let v64: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v64))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_cast(a.as_u32x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_cast(a.as_u8x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u8x16();
        let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i32x8, _>(simd_cast(v64))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u8x16();
        let v32: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v32))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let a = a.as_i64x4();
        let b = i64x4::ZERO;
        let dst: i64x2 = simd_shuffle!(a, b, [[0, 1], [2, 3]][IMM1 as usize]);
        transmute(dst)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phaddw(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phaddd(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phaddsw(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phsubw(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phsubd(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phsubsw(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi32(-1).as_i32x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

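// Illustrative sketch (editor's addition, not part of the original source):
// the gather intrinsics read each lane from `slice + offset * SCALE`, so
// loading four `i32`s at indices 0, 2, 4 and 6 of an array uses a scale of 4
// (the element size). The caller must guarantee AVX2 support and that every
// gathered address is in bounds:
//
//     unsafe {
//         let data: [i32; 8] = [10, 11, 12, 13, 14, 15, 16, 17];
//         let idx = _mm_setr_epi32(0, 2, 4, 6);
//         let r = _mm_i32gather_epi32::<4>(data.as_ptr(), idx);
//         // r now holds [10, 12, 14, 16]
//     }
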
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x8::ZERO;
    let neg_one = _mm256_set1_epi32(-1).as_i32x8();
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    let r = vpgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi32<const SCALE: i32>(
    src: __m256i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x8();
    let mask = mask.as_i32x8();
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    let r = vpgatherdd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m128i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdps(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_ps();
    let neg_one = _mm256_set1_ps(-1.0);
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    vpgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_ps<const SCALE: i32>(
    src: __m256,
    slice: *const f32,
    offsets: __m256i,
    mask: __m256,
) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    vpgatherdps(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x2::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi64<const SCALE: i32>(
    src: __m128i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x2();
    let mask = mask.as_i64x2();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x4::ZERO;
    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = vpgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi64<const SCALE: i32>(
    src: __m256i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x4();
    let mask = mask.as_i64x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = vpgatherdq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_pd();
    let neg_one = _mm_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_pd<const SCALE: i32>(
    src: __m128d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m128d,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdpd(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_pd<const SCALE: i32>(
    slice: *const f64,
    offsets: __m128i,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_pd();
    let neg_one = _mm256_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    vpgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_pd<const SCALE: i32>(
    src: __m256d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m256d,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    vpgatherdpd(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m256i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    pgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m128i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    pgatherqps(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    vpgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m256i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    vpgatherqps(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x2::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    let r = pgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_epi64<const SCALE: i32>(
    src: __m128i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x2();
    let mask = mask.as_i64x2();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x4::ZERO;
    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    let r = vpgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_epi64<const SCALE: i32>(
    src: __m256i,
    slice: *const i64,
    offsets: __m256i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x4();
    let mask = mask.as_i64x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_pd();
    let neg_one = _mm_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    pgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_pd<const SCALE: i32>(
    src: __m128d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m128d,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    pgatherqpd(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_pd<const SCALE: i32>(
    slice: *const f64,
    offsets: __m256i,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_pd();
    let neg_one = _mm256_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    vpgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_pd<const SCALE: i32>(
    src: __m256d,
    slice: *const f64,
    offsets: __m256i,
    mask: __m256d,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    vpgatherqpd(src, slice, offsets, mask, SCALE as i8)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let a = a.as_i64x4();
        let b = _mm256_castsi128_si256(b).as_i64x4();
        let dst: i64x4 = simd_shuffle!(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]);
        transmute(dst)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmaddwd(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_u8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
    transmute(maskloadd(mem_addr as *const i8, mask.as_i32x4()))
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
    transmute(maskloadd256(mem_addr as *const i8, mask.as_i32x8()))
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
    transmute(maskloadq(mem_addr as *const i8, mask.as_i64x2()))
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
    transmute(maskloadq256(mem_addr as *const i8, mask.as_i64x4()))
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
    maskstored(mem_addr as *mut i8, mask.as_i32x4(), a.as_i32x4())
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
    maskstored256(mem_addr as *mut i8, mask.as_i32x8(), a.as_i32x8())
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
    maskstoreq(mem_addr as *mut i8, mask.as_i64x2(), a.as_i64x2())
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
    maskstoreq256(mem_addr as *mut i8, mask.as_i64x4(), a.as_i64x4())
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();
        transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u16x16();
        let b = b.as_u16x16();
        transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u32x8();
        let b = b.as_u32x8();
        transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();
        transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u16x16();
        let b = b.as_u16x16();
        transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u32x8();
        let b = b.as_u32x8();
        transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_movemask_epi8(a: __m256i) -> i32 {
    unsafe {
        let z = i8x32::ZERO;
        let m: i8x32 = simd_lt(a.as_i8x32(), z);
        simd_bitmask::<_, u32>(m) as i32
    }
}

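// Illustrative sketch (editor's addition, not part of the original source):
// `_mm256_movemask_epi8` packs the sign bit of every byte into a 32-bit
// integer, which combines naturally with the byte-wise comparisons above,
// e.g. to count matching bytes between two `__m256i` values `haystack` and
// `needle` (placeholder names):
//
//     unsafe {
//         let eq = _mm256_cmpeq_epi8(haystack, needle);
//         let matches = _mm256_movemask_epi8(eq).count_ones();
//     }
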
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmpsadbw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8)) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmuldq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4()));
        let b = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4()));
        transmute(simd_mul(a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u64x4();
        let b = b.as_u64x4();
        let mask = u64x4::splat(u32::MAX.into());
        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, i32x16>(a.as_i16x16());
        let b = simd_cast::<_, i32x16>(b.as_i16x16());
        let r = simd_shr(simd_mul(a, b), i32x16::splat(16));
        transmute(simd_cast::<i32x16, i16x16>(r))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, u32x16>(a.as_u16x16());
        let b = simd_cast::<_, u32x16>(b.as_u16x16());
        let r = simd_shr(simd_mul(a, b), u32x16::splat(16));
        transmute(simd_cast::<u32x16, u16x16>(r))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_mul(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_mul(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmulhrsw(a.as_i16x16(), b.as_i16x16())) }
}

2265#[inline]
2270#[target_feature(enable = "avx2")]
2271#[cfg_attr(test, assert_instr(vorps))]
2272#[stable(feature = "simd_x86", since = "1.27.0")]
2273pub fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
2274 unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
2275}
2276
2277#[inline]
2282#[target_feature(enable = "avx2")]
2283#[cfg_attr(test, assert_instr(vpacksswb))]
2284#[stable(feature = "simd_x86", since = "1.27.0")]
2285pub fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
2286 unsafe { transmute(packsswb(a.as_i16x16(), b.as_i16x16())) }
2287}
2288
2289#[inline]
2294#[target_feature(enable = "avx2")]
2295#[cfg_attr(test, assert_instr(vpackssdw))]
2296#[stable(feature = "simd_x86", since = "1.27.0")]
2297pub fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
2298 unsafe { transmute(packssdw(a.as_i32x8(), b.as_i32x8())) }
2299}
2300
2301#[inline]
2306#[target_feature(enable = "avx2")]
2307#[cfg_attr(test, assert_instr(vpackuswb))]
2308#[stable(feature = "simd_x86", since = "1.27.0")]
2309pub fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
2310 unsafe { transmute(packuswb(a.as_i16x16(), b.as_i16x16())) }
2311}
2312
2313#[inline]
2318#[target_feature(enable = "avx2")]
2319#[cfg_attr(test, assert_instr(vpackusdw))]
2320#[stable(feature = "simd_x86", since = "1.27.0")]
2321pub fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
2322 unsafe { transmute(packusdw(a.as_i32x8(), b.as_i32x8())) }
2323}
2324
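/// Permutes packed 32-bit integers from `a` across lanes according to the
/// indices in the low three bits of each element of `b`.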
2325#[inline]
2332#[target_feature(enable = "avx2")]
2333#[cfg_attr(test, assert_instr(vpermps))]
2334#[stable(feature = "simd_x86", since = "1.27.0")]
2335pub fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
2336 unsafe { transmute(permd(a.as_u32x8(), b.as_u32x8())) }
2337}
2338
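/// Permutes the four 64-bit integers of `a` using the 2-bit selectors packed
/// into the control mask `IMM8`.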
2339#[inline]
2343#[target_feature(enable = "avx2")]
2344#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 9))]
2345#[rustc_legacy_const_generics(1)]
2346#[stable(feature = "simd_x86", since = "1.27.0")]
2347pub fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2348 static_assert_uimm_bits!(IMM8, 8);
2349 unsafe {
2350 let zero = i64x4::ZERO;
2351 let r: i64x4 = simd_shuffle!(
2352 a.as_i64x4(),
2353 zero,
2354 [
2355 IMM8 as u32 & 0b11,
2356 (IMM8 as u32 >> 2) & 0b11,
2357 (IMM8 as u32 >> 4) & 0b11,
2358 (IMM8 as u32 >> 6) & 0b11,
2359 ],
2360 );
2361 transmute(r)
2362 }
2363}
2364
2365#[inline]
2369#[target_feature(enable = "avx2")]
2370#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 9))]
2371#[rustc_legacy_const_generics(2)]
2372#[stable(feature = "simd_x86", since = "1.27.0")]
2373pub fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2374 static_assert_uimm_bits!(IMM8, 8);
2375 unsafe { transmute(vperm2i128(a.as_i64x4(), b.as_i64x4(), IMM8 as i8)) }
2376}
2377
2378#[inline]
2383#[target_feature(enable = "avx2")]
2384#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 1))]
2385#[rustc_legacy_const_generics(1)]
2386#[stable(feature = "simd_x86", since = "1.27.0")]
2387pub fn _mm256_permute4x64_pd<const IMM8: i32>(a: __m256d) -> __m256d {
2388 static_assert_uimm_bits!(IMM8, 8);
2389 unsafe {
2390 simd_shuffle!(
2391 a,
2392 _mm256_undefined_pd(),
2393 [
2394 IMM8 as u32 & 0b11,
2395 (IMM8 as u32 >> 2) & 0b11,
2396 (IMM8 as u32 >> 4) & 0b11,
2397 (IMM8 as u32 >> 6) & 0b11,
2398 ],
2399 )
2400 }
2401}
2402
2403#[inline]
2408#[target_feature(enable = "avx2")]
2409#[cfg_attr(test, assert_instr(vpermps))]
2410#[stable(feature = "simd_x86", since = "1.27.0")]
2411pub fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 {
2412 unsafe { permps(a, idx.as_i32x8()) }
2413}
2414
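/// Computes the absolute differences of packed unsigned 8-bit integers in
/// `a` and `b`, horizontally sums each consecutive group of 8 differences to
/// produce four unsigned 16-bit integers, and packs them into the low 16 bits
/// of the 64-bit elements of the result.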
2415#[inline]
2422#[target_feature(enable = "avx2")]
2423#[cfg_attr(test, assert_instr(vpsadbw))]
2424#[stable(feature = "simd_x86", since = "1.27.0")]
2425pub fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i {
2426 unsafe { transmute(psadbw(a.as_u8x32(), b.as_u8x32())) }
2427}
2428
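/// Shuffles bytes from `a` according to the contents of `b`. Within each
/// 128-bit lane, the low four bits of each byte of `b` select the source
/// byte; a byte of `b` with its highest bit set zeroes the corresponding
/// result byte.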
2429#[inline]
2460#[target_feature(enable = "avx2")]
2461#[cfg_attr(test, assert_instr(vpshufb))]
2462#[stable(feature = "simd_x86", since = "1.27.0")]
2463pub fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
2464 unsafe { transmute(pshufb(a.as_u8x32(), b.as_u8x32())) }
2465}
2466
2467#[inline]
2498#[target_feature(enable = "avx2")]
2499#[cfg_attr(test, assert_instr(vshufps, MASK = 9))]
2500#[rustc_legacy_const_generics(1)]
2501#[stable(feature = "simd_x86", since = "1.27.0")]
2502pub fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
2503 static_assert_uimm_bits!(MASK, 8);
2504 unsafe {
2505 let r: i32x8 = simd_shuffle!(
2506 a.as_i32x8(),
2507 a.as_i32x8(),
2508 [
2509 MASK as u32 & 0b11,
2510 (MASK as u32 >> 2) & 0b11,
2511 (MASK as u32 >> 4) & 0b11,
2512 (MASK as u32 >> 6) & 0b11,
2513 (MASK as u32 & 0b11) + 4,
2514 ((MASK as u32 >> 2) & 0b11) + 4,
2515 ((MASK as u32 >> 4) & 0b11) + 4,
2516 ((MASK as u32 >> 6) & 0b11) + 4,
2517 ],
2518 );
2519 transmute(r)
2520 }
2521}
2522
2523#[inline]
2529#[target_feature(enable = "avx2")]
2530#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 9))]
2531#[rustc_legacy_const_generics(1)]
2532#[stable(feature = "simd_x86", since = "1.27.0")]
2533pub fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2534 static_assert_uimm_bits!(IMM8, 8);
2535 unsafe {
2536 let a = a.as_i16x16();
2537 let r: i16x16 = simd_shuffle!(
2538 a,
2539 a,
2540 [
2541 0,
2542 1,
2543 2,
2544 3,
2545 4 + (IMM8 as u32 & 0b11),
2546 4 + ((IMM8 as u32 >> 2) & 0b11),
2547 4 + ((IMM8 as u32 >> 4) & 0b11),
2548 4 + ((IMM8 as u32 >> 6) & 0b11),
2549 8,
2550 9,
2551 10,
2552 11,
2553 12 + (IMM8 as u32 & 0b11),
2554 12 + ((IMM8 as u32 >> 2) & 0b11),
2555 12 + ((IMM8 as u32 >> 4) & 0b11),
2556 12 + ((IMM8 as u32 >> 6) & 0b11),
2557 ],
2558 );
2559 transmute(r)
2560 }
2561}
2562
2563#[inline]
2569#[target_feature(enable = "avx2")]
2570#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 9))]
2571#[rustc_legacy_const_generics(1)]
2572#[stable(feature = "simd_x86", since = "1.27.0")]
2573pub fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2574 static_assert_uimm_bits!(IMM8, 8);
2575 unsafe {
2576 let a = a.as_i16x16();
2577 let r: i16x16 = simd_shuffle!(
2578 a,
2579 a,
2580 [
2581 0 + (IMM8 as u32 & 0b11),
2582 0 + ((IMM8 as u32 >> 2) & 0b11),
2583 0 + ((IMM8 as u32 >> 4) & 0b11),
2584 0 + ((IMM8 as u32 >> 6) & 0b11),
2585 4,
2586 5,
2587 6,
2588 7,
2589 8 + (IMM8 as u32 & 0b11),
2590 8 + ((IMM8 as u32 >> 2) & 0b11),
2591 8 + ((IMM8 as u32 >> 4) & 0b11),
2592 8 + ((IMM8 as u32 >> 6) & 0b11),
2593 12,
2594 13,
2595 14,
2596 15,
2597 ],
2598 );
2599 transmute(r)
2600 }
2601}
2602
2603#[inline]
2609#[target_feature(enable = "avx2")]
2610#[cfg_attr(test, assert_instr(vpsignw))]
2611#[stable(feature = "simd_x86", since = "1.27.0")]
2612pub fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i {
2613 unsafe { transmute(psignw(a.as_i16x16(), b.as_i16x16())) }
2614}
2615
2616#[inline]
2622#[target_feature(enable = "avx2")]
2623#[cfg_attr(test, assert_instr(vpsignd))]
2624#[stable(feature = "simd_x86", since = "1.27.0")]
2625pub fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i {
2626 unsafe { transmute(psignd(a.as_i32x8(), b.as_i32x8())) }
2627}
2628
2629#[inline]
2635#[target_feature(enable = "avx2")]
2636#[cfg_attr(test, assert_instr(vpsignb))]
2637#[stable(feature = "simd_x86", since = "1.27.0")]
2638pub fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i {
2639 unsafe { transmute(psignb(a.as_i8x32(), b.as_i8x32())) }
2640}
2641
2642#[inline]
2647#[target_feature(enable = "avx2")]
2648#[cfg_attr(test, assert_instr(vpsllw))]
2649#[stable(feature = "simd_x86", since = "1.27.0")]
2650pub fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i {
2651 unsafe { transmute(psllw(a.as_i16x16(), count.as_i16x8())) }
2652}
2653
2654#[inline]
2659#[target_feature(enable = "avx2")]
2660#[cfg_attr(test, assert_instr(vpslld))]
2661#[stable(feature = "simd_x86", since = "1.27.0")]
2662pub fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i {
2663 unsafe { transmute(pslld(a.as_i32x8(), count.as_i32x4())) }
2664}
2665
2666#[inline]
2671#[target_feature(enable = "avx2")]
2672#[cfg_attr(test, assert_instr(vpsllq))]
2673#[stable(feature = "simd_x86", since = "1.27.0")]
2674pub fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i {
2675 unsafe { transmute(psllq(a.as_i64x4(), count.as_i64x2())) }
2676}
2677
2678#[inline]
2683#[target_feature(enable = "avx2")]
2684#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 7))]
2685#[rustc_legacy_const_generics(1)]
2686#[stable(feature = "simd_x86", since = "1.27.0")]
2687pub fn _mm256_slli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2688 static_assert_uimm_bits!(IMM8, 8);
2689 unsafe {
2690 if IMM8 >= 16 {
2691 _mm256_setzero_si256()
2692 } else {
2693 transmute(simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
2694 }
2695 }
2696}
2697
2698#[inline]
2703#[target_feature(enable = "avx2")]
2704#[cfg_attr(test, assert_instr(vpslld, IMM8 = 7))]
2705#[rustc_legacy_const_generics(1)]
2706#[stable(feature = "simd_x86", since = "1.27.0")]
2707pub fn _mm256_slli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2708    static_assert_uimm_bits!(IMM8, 8);
2709    unsafe {
2710 if IMM8 >= 32 {
2711 _mm256_setzero_si256()
2712 } else {
2713 transmute(simd_shl(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
2714 }
2715 }
2716}
2717
2718#[inline]
2723#[target_feature(enable = "avx2")]
2724#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 7))]
2725#[rustc_legacy_const_generics(1)]
2726#[stable(feature = "simd_x86", since = "1.27.0")]
2727pub fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2728    static_assert_uimm_bits!(IMM8, 8);
2729    unsafe {
2730 if IMM8 >= 64 {
2731 _mm256_setzero_si256()
2732 } else {
2733 transmute(simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
2734 }
2735 }
2736}
2737
2738#[inline]
2742#[target_feature(enable = "avx2")]
2743#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
2744#[rustc_legacy_const_generics(1)]
2745#[stable(feature = "simd_x86", since = "1.27.0")]
2746pub fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
2747 static_assert_uimm_bits!(IMM8, 8);
2748 _mm256_bslli_epi128::<IMM8>(a)
2749}
2750
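/// Shifts each 128-bit lane in `a` left by `IMM8` bytes while shifting in
/// zeros.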
2751#[inline]
2755#[target_feature(enable = "avx2")]
2756#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
2757#[rustc_legacy_const_generics(1)]
2758#[stable(feature = "simd_x86", since = "1.27.0")]
2759pub fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
2760 static_assert_uimm_bits!(IMM8, 8);
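    // Builds the shuffle index for destination byte `i`: indices below 32
    // select the all-zero first operand (bytes shifted in from outside the
    // 128-bit lane, or any shift greater than 15), while `32 + (i - shift)`
    // selects byte `i - shift` of `a`, the second operand.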
2761 const fn mask(shift: i32, i: u32) -> u32 {
2762 let shift = shift as u32 & 0xff;
2763 if shift > 15 || i % 16 < shift {
2764 0
2765 } else {
2766 32 + (i - shift)
2767 }
2768 }
2769 unsafe {
2770 let a = a.as_i8x32();
2771 let r: i8x32 = simd_shuffle!(
2772 i8x32::ZERO,
2773 a,
2774 [
2775 mask(IMM8, 0),
2776 mask(IMM8, 1),
2777 mask(IMM8, 2),
2778 mask(IMM8, 3),
2779 mask(IMM8, 4),
2780 mask(IMM8, 5),
2781 mask(IMM8, 6),
2782 mask(IMM8, 7),
2783 mask(IMM8, 8),
2784 mask(IMM8, 9),
2785 mask(IMM8, 10),
2786 mask(IMM8, 11),
2787 mask(IMM8, 12),
2788 mask(IMM8, 13),
2789 mask(IMM8, 14),
2790 mask(IMM8, 15),
2791 mask(IMM8, 16),
2792 mask(IMM8, 17),
2793 mask(IMM8, 18),
2794 mask(IMM8, 19),
2795 mask(IMM8, 20),
2796 mask(IMM8, 21),
2797 mask(IMM8, 22),
2798 mask(IMM8, 23),
2799 mask(IMM8, 24),
2800 mask(IMM8, 25),
2801 mask(IMM8, 26),
2802 mask(IMM8, 27),
2803 mask(IMM8, 28),
2804 mask(IMM8, 29),
2805 mask(IMM8, 30),
2806 mask(IMM8, 31),
2807 ],
2808 );
2809 transmute(r)
2810 }
2811}
2812
2813#[inline]
2819#[target_feature(enable = "avx2")]
2820#[cfg_attr(test, assert_instr(vpsllvd))]
2821#[stable(feature = "simd_x86", since = "1.27.0")]
2822pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
2823 unsafe { transmute(psllvd(a.as_i32x4(), count.as_i32x4())) }
2824}
2825
2826#[inline]
2832#[target_feature(enable = "avx2")]
2833#[cfg_attr(test, assert_instr(vpsllvd))]
2834#[stable(feature = "simd_x86", since = "1.27.0")]
2835pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
2836 unsafe { transmute(psllvd256(a.as_i32x8(), count.as_i32x8())) }
2837}
2838
2839#[inline]
2845#[target_feature(enable = "avx2")]
2846#[cfg_attr(test, assert_instr(vpsllvq))]
2847#[stable(feature = "simd_x86", since = "1.27.0")]
2848pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
2849 unsafe { transmute(psllvq(a.as_i64x2(), count.as_i64x2())) }
2850}
2851
2852#[inline]
2858#[target_feature(enable = "avx2")]
2859#[cfg_attr(test, assert_instr(vpsllvq))]
2860#[stable(feature = "simd_x86", since = "1.27.0")]
2861pub fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
2862 unsafe { transmute(psllvq256(a.as_i64x4(), count.as_i64x4())) }
2863}
2864
2865#[inline]
2870#[target_feature(enable = "avx2")]
2871#[cfg_attr(test, assert_instr(vpsraw))]
2872#[stable(feature = "simd_x86", since = "1.27.0")]
2873pub fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i {
2874 unsafe { transmute(psraw(a.as_i16x16(), count.as_i16x8())) }
2875}
2876
2877#[inline]
2882#[target_feature(enable = "avx2")]
2883#[cfg_attr(test, assert_instr(vpsrad))]
2884#[stable(feature = "simd_x86", since = "1.27.0")]
2885pub fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i {
2886 unsafe { transmute(psrad(a.as_i32x8(), count.as_i32x4())) }
2887}
2888
2889#[inline]
2894#[target_feature(enable = "avx2")]
2895#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 7))]
2896#[rustc_legacy_const_generics(1)]
2897#[stable(feature = "simd_x86", since = "1.27.0")]
2898pub fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2899 static_assert_uimm_bits!(IMM8, 8);
2900 unsafe { transmute(simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16))) }
2901}
2902
2903#[inline]
2908#[target_feature(enable = "avx2")]
2909#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 7))]
2910#[rustc_legacy_const_generics(1)]
2911#[stable(feature = "simd_x86", since = "1.27.0")]
2912pub fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2913 static_assert_uimm_bits!(IMM8, 8);
2914 unsafe { transmute(simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31)))) }
2915}
2916
2917#[inline]
2922#[target_feature(enable = "avx2")]
2923#[cfg_attr(test, assert_instr(vpsravd))]
2924#[stable(feature = "simd_x86", since = "1.27.0")]
2925pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
2926 unsafe { transmute(psravd(a.as_i32x4(), count.as_i32x4())) }
2927}
2928
2929#[inline]
2934#[target_feature(enable = "avx2")]
2935#[cfg_attr(test, assert_instr(vpsravd))]
2936#[stable(feature = "simd_x86", since = "1.27.0")]
2937pub fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
2938 unsafe { transmute(psravd256(a.as_i32x8(), count.as_i32x8())) }
2939}
2940
2941#[inline]
2945#[target_feature(enable = "avx2")]
2946#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
2947#[rustc_legacy_const_generics(1)]
2948#[stable(feature = "simd_x86", since = "1.27.0")]
2949pub fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
2950 static_assert_uimm_bits!(IMM8, 8);
2951 _mm256_bsrli_epi128::<IMM8>(a)
2952}
2953
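/// Shifts each 128-bit lane in `a` right by `IMM8` bytes while shifting in
/// zeros.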
2954#[inline]
2958#[target_feature(enable = "avx2")]
2959#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
2960#[rustc_legacy_const_generics(1)]
2961#[stable(feature = "simd_x86", since = "1.27.0")]
2962pub fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
2963 static_assert_uimm_bits!(IMM8, 8);
2964 unsafe {
2965 let a = a.as_i8x32();
2966 let zero = i8x32::ZERO;
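        // Each arm picks surviving bytes of `a` (indices 0-31) and fills the
        // vacated positions from `zero` (index 32), per 128-bit lane; shift
        // counts above 15 fall through to the all-zero arm.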
2967        let r: i8x32 = match IMM8 {
2968 0 => simd_shuffle!(
2969 a,
2970 zero,
2971 [
2972 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
2973 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2974 ],
2975 ),
2976 1 => simd_shuffle!(
2977 a,
2978 zero,
2979 [
2980 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 17, 18, 19, 20, 21, 22,
2981 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
2982 ],
2983 ),
2984 2 => simd_shuffle!(
2985 a,
2986 zero,
2987 [
2988 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 18, 19, 20, 21, 22, 23,
2989 24, 25, 26, 27, 28, 29, 30, 31, 32, 32,
2990 ],
2991 ),
2992 3 => simd_shuffle!(
2993 a,
2994 zero,
2995 [
2996 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 19, 20, 21, 22, 23,
2997 24, 25, 26, 27, 28, 29, 30, 31, 32, 32, 32,
2998 ],
2999 ),
3000 4 => simd_shuffle!(
3001 a,
3002 zero,
3003 [
3004 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 20, 21, 22, 23, 24,
3005 25, 26, 27, 28, 29, 30, 31, 32, 32, 32, 32,
3006 ],
3007 ),
3008 5 => simd_shuffle!(
3009 a,
3010 zero,
3011 [
3012 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 21, 22, 23, 24, 25,
3013 26, 27, 28, 29, 30, 31, 32, 32, 32, 32, 32,
3014 ],
3015 ),
3016 6 => simd_shuffle!(
3017 a,
3018 zero,
3019 [
3020 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 22, 23, 24, 25, 26,
3021 27, 28, 29, 30, 31, 32, 32, 32, 32, 32, 32,
3022 ],
3023 ),
3024 7 => simd_shuffle!(
3025 a,
3026 zero,
3027 [
3028 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 23, 24, 25, 26,
3029 27, 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, 32,
3030 ],
3031 ),
3032 8 => simd_shuffle!(
3033 a,
3034 zero,
3035 [
3036 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 24, 25, 26, 27,
3037 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, 32,
3038 ],
3039 ),
3040 9 => simd_shuffle!(
3041 a,
3042 zero,
3043 [
3044 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 25, 26, 27, 28,
3045 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
3046 ],
3047 ),
3048 10 => simd_shuffle!(
3049 a,
3050 zero,
3051 [
3052 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 26, 27, 28, 29,
3053 30, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
3054 ],
3055 ),
3056 11 => simd_shuffle!(
3057 a,
3058 zero,
3059 [
3060 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 27, 28, 29, 30,
3061 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
3062 ],
3063 ),
3064 12 => simd_shuffle!(
3065 a,
3066 zero,
3067 [
3068 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 28, 29, 30, 31,
3069 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
3070 ],
3071 ),
3072 13 => simd_shuffle!(
3073 a,
3074 zero,
3075 [
3076 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 29, 30, 31, 32,
3077 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
3078 ],
3079 ),
3080 14 => simd_shuffle!(
3081 a,
3082 zero,
3083 [
3084 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 30, 31, 32, 32,
3085 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
3086 ],
3087 ),
3088 15 => simd_shuffle!(
3089 a,
3090 zero,
3091 [
3092 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32,
3093 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
3094 ],
3095 ),
3096 _ => zero,
3097 };
3098 transmute(r)
3099 }
3100}
3101
3102#[inline]
3107#[target_feature(enable = "avx2")]
3108#[cfg_attr(test, assert_instr(vpsrlw))]
3109#[stable(feature = "simd_x86", since = "1.27.0")]
3110pub fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i {
3111 unsafe { transmute(psrlw(a.as_i16x16(), count.as_i16x8())) }
3112}
3113
3114#[inline]
3119#[target_feature(enable = "avx2")]
3120#[cfg_attr(test, assert_instr(vpsrld))]
3121#[stable(feature = "simd_x86", since = "1.27.0")]
3122pub fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i {
3123 unsafe { transmute(psrld(a.as_i32x8(), count.as_i32x4())) }
3124}
3125
3126#[inline]
3131#[target_feature(enable = "avx2")]
3132#[cfg_attr(test, assert_instr(vpsrlq))]
3133#[stable(feature = "simd_x86", since = "1.27.0")]
3134pub fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i {
3135 unsafe { transmute(psrlq(a.as_i64x4(), count.as_i64x2())) }
3136}
3137
3138#[inline]
3143#[target_feature(enable = "avx2")]
3144#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 7))]
3145#[rustc_legacy_const_generics(1)]
3146#[stable(feature = "simd_x86", since = "1.27.0")]
3147pub fn _mm256_srli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
3148 static_assert_uimm_bits!(IMM8, 8);
3149 unsafe {
3150 if IMM8 >= 16 {
3151 _mm256_setzero_si256()
3152 } else {
3153 transmute(simd_shr(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
3154 }
3155 }
3156}
3157
3158#[inline]
3163#[target_feature(enable = "avx2")]
3164#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 7))]
3165#[rustc_legacy_const_generics(1)]
3166#[stable(feature = "simd_x86", since = "1.27.0")]
3167pub fn _mm256_srli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
3168 static_assert_uimm_bits!(IMM8, 8);
3169 unsafe {
3170 if IMM8 >= 32 {
3171 _mm256_setzero_si256()
3172 } else {
3173 transmute(simd_shr(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
3174 }
3175 }
3176}
3177
3178#[inline]
3183#[target_feature(enable = "avx2")]
3184#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 7))]
3185#[rustc_legacy_const_generics(1)]
3186#[stable(feature = "simd_x86", since = "1.27.0")]
3187pub fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
3188 static_assert_uimm_bits!(IMM8, 8);
3189 unsafe {
3190 if IMM8 >= 64 {
3191 _mm256_setzero_si256()
3192 } else {
3193 transmute(simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
3194 }
3195 }
3196}
3197
3198#[inline]
3203#[target_feature(enable = "avx2")]
3204#[cfg_attr(test, assert_instr(vpsrlvd))]
3205#[stable(feature = "simd_x86", since = "1.27.0")]
3206pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
3207 unsafe { transmute(psrlvd(a.as_i32x4(), count.as_i32x4())) }
3208}
3209
3210#[inline]
3215#[target_feature(enable = "avx2")]
3216#[cfg_attr(test, assert_instr(vpsrlvd))]
3217#[stable(feature = "simd_x86", since = "1.27.0")]
3218pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
3219 unsafe { transmute(psrlvd256(a.as_i32x8(), count.as_i32x8())) }
3220}
3221
3222#[inline]
3227#[target_feature(enable = "avx2")]
3228#[cfg_attr(test, assert_instr(vpsrlvq))]
3229#[stable(feature = "simd_x86", since = "1.27.0")]
3230pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
3231 unsafe { transmute(psrlvq(a.as_i64x2(), count.as_i64x2())) }
3232}
3233
3234#[inline]
3239#[target_feature(enable = "avx2")]
3240#[cfg_attr(test, assert_instr(vpsrlvq))]
3241#[stable(feature = "simd_x86", since = "1.27.0")]
3242pub fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
3243 unsafe { transmute(psrlvq256(a.as_i64x4(), count.as_i64x4())) }
3244}
3245
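/// Loads 256 bits of integer data from memory into the result using a
/// non-temporal hint to minimize cache pollution.
///
/// # Safety
///
/// `mem_addr` must be valid for a 32-byte read and must be aligned on a
/// 32-byte boundary.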
3246#[inline]
3252#[target_feature(enable = "avx2")]
3253#[cfg_attr(test, assert_instr(vmovntdqa))]
3254#[stable(feature = "simd_x86_updates", since = "1.82.0")]
3255pub unsafe fn _mm256_stream_load_si256(mem_addr: *const __m256i) -> __m256i {
3256 let dst: __m256i;
3257 crate::arch::asm!(
3258 vpl!("vmovntdqa {a}"),
3259 a = out(ymm_reg) dst,
3260 p = in(reg) mem_addr,
3261 options(pure, readonly, nostack, preserves_flags),
3262 );
3263 dst
3264}
3265
3266#[inline]
3270#[target_feature(enable = "avx2")]
3271#[cfg_attr(test, assert_instr(vpsubw))]
3272#[stable(feature = "simd_x86", since = "1.27.0")]
3273pub fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
3274 unsafe { transmute(simd_sub(a.as_i16x16(), b.as_i16x16())) }
3275}
3276
3277#[inline]
3281#[target_feature(enable = "avx2")]
3282#[cfg_attr(test, assert_instr(vpsubd))]
3283#[stable(feature = "simd_x86", since = "1.27.0")]
3284pub fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
3285 unsafe { transmute(simd_sub(a.as_i32x8(), b.as_i32x8())) }
3286}
3287
3288#[inline]
3292#[target_feature(enable = "avx2")]
3293#[cfg_attr(test, assert_instr(vpsubq))]
3294#[stable(feature = "simd_x86", since = "1.27.0")]
3295pub fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
3296 unsafe { transmute(simd_sub(a.as_i64x4(), b.as_i64x4())) }
3297}
3298
3299#[inline]
3303#[target_feature(enable = "avx2")]
3304#[cfg_attr(test, assert_instr(vpsubb))]
3305#[stable(feature = "simd_x86", since = "1.27.0")]
3306pub fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
3307 unsafe { transmute(simd_sub(a.as_i8x32(), b.as_i8x32())) }
3308}
3309
3310#[inline]
3315#[target_feature(enable = "avx2")]
3316#[cfg_attr(test, assert_instr(vpsubsw))]
3317#[stable(feature = "simd_x86", since = "1.27.0")]
3318pub fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
3319 unsafe { transmute(simd_saturating_sub(a.as_i16x16(), b.as_i16x16())) }
3320}
3321
3322#[inline]
3327#[target_feature(enable = "avx2")]
3328#[cfg_attr(test, assert_instr(vpsubsb))]
3329#[stable(feature = "simd_x86", since = "1.27.0")]
3330pub fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
3331 unsafe { transmute(simd_saturating_sub(a.as_i8x32(), b.as_i8x32())) }
3332}
3333
3334#[inline]
3339#[target_feature(enable = "avx2")]
3340#[cfg_attr(test, assert_instr(vpsubusw))]
3341#[stable(feature = "simd_x86", since = "1.27.0")]
3342pub fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
3343 unsafe { transmute(simd_saturating_sub(a.as_u16x16(), b.as_u16x16())) }
3344}
3345
3346#[inline]
3351#[target_feature(enable = "avx2")]
3352#[cfg_attr(test, assert_instr(vpsubusb))]
3353#[stable(feature = "simd_x86", since = "1.27.0")]
3354pub fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
3355 unsafe { transmute(simd_saturating_sub(a.as_u8x32(), b.as_u8x32())) }
3356}
3357
3358#[inline]
3398#[target_feature(enable = "avx2")]
3399#[cfg_attr(test, assert_instr(vpunpckhbw))]
3400#[stable(feature = "simd_x86", since = "1.27.0")]
3401pub fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
3402 unsafe {
3403 #[rustfmt::skip]
3404 let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
3405 8, 40, 9, 41, 10, 42, 11, 43,
3406 12, 44, 13, 45, 14, 46, 15, 47,
3407 24, 56, 25, 57, 26, 58, 27, 59,
3408 28, 60, 29, 61, 30, 62, 31, 63,
3409 ]);
3410 transmute(r)
3411 }
3412}
3413
3414#[inline]
3453#[target_feature(enable = "avx2")]
3454#[cfg_attr(test, assert_instr(vpunpcklbw))]
3455#[stable(feature = "simd_x86", since = "1.27.0")]
3456pub fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
3457 unsafe {
3458 #[rustfmt::skip]
3459 let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
3460 0, 32, 1, 33, 2, 34, 3, 35,
3461 4, 36, 5, 37, 6, 38, 7, 39,
3462 16, 48, 17, 49, 18, 50, 19, 51,
3463 20, 52, 21, 53, 22, 54, 23, 55,
3464 ]);
3465 transmute(r)
3466 }
3467}
3468
3469#[inline]
3504#[target_feature(enable = "avx2")]
3505#[cfg_attr(test, assert_instr(vpunpckhwd))]
3506#[stable(feature = "simd_x86", since = "1.27.0")]
3507pub fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
3508 unsafe {
3509 let r: i16x16 = simd_shuffle!(
3510 a.as_i16x16(),
3511 b.as_i16x16(),
3512 [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31],
3513 );
3514 transmute(r)
3515 }
3516}
3517
3518#[inline]
3554#[target_feature(enable = "avx2")]
3555#[cfg_attr(test, assert_instr(vpunpcklwd))]
3556#[stable(feature = "simd_x86", since = "1.27.0")]
3557pub fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
3558 unsafe {
3559 let r: i16x16 = simd_shuffle!(
3560 a.as_i16x16(),
3561 b.as_i16x16(),
3562 [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27],
3563 );
3564 transmute(r)
3565 }
3566}
3567
3568#[inline]
3597#[target_feature(enable = "avx2")]
3598#[cfg_attr(test, assert_instr(vunpckhps))]
3599#[stable(feature = "simd_x86", since = "1.27.0")]
3600pub fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
3601 unsafe {
3602 let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]);
3603 transmute(r)
3604 }
3605}
3606
3607#[inline]
3636#[target_feature(enable = "avx2")]
3637#[cfg_attr(test, assert_instr(vunpcklps))]
3638#[stable(feature = "simd_x86", since = "1.27.0")]
3639pub fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
3640 unsafe {
3641 let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
3642 transmute(r)
3643 }
3644}
3645
3646#[inline]
3675#[target_feature(enable = "avx2")]
3676#[cfg_attr(test, assert_instr(vunpckhpd))]
3677#[stable(feature = "simd_x86", since = "1.27.0")]
3678pub fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
3679 unsafe {
3680 let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]);
3681 transmute(r)
3682 }
3683}
3684
3685#[inline]
3714#[target_feature(enable = "avx2")]
3715#[cfg_attr(test, assert_instr(vunpcklpd))]
3716#[stable(feature = "simd_x86", since = "1.27.0")]
3717pub fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
3718 unsafe {
3719 let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]);
3720 transmute(r)
3721 }
3722}
3723
3724#[inline]
3729#[target_feature(enable = "avx2")]
3730#[cfg_attr(test, assert_instr(vxorps))]
3731#[stable(feature = "simd_x86", since = "1.27.0")]
3732pub fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
3733 unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
3734}
3735
3736#[inline]
3743#[target_feature(enable = "avx2")]
3744#[rustc_legacy_const_generics(1)]
3746#[stable(feature = "simd_x86", since = "1.27.0")]
3747pub fn _mm256_extract_epi8<const INDEX: i32>(a: __m256i) -> i32 {
3748 static_assert_uimm_bits!(INDEX, 5);
3749 unsafe { simd_extract!(a.as_u8x32(), INDEX as u32, u8) as i32 }
3750}
3751
3752#[inline]
3759#[target_feature(enable = "avx2")]
3760#[rustc_legacy_const_generics(1)]
3762#[stable(feature = "simd_x86", since = "1.27.0")]
3763pub fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
3764 static_assert_uimm_bits!(INDEX, 4);
3765 unsafe { simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32 }
3766}
3767
3768#[allow(improper_ctypes)]
3769unsafe extern "C" {
3770 #[link_name = "llvm.x86.avx2.phadd.w"]
3771 fn phaddw(a: i16x16, b: i16x16) -> i16x16;
3772 #[link_name = "llvm.x86.avx2.phadd.d"]
3773 fn phaddd(a: i32x8, b: i32x8) -> i32x8;
3774 #[link_name = "llvm.x86.avx2.phadd.sw"]
3775 fn phaddsw(a: i16x16, b: i16x16) -> i16x16;
3776 #[link_name = "llvm.x86.avx2.phsub.w"]
3777 fn phsubw(a: i16x16, b: i16x16) -> i16x16;
3778 #[link_name = "llvm.x86.avx2.phsub.d"]
3779 fn phsubd(a: i32x8, b: i32x8) -> i32x8;
3780 #[link_name = "llvm.x86.avx2.phsub.sw"]
3781 fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
3782 #[link_name = "llvm.x86.avx2.pmadd.wd"]
3783 fn pmaddwd(a: i16x16, b: i16x16) -> i32x8;
3784 #[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
3785 fn pmaddubsw(a: u8x32, b: u8x32) -> i16x16;
3786 #[link_name = "llvm.x86.avx2.maskload.d"]
3787 fn maskloadd(mem_addr: *const i8, mask: i32x4) -> i32x4;
3788 #[link_name = "llvm.x86.avx2.maskload.d.256"]
3789 fn maskloadd256(mem_addr: *const i8, mask: i32x8) -> i32x8;
3790 #[link_name = "llvm.x86.avx2.maskload.q"]
3791 fn maskloadq(mem_addr: *const i8, mask: i64x2) -> i64x2;
3792 #[link_name = "llvm.x86.avx2.maskload.q.256"]
3793 fn maskloadq256(mem_addr: *const i8, mask: i64x4) -> i64x4;
3794 #[link_name = "llvm.x86.avx2.maskstore.d"]
3795 fn maskstored(mem_addr: *mut i8, mask: i32x4, a: i32x4);
3796 #[link_name = "llvm.x86.avx2.maskstore.d.256"]
3797 fn maskstored256(mem_addr: *mut i8, mask: i32x8, a: i32x8);
3798 #[link_name = "llvm.x86.avx2.maskstore.q"]
3799 fn maskstoreq(mem_addr: *mut i8, mask: i64x2, a: i64x2);
3800 #[link_name = "llvm.x86.avx2.maskstore.q.256"]
3801 fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
3802 #[link_name = "llvm.x86.avx2.mpsadbw"]
3803 fn mpsadbw(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
3804 #[link_name = "llvm.x86.avx2.pmul.hr.sw"]
3805 fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
3806 #[link_name = "llvm.x86.avx2.packsswb"]
3807 fn packsswb(a: i16x16, b: i16x16) -> i8x32;
3808 #[link_name = "llvm.x86.avx2.packssdw"]
3809 fn packssdw(a: i32x8, b: i32x8) -> i16x16;
3810 #[link_name = "llvm.x86.avx2.packuswb"]
3811 fn packuswb(a: i16x16, b: i16x16) -> u8x32;
3812 #[link_name = "llvm.x86.avx2.packusdw"]
3813 fn packusdw(a: i32x8, b: i32x8) -> u16x16;
3814 #[link_name = "llvm.x86.avx2.psad.bw"]
3815 fn psadbw(a: u8x32, b: u8x32) -> u64x4;
3816 #[link_name = "llvm.x86.avx2.psign.b"]
3817 fn psignb(a: i8x32, b: i8x32) -> i8x32;
3818 #[link_name = "llvm.x86.avx2.psign.w"]
3819 fn psignw(a: i16x16, b: i16x16) -> i16x16;
3820 #[link_name = "llvm.x86.avx2.psign.d"]
3821 fn psignd(a: i32x8, b: i32x8) -> i32x8;
3822 #[link_name = "llvm.x86.avx2.psll.w"]
3823 fn psllw(a: i16x16, count: i16x8) -> i16x16;
3824 #[link_name = "llvm.x86.avx2.psll.d"]
3825 fn pslld(a: i32x8, count: i32x4) -> i32x8;
3826 #[link_name = "llvm.x86.avx2.psll.q"]
3827 fn psllq(a: i64x4, count: i64x2) -> i64x4;
3828 #[link_name = "llvm.x86.avx2.psllv.d"]
3829 fn psllvd(a: i32x4, count: i32x4) -> i32x4;
3830 #[link_name = "llvm.x86.avx2.psllv.d.256"]
3831 fn psllvd256(a: i32x8, count: i32x8) -> i32x8;
3832 #[link_name = "llvm.x86.avx2.psllv.q"]
3833 fn psllvq(a: i64x2, count: i64x2) -> i64x2;
3834 #[link_name = "llvm.x86.avx2.psllv.q.256"]
3835 fn psllvq256(a: i64x4, count: i64x4) -> i64x4;
3836 #[link_name = "llvm.x86.avx2.psra.w"]
3837 fn psraw(a: i16x16, count: i16x8) -> i16x16;
3838 #[link_name = "llvm.x86.avx2.psra.d"]
3839 fn psrad(a: i32x8, count: i32x4) -> i32x8;
3840 #[link_name = "llvm.x86.avx2.psrav.d"]
3841 fn psravd(a: i32x4, count: i32x4) -> i32x4;
3842 #[link_name = "llvm.x86.avx2.psrav.d.256"]
3843 fn psravd256(a: i32x8, count: i32x8) -> i32x8;
3844 #[link_name = "llvm.x86.avx2.psrl.w"]
3845 fn psrlw(a: i16x16, count: i16x8) -> i16x16;
3846 #[link_name = "llvm.x86.avx2.psrl.d"]
3847 fn psrld(a: i32x8, count: i32x4) -> i32x8;
3848 #[link_name = "llvm.x86.avx2.psrl.q"]
3849 fn psrlq(a: i64x4, count: i64x2) -> i64x4;
3850 #[link_name = "llvm.x86.avx2.psrlv.d"]
3851 fn psrlvd(a: i32x4, count: i32x4) -> i32x4;
3852 #[link_name = "llvm.x86.avx2.psrlv.d.256"]
3853 fn psrlvd256(a: i32x8, count: i32x8) -> i32x8;
3854 #[link_name = "llvm.x86.avx2.psrlv.q"]
3855 fn psrlvq(a: i64x2, count: i64x2) -> i64x2;
3856 #[link_name = "llvm.x86.avx2.psrlv.q.256"]
3857 fn psrlvq256(a: i64x4, count: i64x4) -> i64x4;
3858 #[link_name = "llvm.x86.avx2.pshuf.b"]
3859 fn pshufb(a: u8x32, b: u8x32) -> u8x32;
3860 #[link_name = "llvm.x86.avx2.permd"]
3861 fn permd(a: u32x8, b: u32x8) -> u32x8;
3862 #[link_name = "llvm.x86.avx2.permps"]
3863 fn permps(a: __m256, b: i32x8) -> __m256;
3864 #[link_name = "llvm.x86.avx2.vperm2i128"]
3865 fn vperm2i128(a: i64x4, b: i64x4, imm8: i8) -> i64x4;
3866 #[link_name = "llvm.x86.avx2.gather.d.d"]
3867 fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4;
3868 #[link_name = "llvm.x86.avx2.gather.d.d.256"]
3869 fn vpgatherdd(src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8;
3870 #[link_name = "llvm.x86.avx2.gather.d.q"]
3871 fn pgatherdq(src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2;
3872 #[link_name = "llvm.x86.avx2.gather.d.q.256"]
3873 fn vpgatherdq(src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4;
3874 #[link_name = "llvm.x86.avx2.gather.q.d"]
3875 fn pgatherqd(src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4;
3876 #[link_name = "llvm.x86.avx2.gather.q.d.256"]
3877 fn vpgatherqd(src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4;
3878 #[link_name = "llvm.x86.avx2.gather.q.q"]
3879 fn pgatherqq(src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2;
3880 #[link_name = "llvm.x86.avx2.gather.q.q.256"]
3881 fn vpgatherqq(src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4;
3882 #[link_name = "llvm.x86.avx2.gather.d.pd"]
3883 fn pgatherdpd(
3884 src: __m128d,
3885 slice: *const i8,
3886 offsets: i32x4,
3887 mask: __m128d,
3888 scale: i8,
3889 ) -> __m128d;
3890 #[link_name = "llvm.x86.avx2.gather.d.pd.256"]
3891 fn vpgatherdpd(
3892 src: __m256d,
3893 slice: *const i8,
3894 offsets: i32x4,
3895 mask: __m256d,
3896 scale: i8,
3897 ) -> __m256d;
3898 #[link_name = "llvm.x86.avx2.gather.q.pd"]
3899 fn pgatherqpd(
3900 src: __m128d,
3901 slice: *const i8,
3902 offsets: i64x2,
3903 mask: __m128d,
3904 scale: i8,
3905 ) -> __m128d;
3906 #[link_name = "llvm.x86.avx2.gather.q.pd.256"]
3907 fn vpgatherqpd(
3908 src: __m256d,
3909 slice: *const i8,
3910 offsets: i64x4,
3911 mask: __m256d,
3912 scale: i8,
3913 ) -> __m256d;
3914 #[link_name = "llvm.x86.avx2.gather.d.ps"]
3915 fn pgatherdps(src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8)
3916 -> __m128;
3917 #[link_name = "llvm.x86.avx2.gather.d.ps.256"]
3918 fn vpgatherdps(
3919 src: __m256,
3920 slice: *const i8,
3921 offsets: i32x8,
3922 mask: __m256,
3923 scale: i8,
3924 ) -> __m256;
3925 #[link_name = "llvm.x86.avx2.gather.q.ps"]
3926 fn pgatherqps(src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8)
3927 -> __m128;
3928 #[link_name = "llvm.x86.avx2.gather.q.ps.256"]
3929 fn vpgatherqps(
3930 src: __m128,
3931 slice: *const i8,
3932 offsets: i64x4,
3933 mask: __m128,
3934 scale: i8,
3935 ) -> __m128;
3936}
3937
3938#[cfg(test)]
3939mod tests {
3940
3941 use stdarch_test::simd_test;
3942
3943 use crate::core_arch::x86::*;
3944
3945 #[simd_test(enable = "avx2")]
3946 unsafe fn test_mm256_abs_epi32() {
3947 #[rustfmt::skip]
3948 let a = _mm256_setr_epi32(
3949 0, 1, -1, i32::MAX,
3950 i32::MIN, 100, -100, -32,
3951 );
3952 let r = _mm256_abs_epi32(a);
3953 #[rustfmt::skip]
3954 let e = _mm256_setr_epi32(
3955 0, 1, 1, i32::MAX,
3956 i32::MAX.wrapping_add(1), 100, 100, 32,
3957 );
3958 assert_eq_m256i(r, e);
3959 }
3960
3961 #[simd_test(enable = "avx2")]
3962 unsafe fn test_mm256_abs_epi16() {
3963 #[rustfmt::skip]
3964 let a = _mm256_setr_epi16(
3965 0, 1, -1, 2, -2, 3, -3, 4,
3966 -4, 5, -5, i16::MAX, i16::MIN, 100, -100, -32,
3967 );
3968 let r = _mm256_abs_epi16(a);
3969 #[rustfmt::skip]
3970 let e = _mm256_setr_epi16(
3971 0, 1, 1, 2, 2, 3, 3, 4,
3972 4, 5, 5, i16::MAX, i16::MAX.wrapping_add(1), 100, 100, 32,
3973 );
3974 assert_eq_m256i(r, e);
3975 }
3976
3977 #[simd_test(enable = "avx2")]
3978 unsafe fn test_mm256_abs_epi8() {
3979 #[rustfmt::skip]
3980 let a = _mm256_setr_epi8(
3981 0, 1, -1, 2, -2, 3, -3, 4,
3982 -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
3983 0, 1, -1, 2, -2, 3, -3, 4,
3984 -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
3985 );
3986 let r = _mm256_abs_epi8(a);
3987 #[rustfmt::skip]
3988 let e = _mm256_setr_epi8(
3989 0, 1, 1, 2, 2, 3, 3, 4,
3990 4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
3991 0, 1, 1, 2, 2, 3, 3, 4,
3992 4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
3993 );
3994 assert_eq_m256i(r, e);
3995 }
3996
3997 #[simd_test(enable = "avx2")]
3998 unsafe fn test_mm256_add_epi64() {
3999 let a = _mm256_setr_epi64x(-10, 0, 100, 1_000_000_000);
4000 let b = _mm256_setr_epi64x(-1, 0, 1, 2);
4001 let r = _mm256_add_epi64(a, b);
4002 let e = _mm256_setr_epi64x(-11, 0, 101, 1_000_000_002);
4003 assert_eq_m256i(r, e);
4004 }
4005
4006 #[simd_test(enable = "avx2")]
4007 unsafe fn test_mm256_add_epi32() {
4008 let a = _mm256_setr_epi32(-1, 0, 1, 2, 3, 4, 5, 6);
4009 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4010 let r = _mm256_add_epi32(a, b);
4011 let e = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
4012 assert_eq_m256i(r, e);
4013 }
4014
4015 #[simd_test(enable = "avx2")]
4016 unsafe fn test_mm256_add_epi16() {
4017 #[rustfmt::skip]
4018 let a = _mm256_setr_epi16(
4019 0, 1, 2, 3, 4, 5, 6, 7,
4020 8, 9, 10, 11, 12, 13, 14, 15,
4021 );
4022 #[rustfmt::skip]
4023 let b = _mm256_setr_epi16(
4024 0, 1, 2, 3, 4, 5, 6, 7,
4025 8, 9, 10, 11, 12, 13, 14, 15,
4026 );
4027 let r = _mm256_add_epi16(a, b);
4028 #[rustfmt::skip]
4029 let e = _mm256_setr_epi16(
4030 0, 2, 4, 6, 8, 10, 12, 14,
4031 16, 18, 20, 22, 24, 26, 28, 30,
4032 );
4033 assert_eq_m256i(r, e);
4034 }
4035
4036 #[simd_test(enable = "avx2")]
4037 unsafe fn test_mm256_add_epi8() {
4038 #[rustfmt::skip]
4039 let a = _mm256_setr_epi8(
4040 0, 1, 2, 3, 4, 5, 6, 7,
4041 8, 9, 10, 11, 12, 13, 14, 15,
4042 16, 17, 18, 19, 20, 21, 22, 23,
4043 24, 25, 26, 27, 28, 29, 30, 31,
4044 );
4045 #[rustfmt::skip]
4046 let b = _mm256_setr_epi8(
4047 0, 1, 2, 3, 4, 5, 6, 7,
4048 8, 9, 10, 11, 12, 13, 14, 15,
4049 16, 17, 18, 19, 20, 21, 22, 23,
4050 24, 25, 26, 27, 28, 29, 30, 31,
4051 );
4052 let r = _mm256_add_epi8(a, b);
4053 #[rustfmt::skip]
4054 let e = _mm256_setr_epi8(
4055 0, 2, 4, 6, 8, 10, 12, 14,
4056 16, 18, 20, 22, 24, 26, 28, 30,
4057 32, 34, 36, 38, 40, 42, 44, 46,
4058 48, 50, 52, 54, 56, 58, 60, 62,
4059 );
4060 assert_eq_m256i(r, e);
4061 }
4062
4063 #[simd_test(enable = "avx2")]
4064 unsafe fn test_mm256_adds_epi8() {
4065 #[rustfmt::skip]
4066 let a = _mm256_setr_epi8(
4067 0, 1, 2, 3, 4, 5, 6, 7,
4068 8, 9, 10, 11, 12, 13, 14, 15,
4069 16, 17, 18, 19, 20, 21, 22, 23,
4070 24, 25, 26, 27, 28, 29, 30, 31,
4071 );
4072 #[rustfmt::skip]
4073 let b = _mm256_setr_epi8(
4074 32, 33, 34, 35, 36, 37, 38, 39,
4075 40, 41, 42, 43, 44, 45, 46, 47,
4076 48, 49, 50, 51, 52, 53, 54, 55,
4077 56, 57, 58, 59, 60, 61, 62, 63,
4078 );
4079 let r = _mm256_adds_epi8(a, b);
4080 #[rustfmt::skip]
4081 let e = _mm256_setr_epi8(
4082 32, 34, 36, 38, 40, 42, 44, 46,
4083 48, 50, 52, 54, 56, 58, 60, 62,
4084 64, 66, 68, 70, 72, 74, 76, 78,
4085 80, 82, 84, 86, 88, 90, 92, 94,
4086 );
4087 assert_eq_m256i(r, e);
4088 }
4089
4090 #[simd_test(enable = "avx2")]
4091 unsafe fn test_mm256_adds_epi8_saturate_positive() {
4092 let a = _mm256_set1_epi8(0x7F);
4093 let b = _mm256_set1_epi8(1);
4094 let r = _mm256_adds_epi8(a, b);
4095 assert_eq_m256i(r, a);
4096 }
4097
4098 #[simd_test(enable = "avx2")]
4099 unsafe fn test_mm256_adds_epi8_saturate_negative() {
4100 let a = _mm256_set1_epi8(-0x80);
4101 let b = _mm256_set1_epi8(-1);
4102 let r = _mm256_adds_epi8(a, b);
4103 assert_eq_m256i(r, a);
4104 }
4105
4106 #[simd_test(enable = "avx2")]
4107 unsafe fn test_mm256_adds_epi16() {
4108 #[rustfmt::skip]
4109 let a = _mm256_setr_epi16(
4110 0, 1, 2, 3, 4, 5, 6, 7,
4111 8, 9, 10, 11, 12, 13, 14, 15,
4112 );
4113 #[rustfmt::skip]
4114 let b = _mm256_setr_epi16(
4115 32, 33, 34, 35, 36, 37, 38, 39,
4116 40, 41, 42, 43, 44, 45, 46, 47,
4117 );
4118 let r = _mm256_adds_epi16(a, b);
4119 #[rustfmt::skip]
4120 let e = _mm256_setr_epi16(
4121 32, 34, 36, 38, 40, 42, 44, 46,
4122 48, 50, 52, 54, 56, 58, 60, 62,
4123 );
4124
4125 assert_eq_m256i(r, e);
4126 }
4127
4128 #[simd_test(enable = "avx2")]
4129 unsafe fn test_mm256_adds_epi16_saturate_positive() {
4130 let a = _mm256_set1_epi16(0x7FFF);
4131 let b = _mm256_set1_epi16(1);
4132 let r = _mm256_adds_epi16(a, b);
4133 assert_eq_m256i(r, a);
4134 }
4135
4136 #[simd_test(enable = "avx2")]
4137 unsafe fn test_mm256_adds_epi16_saturate_negative() {
4138 let a = _mm256_set1_epi16(-0x8000);
4139 let b = _mm256_set1_epi16(-1);
4140 let r = _mm256_adds_epi16(a, b);
4141 assert_eq_m256i(r, a);
4142 }
4143
4144 #[simd_test(enable = "avx2")]
4145 unsafe fn test_mm256_adds_epu8() {
4146 #[rustfmt::skip]
4147 let a = _mm256_setr_epi8(
4148 0, 1, 2, 3, 4, 5, 6, 7,
4149 8, 9, 10, 11, 12, 13, 14, 15,
4150 16, 17, 18, 19, 20, 21, 22, 23,
4151 24, 25, 26, 27, 28, 29, 30, 31,
4152 );
4153 #[rustfmt::skip]
4154 let b = _mm256_setr_epi8(
4155 32, 33, 34, 35, 36, 37, 38, 39,
4156 40, 41, 42, 43, 44, 45, 46, 47,
4157 48, 49, 50, 51, 52, 53, 54, 55,
4158 56, 57, 58, 59, 60, 61, 62, 63,
4159 );
4160 let r = _mm256_adds_epu8(a, b);
4161 #[rustfmt::skip]
4162 let e = _mm256_setr_epi8(
4163 32, 34, 36, 38, 40, 42, 44, 46,
4164 48, 50, 52, 54, 56, 58, 60, 62,
4165 64, 66, 68, 70, 72, 74, 76, 78,
4166 80, 82, 84, 86, 88, 90, 92, 94,
4167 );
4168 assert_eq_m256i(r, e);
4169 }
4170
4171 #[simd_test(enable = "avx2")]
4172 unsafe fn test_mm256_adds_epu8_saturate() {
4173 let a = _mm256_set1_epi8(!0);
4174 let b = _mm256_set1_epi8(1);
4175 let r = _mm256_adds_epu8(a, b);
4176 assert_eq_m256i(r, a);
4177 }
4178
4179 #[simd_test(enable = "avx2")]
4180 unsafe fn test_mm256_adds_epu16() {
4181 #[rustfmt::skip]
4182 let a = _mm256_setr_epi16(
4183 0, 1, 2, 3, 4, 5, 6, 7,
4184 8, 9, 10, 11, 12, 13, 14, 15,
4185 );
4186 #[rustfmt::skip]
4187 let b = _mm256_setr_epi16(
4188 32, 33, 34, 35, 36, 37, 38, 39,
4189 40, 41, 42, 43, 44, 45, 46, 47,
4190 );
4191 let r = _mm256_adds_epu16(a, b);
4192 #[rustfmt::skip]
4193 let e = _mm256_setr_epi16(
4194 32, 34, 36, 38, 40, 42, 44, 46,
4195 48, 50, 52, 54, 56, 58, 60, 62,
4196 );
4197
4198 assert_eq_m256i(r, e);
4199 }
4200
4201 #[simd_test(enable = "avx2")]
4202 unsafe fn test_mm256_adds_epu16_saturate() {
4203 let a = _mm256_set1_epi16(!0);
4204 let b = _mm256_set1_epi16(1);
4205 let r = _mm256_adds_epu16(a, b);
4206 assert_eq_m256i(r, a);
4207 }
4208
4209 #[simd_test(enable = "avx2")]
4210 unsafe fn test_mm256_and_si256() {
4211 let a = _mm256_set1_epi8(5);
4212 let b = _mm256_set1_epi8(3);
4213 let got = _mm256_and_si256(a, b);
4214 assert_eq_m256i(got, _mm256_set1_epi8(1));
4215 }
4216
4217 #[simd_test(enable = "avx2")]
4218 unsafe fn test_mm256_andnot_si256() {
4219 let a = _mm256_set1_epi8(5);
4220 let b = _mm256_set1_epi8(3);
4221 let got = _mm256_andnot_si256(a, b);
4222 assert_eq_m256i(got, _mm256_set1_epi8(2));
4223 }
4224
4225 #[simd_test(enable = "avx2")]
4226 unsafe fn test_mm256_avg_epu8() {
4227 let (a, b) = (_mm256_set1_epi8(3), _mm256_set1_epi8(9));
4228 let r = _mm256_avg_epu8(a, b);
4229 assert_eq_m256i(r, _mm256_set1_epi8(6));
4230 }
4231
4232 #[simd_test(enable = "avx2")]
4233 unsafe fn test_mm256_avg_epu16() {
4234 let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4235 let r = _mm256_avg_epu16(a, b);
4236 assert_eq_m256i(r, _mm256_set1_epi16(6));
4237 }
4238
4239 #[simd_test(enable = "avx2")]
4240 unsafe fn test_mm_blend_epi32() {
4241 let (a, b) = (_mm_set1_epi32(3), _mm_set1_epi32(9));
4242 let e = _mm_setr_epi32(9, 3, 3, 3);
4243 let r = _mm_blend_epi32::<0x01>(a, b);
4244 assert_eq_m128i(r, e);
4245
4246 let r = _mm_blend_epi32::<0x0E>(b, a);
4247 assert_eq_m128i(r, e);
4248 }
4249
4250 #[simd_test(enable = "avx2")]
4251 unsafe fn test_mm256_blend_epi32() {
4252 let (a, b) = (_mm256_set1_epi32(3), _mm256_set1_epi32(9));
4253 let e = _mm256_setr_epi32(9, 3, 3, 3, 3, 3, 3, 3);
4254 let r = _mm256_blend_epi32::<0x01>(a, b);
4255 assert_eq_m256i(r, e);
4256
4257 let e = _mm256_setr_epi32(3, 9, 3, 3, 3, 3, 3, 9);
4258 let r = _mm256_blend_epi32::<0x82>(a, b);
4259 assert_eq_m256i(r, e);
4260
4261 let e = _mm256_setr_epi32(3, 3, 9, 9, 9, 9, 9, 3);
4262 let r = _mm256_blend_epi32::<0x7C>(a, b);
4263 assert_eq_m256i(r, e);
4264 }
4265
4266 #[simd_test(enable = "avx2")]
4267 unsafe fn test_mm256_blend_epi16() {
4268 let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4269 let e = _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3);
4270 let r = _mm256_blend_epi16::<0x01>(a, b);
4271 assert_eq_m256i(r, e);
4272
4273 let r = _mm256_blend_epi16::<0xFE>(b, a);
4274 assert_eq_m256i(r, e);
4275 }
4276
4277 #[simd_test(enable = "avx2")]
4278 unsafe fn test_mm256_blendv_epi8() {
4279 let (a, b) = (_mm256_set1_epi8(4), _mm256_set1_epi8(2));
4280 let mask = _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), -1);
4281 let e = _mm256_insert_epi8::<2>(_mm256_set1_epi8(4), 2);
4282 let r = _mm256_blendv_epi8(a, b, mask);
4283 assert_eq_m256i(r, e);
4284 }
4285
4286 #[simd_test(enable = "avx2")]
4287 unsafe fn test_mm_broadcastb_epi8() {
4288 let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
4289 let res = _mm_broadcastb_epi8(a);
4290 assert_eq_m128i(res, _mm_set1_epi8(0x2a));
4291 }
4292
4293 #[simd_test(enable = "avx2")]
4294 unsafe fn test_mm256_broadcastb_epi8() {
4295 let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
4296 let res = _mm256_broadcastb_epi8(a);
4297 assert_eq_m256i(res, _mm256_set1_epi8(0x2a));
4298 }
4299
4300 #[simd_test(enable = "avx2")]
4301 unsafe fn test_mm_broadcastd_epi32() {
4302 let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
4303 let res = _mm_broadcastd_epi32(a);
4304 assert_eq_m128i(res, _mm_set1_epi32(0x2a));
4305 }
4306
4307 #[simd_test(enable = "avx2")]
4308 unsafe fn test_mm256_broadcastd_epi32() {
4309 let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
4310 let res = _mm256_broadcastd_epi32(a);
4311 assert_eq_m256i(res, _mm256_set1_epi32(0x2a));
4312 }
4313
4314 #[simd_test(enable = "avx2")]
4315 unsafe fn test_mm_broadcastq_epi64() {
4316 let a = _mm_setr_epi64x(0x1ffffffff, 0);
4317 let res = _mm_broadcastq_epi64(a);
4318 assert_eq_m128i(res, _mm_set1_epi64x(0x1ffffffff));
4319 }
4320
4321 #[simd_test(enable = "avx2")]
4322 unsafe fn test_mm256_broadcastq_epi64() {
4323 let a = _mm_setr_epi64x(0x1ffffffff, 0);
4324 let res = _mm256_broadcastq_epi64(a);
4325 assert_eq_m256i(res, _mm256_set1_epi64x(0x1ffffffff));
4326 }
4327
4328 #[simd_test(enable = "avx2")]
4329 unsafe fn test_mm_broadcastsd_pd() {
4330 let a = _mm_setr_pd(6.88, 3.44);
4331 let res = _mm_broadcastsd_pd(a);
4332 assert_eq_m128d(res, _mm_set1_pd(6.88));
4333 }
4334
4335 #[simd_test(enable = "avx2")]
4336 unsafe fn test_mm256_broadcastsd_pd() {
4337 let a = _mm_setr_pd(6.88, 3.44);
4338 let res = _mm256_broadcastsd_pd(a);
4339 assert_eq_m256d(res, _mm256_set1_pd(6.88f64));
4340 }
4341
4342 #[simd_test(enable = "avx2")]
4343 unsafe fn test_mm_broadcastsi128_si256() {
4344 let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
4345 let res = _mm_broadcastsi128_si256(a);
4346 let retval = _mm256_setr_epi64x(
4347 0x0987654321012334,
4348 0x5678909876543210,
4349 0x0987654321012334,
4350 0x5678909876543210,
4351 );
4352 assert_eq_m256i(res, retval);
4353 }
4354
4355 #[simd_test(enable = "avx2")]
4356 unsafe fn test_mm256_broadcastsi128_si256() {
4357 let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
4358 let res = _mm256_broadcastsi128_si256(a);
4359 let retval = _mm256_setr_epi64x(
4360 0x0987654321012334,
4361 0x5678909876543210,
4362 0x0987654321012334,
4363 0x5678909876543210,
4364 );
4365 assert_eq_m256i(res, retval);
4366 }
4367
4368 #[simd_test(enable = "avx2")]
4369 unsafe fn test_mm_broadcastss_ps() {
4370 let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
4371 let res = _mm_broadcastss_ps(a);
4372 assert_eq_m128(res, _mm_set1_ps(6.88));
4373 }
4374
4375 #[simd_test(enable = "avx2")]
4376 unsafe fn test_mm256_broadcastss_ps() {
4377 let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
4378 let res = _mm256_broadcastss_ps(a);
4379 assert_eq_m256(res, _mm256_set1_ps(6.88));
4380 }
4381
4382 #[simd_test(enable = "avx2")]
4383 unsafe fn test_mm_broadcastw_epi16() {
4384 let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
4385 let res = _mm_broadcastw_epi16(a);
4386 assert_eq_m128i(res, _mm_set1_epi16(0x22b));
4387 }
4388
4389 #[simd_test(enable = "avx2")]
4390 unsafe fn test_mm256_broadcastw_epi16() {
4391 let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
4392 let res = _mm256_broadcastw_epi16(a);
4393 assert_eq_m256i(res, _mm256_set1_epi16(0x22b));
4394 }
4395
4396 #[simd_test(enable = "avx2")]
4397 unsafe fn test_mm256_cmpeq_epi8() {
4398 #[rustfmt::skip]
4399 let a = _mm256_setr_epi8(
4400 0, 1, 2, 3, 4, 5, 6, 7,
4401 8, 9, 10, 11, 12, 13, 14, 15,
4402 16, 17, 18, 19, 20, 21, 22, 23,
4403 24, 25, 26, 27, 28, 29, 30, 31,
4404 );
4405 #[rustfmt::skip]
4406 let b = _mm256_setr_epi8(
4407 31, 30, 2, 28, 27, 26, 25, 24,
4408 23, 22, 21, 20, 19, 18, 17, 16,
4409 15, 14, 13, 12, 11, 10, 9, 8,
4410 7, 6, 5, 4, 3, 2, 1, 0,
4411 );
4412 let r = _mm256_cmpeq_epi8(a, b);
4413 assert_eq_m256i(r, _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), !0));
4414 }
4415
4416 #[simd_test(enable = "avx2")]
4417 unsafe fn test_mm256_cmpeq_epi16() {
4418 #[rustfmt::skip]
4419 let a = _mm256_setr_epi16(
4420 0, 1, 2, 3, 4, 5, 6, 7,
4421 8, 9, 10, 11, 12, 13, 14, 15,
4422 );
4423 #[rustfmt::skip]
4424 let b = _mm256_setr_epi16(
4425 15, 14, 2, 12, 11, 10, 9, 8,
4426 7, 6, 5, 4, 3, 2, 1, 0,
4427 );
4428 let r = _mm256_cmpeq_epi16(a, b);
4429 assert_eq_m256i(r, _mm256_insert_epi16::<2>(_mm256_set1_epi16(0), !0));
4430 }
4431
4432 #[simd_test(enable = "avx2")]
4433 unsafe fn test_mm256_cmpeq_epi32() {
4434 let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4435 let b = _mm256_setr_epi32(7, 6, 2, 4, 3, 2, 1, 0);
4436 let r = _mm256_cmpeq_epi32(a, b);
4437 let e = _mm256_set1_epi32(0);
4438 let e = _mm256_insert_epi32::<2>(e, !0);
4439 assert_eq_m256i(r, e);
4440 }
4441
4442 #[simd_test(enable = "avx2")]
4443 unsafe fn test_mm256_cmpeq_epi64() {
4444 let a = _mm256_setr_epi64x(0, 1, 2, 3);
4445 let b = _mm256_setr_epi64x(3, 2, 2, 0);
4446 let r = _mm256_cmpeq_epi64(a, b);
4447 assert_eq_m256i(r, _mm256_insert_epi64::<2>(_mm256_set1_epi64x(0), !0));
4448 }
4449
4450 #[simd_test(enable = "avx2")]
4451 unsafe fn test_mm256_cmpgt_epi8() {
4452 let a = _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), 5);
4453 let b = _mm256_set1_epi8(0);
4454 let r = _mm256_cmpgt_epi8(a, b);
4455 assert_eq_m256i(r, _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), !0));
4456 }
4457
4458 #[simd_test(enable = "avx2")]
4459 unsafe fn test_mm256_cmpgt_epi16() {
4460 let a = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 5);
4461 let b = _mm256_set1_epi16(0);
4462 let r = _mm256_cmpgt_epi16(a, b);
4463 assert_eq_m256i(r, _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), !0));
4464 }
4465
4466 #[simd_test(enable = "avx2")]
4467 unsafe fn test_mm256_cmpgt_epi32() {
4468 let a = _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), 5);
4469 let b = _mm256_set1_epi32(0);
4470 let r = _mm256_cmpgt_epi32(a, b);
4471 assert_eq_m256i(r, _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), !0));
4472 }
4473
4474 #[simd_test(enable = "avx2")]
4475 unsafe fn test_mm256_cmpgt_epi64() {
4476 let a = _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), 5);
4477 let b = _mm256_set1_epi64x(0);
4478 let r = _mm256_cmpgt_epi64(a, b);
4479 assert_eq_m256i(r, _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), !0));
4480 }
4481
4482 #[simd_test(enable = "avx2")]
4483 unsafe fn test_mm256_cvtepi8_epi16() {
4484 #[rustfmt::skip]
4485 let a = _mm_setr_epi8(
4486 0, 0, -1, 1, -2, 2, -3, 3,
4487 -4, 4, -5, 5, -6, 6, -7, 7,
4488 );
4489 #[rustfmt::skip]
4490 let r = _mm256_setr_epi16(
4491 0, 0, -1, 1, -2, 2, -3, 3,
4492 -4, 4, -5, 5, -6, 6, -7, 7,
4493 );
4494 assert_eq_m256i(r, _mm256_cvtepi8_epi16(a));
4495 }
4496
4497 #[simd_test(enable = "avx2")]
4498 unsafe fn test_mm256_cvtepi8_epi32() {
4499 #[rustfmt::skip]
4500 let a = _mm_setr_epi8(
4501 0, 0, -1, 1, -2, 2, -3, 3,
4502 -4, 4, -5, 5, -6, 6, -7, 7,
4503 );
4504 let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
4505 assert_eq_m256i(r, _mm256_cvtepi8_epi32(a));
4506 }
4507
4508 #[simd_test(enable = "avx2")]
4509 unsafe fn test_mm256_cvtepi8_epi64() {
4510 #[rustfmt::skip]
4511 let a = _mm_setr_epi8(
4512 0, 0, -1, 1, -2, 2, -3, 3,
4513 -4, 4, -5, 5, -6, 6, -7, 7,
4514 );
4515 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4516 assert_eq_m256i(r, _mm256_cvtepi8_epi64(a));
4517 }
4518
4519 #[simd_test(enable = "avx2")]
4520 unsafe fn test_mm256_cvtepi16_epi32() {
4521 let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
4522 let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
4523 assert_eq_m256i(r, _mm256_cvtepi16_epi32(a));
4524 }
4525
4526 #[simd_test(enable = "avx2")]
4527 unsafe fn test_mm256_cvtepi16_epi64() {
4528 let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
4529 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4530 assert_eq_m256i(r, _mm256_cvtepi16_epi64(a));
4531 }
4532
4533 #[simd_test(enable = "avx2")]
4534 unsafe fn test_mm256_cvtepi32_epi64() {
4535 let a = _mm_setr_epi32(0, 0, -1, 1);
4536 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4537 assert_eq_m256i(r, _mm256_cvtepi32_epi64(a));
4538 }
4539
4540 #[simd_test(enable = "avx2")]
4541 unsafe fn test_mm256_cvtepu16_epi32() {
4542 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4543 let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4544 assert_eq_m256i(r, _mm256_cvtepu16_epi32(a));
4545 }
4546
4547 #[simd_test(enable = "avx2")]
4548 unsafe fn test_mm256_cvtepu16_epi64() {
4549 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4550 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4551 assert_eq_m256i(r, _mm256_cvtepu16_epi64(a));
4552 }
4553
4554 #[simd_test(enable = "avx2")]
4555 unsafe fn test_mm256_cvtepu32_epi64() {
4556 let a = _mm_setr_epi32(0, 1, 2, 3);
4557 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4558 assert_eq_m256i(r, _mm256_cvtepu32_epi64(a));
4559 }
4560
4561 #[simd_test(enable = "avx2")]
4562 unsafe fn test_mm256_cvtepu8_epi16() {
4563 #[rustfmt::skip]
4564 let a = _mm_setr_epi8(
4565 0, 1, 2, 3, 4, 5, 6, 7,
4566 8, 9, 10, 11, 12, 13, 14, 15,
4567 );
4568 #[rustfmt::skip]
4569 let r = _mm256_setr_epi16(
4570 0, 1, 2, 3, 4, 5, 6, 7,
4571 8, 9, 10, 11, 12, 13, 14, 15,
4572 );
4573 assert_eq_m256i(r, _mm256_cvtepu8_epi16(a));
4574 }
4575
4576 #[simd_test(enable = "avx2")]
4577 unsafe fn test_mm256_cvtepu8_epi32() {
4578 #[rustfmt::skip]
4579 let a = _mm_setr_epi8(
4580 0, 1, 2, 3, 4, 5, 6, 7,
4581 8, 9, 10, 11, 12, 13, 14, 15,
4582 );
4583 let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4584 assert_eq_m256i(r, _mm256_cvtepu8_epi32(a));
4585 }
4586
4587 #[simd_test(enable = "avx2")]
4588 unsafe fn test_mm256_cvtepu8_epi64() {
4589 #[rustfmt::skip]
4590 let a = _mm_setr_epi8(
4591 0, 1, 2, 3, 4, 5, 6, 7,
4592 8, 9, 10, 11, 12, 13, 14, 15,
4593 );
4594 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4595 assert_eq_m256i(r, _mm256_cvtepu8_epi64(a));
4596 }
4597
4598 #[simd_test(enable = "avx2")]
4599 unsafe fn test_mm256_extracti128_si256() {
4600 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4601 let r = _mm256_extracti128_si256::<1>(a);
4602 let e = _mm_setr_epi64x(3, 4);
4603 assert_eq_m128i(r, e);
4604 }
4605
4606 #[simd_test(enable = "avx2")]
4607 unsafe fn test_mm256_hadd_epi16() {
4608 let a = _mm256_set1_epi16(2);
4609 let b = _mm256_set1_epi16(4);
4610 let r = _mm256_hadd_epi16(a, b);
4611 let e = _mm256_setr_epi16(4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8);
4612 assert_eq_m256i(r, e);
4613 }
4614
4615 #[simd_test(enable = "avx2")]
4616 unsafe fn test_mm256_hadd_epi32() {
4617 let a = _mm256_set1_epi32(2);
4618 let b = _mm256_set1_epi32(4);
4619 let r = _mm256_hadd_epi32(a, b);
4620 let e = _mm256_setr_epi32(4, 4, 8, 8, 4, 4, 8, 8);
4621 assert_eq_m256i(r, e);
4622 }
4623
4624 #[simd_test(enable = "avx2")]
4625 unsafe fn test_mm256_hadds_epi16() {
4626 let a = _mm256_set1_epi16(2);
4627 let a = _mm256_insert_epi16::<0>(a, 0x7fff);
4628 let a = _mm256_insert_epi16::<1>(a, 1);
4629 let b = _mm256_set1_epi16(4);
4630 let r = _mm256_hadds_epi16(a, b);
4631 #[rustfmt::skip]
4632 let e = _mm256_setr_epi16(
4633 0x7FFF, 4, 4, 4, 8, 8, 8, 8,
4634 4, 4, 4, 4, 8, 8, 8, 8,
4635 );
4636 assert_eq_m256i(r, e);
4637 }
4638
4639 #[simd_test(enable = "avx2")]
4640 unsafe fn test_mm256_hsub_epi16() {
4641 let a = _mm256_set1_epi16(2);
4642 let b = _mm256_set1_epi16(4);
4643 let r = _mm256_hsub_epi16(a, b);
4644 let e = _mm256_set1_epi16(0);
4645 assert_eq_m256i(r, e);
4646 }
4647
4648 #[simd_test(enable = "avx2")]
4649 unsafe fn test_mm256_hsub_epi32() {
4650 let a = _mm256_set1_epi32(2);
4651 let b = _mm256_set1_epi32(4);
4652 let r = _mm256_hsub_epi32(a, b);
4653 let e = _mm256_set1_epi32(0);
4654 assert_eq_m256i(r, e);
4655 }
4656
4657 #[simd_test(enable = "avx2")]
4658 unsafe fn test_mm256_hsubs_epi16() {
4659 let a = _mm256_set1_epi16(2);
4660 let a = _mm256_insert_epi16::<0>(a, 0x7fff);
4661 let a = _mm256_insert_epi16::<1>(a, -1);
4662 let b = _mm256_set1_epi16(4);
4663 let r = _mm256_hsubs_epi16(a, b);
4664 let e = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 0x7FFF);
4665 assert_eq_m256i(r, e);
4666 }
4667
4668 #[simd_test(enable = "avx2")]
4669 unsafe fn test_mm256_madd_epi16() {
4670 let a = _mm256_set1_epi16(2);
4671 let b = _mm256_set1_epi16(4);
4672 let r = _mm256_madd_epi16(a, b);
4673 let e = _mm256_set1_epi32(16);
4674 assert_eq_m256i(r, e);
4675 }
4676
4677 #[simd_test(enable = "avx2")]
4678 unsafe fn test_mm256_inserti128_si256() {
4679 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4680 let b = _mm_setr_epi64x(7, 8);
4681 let r = _mm256_inserti128_si256::<1>(a, b);
4682 let e = _mm256_setr_epi64x(1, 2, 7, 8);
4683 assert_eq_m256i(r, e);
4684 }
4685
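// maddubs (vpmaddubsw) treats the bytes of `a` as unsigned and the bytes of `b` as
// signed, multiplies them vertically and adds adjacent products into 16-bit lanes:
// 2 * 4 + 2 * 4 = 16.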
4686 #[simd_test(enable = "avx2")]
4687 unsafe fn test_mm256_maddubs_epi16() {
4688 let a = _mm256_set1_epi8(2);
4689 let b = _mm256_set1_epi8(4);
4690 let r = _mm256_maddubs_epi16(a, b);
4691 let e = _mm256_set1_epi16(16);
4692 assert_eq_m256i(r, e);
4693 }
4694
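// The maskload intrinsics load an element only where the most significant bit of the
// corresponding mask element is set; unselected lanes are zeroed.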
4695 #[simd_test(enable = "avx2")]
4696 unsafe fn test_mm_maskload_epi32() {
4697 let nums = [1, 2, 3, 4];
4698 let a = &nums as *const i32;
4699 let mask = _mm_setr_epi32(-1, 0, 0, -1);
4700 let r = _mm_maskload_epi32(a, mask);
4701 let e = _mm_setr_epi32(1, 0, 0, 4);
4702 assert_eq_m128i(r, e);
4703 }
4704
4705 #[simd_test(enable = "avx2")]
4706 unsafe fn test_mm256_maskload_epi32() {
4707 let nums = [1, 2, 3, 4, 5, 6, 7, 8];
4708 let a = &nums as *const i32;
4709 let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
4710 let r = _mm256_maskload_epi32(a, mask);
4711 let e = _mm256_setr_epi32(1, 0, 0, 4, 0, 6, 7, 0);
4712 assert_eq_m256i(r, e);
4713 }
4714
4715 #[simd_test(enable = "avx2")]
4716 unsafe fn test_mm_maskload_epi64() {
4717 let nums = [1_i64, 2_i64];
4718 let a = &nums as *const i64;
4719 let mask = _mm_setr_epi64x(0, -1);
4720 let r = _mm_maskload_epi64(a, mask);
4721 let e = _mm_setr_epi64x(0, 2);
4722 assert_eq_m128i(r, e);
4723 }
4724
4725 #[simd_test(enable = "avx2")]
4726 unsafe fn test_mm256_maskload_epi64() {
4727 let nums = [1_i64, 2_i64, 3_i64, 4_i64];
4728 let a = &nums as *const i64;
4729 let mask = _mm256_setr_epi64x(0, -1, -1, 0);
4730 let r = _mm256_maskload_epi64(a, mask);
4731 let e = _mm256_setr_epi64x(0, 2, 3, 0);
4732 assert_eq_m256i(r, e);
4733 }
4734
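// The maskstore intrinsics write only the lanes whose mask element has its most
// significant bit set; the remaining memory locations are left untouched.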
4735 #[simd_test(enable = "avx2")]
4736 unsafe fn test_mm_maskstore_epi32() {
4737 let a = _mm_setr_epi32(1, 2, 3, 4);
4738 let mut arr = [-1, -1, -1, -1];
4739 let mask = _mm_setr_epi32(-1, 0, 0, -1);
4740 _mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
4741 let e = [1, -1, -1, 4];
4742 assert_eq!(arr, e);
4743 }
4744
4745 #[simd_test(enable = "avx2")]
4746 unsafe fn test_mm256_maskstore_epi32() {
4747 let a = _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8);
4748 let mut arr = [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1];
4749 let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
4750 _mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
4751 let e = [1, -1, -1, 42, -1, 6, 7, -1];
4752 assert_eq!(arr, e);
4753 }
4754
4755 #[simd_test(enable = "avx2")]
4756 unsafe fn test_mm_maskstore_epi64() {
4757 let a = _mm_setr_epi64x(1_i64, 2_i64);
4758 let mut arr = [-1_i64, -1_i64];
4759 let mask = _mm_setr_epi64x(0, -1);
4760 _mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
4761 let e = [-1, 2];
4762 assert_eq!(arr, e);
4763 }
4764
4765 #[simd_test(enable = "avx2")]
4766 unsafe fn test_mm256_maskstore_epi64() {
4767 let a = _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64);
4768 let mut arr = [-1_i64, -1_i64, -1_i64, -1_i64];
4769 let mask = _mm256_setr_epi64x(0, -1, -1, 0);
4770 _mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
4771 let e = [-1, 2, 3, -1];
4772 assert_eq!(arr, e);
4773 }
4774
4775 #[simd_test(enable = "avx2")]
4776 unsafe fn test_mm256_max_epi16() {
4777 let a = _mm256_set1_epi16(2);
4778 let b = _mm256_set1_epi16(4);
4779 let r = _mm256_max_epi16(a, b);
4780 assert_eq_m256i(r, b);
4781 }
4782
4783 #[simd_test(enable = "avx2")]
4784 unsafe fn test_mm256_max_epi32() {
4785 let a = _mm256_set1_epi32(2);
4786 let b = _mm256_set1_epi32(4);
4787 let r = _mm256_max_epi32(a, b);
4788 assert_eq_m256i(r, b);
4789 }
4790
4791 #[simd_test(enable = "avx2")]
4792 unsafe fn test_mm256_max_epi8() {
4793 let a = _mm256_set1_epi8(2);
4794 let b = _mm256_set1_epi8(4);
4795 let r = _mm256_max_epi8(a, b);
4796 assert_eq_m256i(r, b);
4797 }
4798
4799 #[simd_test(enable = "avx2")]
4800 unsafe fn test_mm256_max_epu16() {
4801 let a = _mm256_set1_epi16(2);
4802 let b = _mm256_set1_epi16(4);
4803 let r = _mm256_max_epu16(a, b);
4804 assert_eq_m256i(r, b);
4805 }
4806
4807 #[simd_test(enable = "avx2")]
4808 unsafe fn test_mm256_max_epu32() {
4809 let a = _mm256_set1_epi32(2);
4810 let b = _mm256_set1_epi32(4);
4811 let r = _mm256_max_epu32(a, b);
4812 assert_eq_m256i(r, b);
4813 }
4814
4815 #[simd_test(enable = "avx2")]
4816 unsafe fn test_mm256_max_epu8() {
4817 let a = _mm256_set1_epi8(2);
4818 let b = _mm256_set1_epi8(4);
4819 let r = _mm256_max_epu8(a, b);
4820 assert_eq_m256i(r, b);
4821 }
4822
4823 #[simd_test(enable = "avx2")]
4824 unsafe fn test_mm256_min_epi16() {
4825 let a = _mm256_set1_epi16(2);
4826 let b = _mm256_set1_epi16(4);
4827 let r = _mm256_min_epi16(a, b);
4828 assert_eq_m256i(r, a);
4829 }
4830
4831 #[simd_test(enable = "avx2")]
4832 unsafe fn test_mm256_min_epi32() {
4833 let a = _mm256_set1_epi32(2);
4834 let b = _mm256_set1_epi32(4);
4835 let r = _mm256_min_epi32(a, b);
4836 assert_eq_m256i(r, a);
4837 }
4838
4839 #[simd_test(enable = "avx2")]
4840 unsafe fn test_mm256_min_epi8() {
4841 let a = _mm256_set1_epi8(2);
4842 let b = _mm256_set1_epi8(4);
4843 let r = _mm256_min_epi8(a, b);
4844 assert_eq_m256i(r, a);
4845 }
4846
4847 #[simd_test(enable = "avx2")]
4848 unsafe fn test_mm256_min_epu16() {
4849 let a = _mm256_set1_epi16(2);
4850 let b = _mm256_set1_epi16(4);
4851 let r = _mm256_min_epu16(a, b);
4852 assert_eq_m256i(r, a);
4853 }
4854
4855 #[simd_test(enable = "avx2")]
4856 unsafe fn test_mm256_min_epu32() {
4857 let a = _mm256_set1_epi32(2);
4858 let b = _mm256_set1_epi32(4);
4859 let r = _mm256_min_epu32(a, b);
4860 assert_eq_m256i(r, a);
4861 }
4862
4863 #[simd_test(enable = "avx2")]
4864 unsafe fn test_mm256_min_epu8() {
4865 let a = _mm256_set1_epi8(2);
4866 let b = _mm256_set1_epi8(4);
4867 let r = _mm256_min_epu8(a, b);
4868 assert_eq_m256i(r, a);
4869 }
4870
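// _mm256_movemask_epi8 collects the sign bit of each of the 32 bytes into an i32;
// with every byte equal to -1 the mask is 0xFFFF_FFFF, i.e. -1.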
4871 #[simd_test(enable = "avx2")]
4872 unsafe fn test_mm256_movemask_epi8() {
4873 let a = _mm256_set1_epi8(-1);
4874 let r = _mm256_movemask_epi8(a);
4875 let e = -1;
4876 assert_eq!(r, e);
4877 }
4878
4879 #[simd_test(enable = "avx2")]
4880 unsafe fn test_mm256_mpsadbw_epu8() {
4881 let a = _mm256_set1_epi8(2);
4882 let b = _mm256_set1_epi8(4);
4883 let r = _mm256_mpsadbw_epu8::<0>(a, b);
4884 let e = _mm256_set1_epi16(8);
4885 assert_eq_m256i(r, e);
4886 }
4887
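// _mm256_mul_epi32 (vpmuldq) multiplies only the even-indexed 32-bit elements,
// sign-extending them to 64 bits, so the four products come from lanes 0, 2, 4 and 6;
// _mm256_mul_epu32 below does the same with zero extension.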
4888 #[simd_test(enable = "avx2")]
4889 unsafe fn test_mm256_mul_epi32() {
4890 let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
4891 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4892 let r = _mm256_mul_epi32(a, b);
4893 let e = _mm256_setr_epi64x(0, 0, 10, 14);
4894 assert_eq_m256i(r, e);
4895 }
4896
4897 #[simd_test(enable = "avx2")]
4898 unsafe fn test_mm256_mul_epu32() {
4899 let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
4900 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4901 let r = _mm256_mul_epu32(a, b);
4902 let e = _mm256_setr_epi64x(0, 0, 10, 14);
4903 assert_eq_m256i(r, e);
4904 }
4905
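// The mulhi intrinsics keep the high 16 bits of the 32-bit product:
// 6535 * 6535 = 42_706_225 and 42_706_225 >> 16 == 651.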
4906 #[simd_test(enable = "avx2")]
4907 unsafe fn test_mm256_mulhi_epi16() {
4908 let a = _mm256_set1_epi16(6535);
4909 let b = _mm256_set1_epi16(6535);
4910 let r = _mm256_mulhi_epi16(a, b);
4911 let e = _mm256_set1_epi16(651);
4912 assert_eq_m256i(r, e);
4913 }
4914
4915 #[simd_test(enable = "avx2")]
4916 unsafe fn test_mm256_mulhi_epu16() {
4917 let a = _mm256_set1_epi16(6535);
4918 let b = _mm256_set1_epi16(6535);
4919 let r = _mm256_mulhi_epu16(a, b);
4920 let e = _mm256_set1_epi16(651);
4921 assert_eq_m256i(r, e);
4922 }
4923
4924 #[simd_test(enable = "avx2")]
4925 unsafe fn test_mm256_mullo_epi16() {
4926 let a = _mm256_set1_epi16(2);
4927 let b = _mm256_set1_epi16(4);
4928 let r = _mm256_mullo_epi16(a, b);
4929 let e = _mm256_set1_epi16(8);
4930 assert_eq_m256i(r, e);
4931 }
4932
4933 #[simd_test(enable = "avx2")]
4934 unsafe fn test_mm256_mullo_epi32() {
4935 let a = _mm256_set1_epi32(2);
4936 let b = _mm256_set1_epi32(4);
4937 let r = _mm256_mullo_epi32(a, b);
4938 let e = _mm256_set1_epi32(8);
4939 assert_eq_m256i(r, e);
4940 }
4941
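// mulhrs multiplies, shifts the 32-bit product right by 14, adds 1, then shifts right
// once more (rounding to bit 15): 0x4000 * 0x4000 (0.5 * 0.5 in Q15) gives 0x2000.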
4942 #[simd_test(enable = "avx2")]
4943 unsafe fn test_mm256_mulhrs_epi16() {
4944 let a = _mm256_set1_epi16(0x4000);
4945 let b = _mm256_set1_epi16(0x4000);
4946 let r = _mm256_mulhrs_epi16(a, b);
4947 let e = _mm256_set1_epi16(0x2000);
4948 assert_eq_m256i(r, e);
4949 }
4950
4951 #[simd_test(enable = "avx2")]
4952 unsafe fn test_mm256_or_si256() {
4953 let a = _mm256_set1_epi8(-1);
4954 let b = _mm256_set1_epi8(0);
4955 let r = _mm256_or_si256(a, b);
4956 assert_eq_m256i(r, a);
4957 }
4958
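// packs/packus narrow 16-bit (or 32-bit) elements with saturation; within each
// 128-bit lane the first half of the output comes from `a` and the second half from
// `b`, which is why the 2s and 4s below alternate in 8-element (or 4-element) groups.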
4959 #[simd_test(enable = "avx2")]
4960 unsafe fn test_mm256_packs_epi16() {
4961 let a = _mm256_set1_epi16(2);
4962 let b = _mm256_set1_epi16(4);
4963 let r = _mm256_packs_epi16(a, b);
4964 #[rustfmt::skip]
4965 let e = _mm256_setr_epi8(
4966 2, 2, 2, 2, 2, 2, 2, 2,
4967 4, 4, 4, 4, 4, 4, 4, 4,
4968 2, 2, 2, 2, 2, 2, 2, 2,
4969 4, 4, 4, 4, 4, 4, 4, 4,
4970 );
4971
4972 assert_eq_m256i(r, e);
4973 }
4974
4975 #[simd_test(enable = "avx2")]
4976 unsafe fn test_mm256_packs_epi32() {
4977 let a = _mm256_set1_epi32(2);
4978 let b = _mm256_set1_epi32(4);
4979 let r = _mm256_packs_epi32(a, b);
4980 let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
4981
4982 assert_eq_m256i(r, e);
4983 }
4984
4985 #[simd_test(enable = "avx2")]
4986 unsafe fn test_mm256_packus_epi16() {
4987 let a = _mm256_set1_epi16(2);
4988 let b = _mm256_set1_epi16(4);
4989 let r = _mm256_packus_epi16(a, b);
4990 #[rustfmt::skip]
4991 let e = _mm256_setr_epi8(
4992 2, 2, 2, 2, 2, 2, 2, 2,
4993 4, 4, 4, 4, 4, 4, 4, 4,
4994 2, 2, 2, 2, 2, 2, 2, 2,
4995 4, 4, 4, 4, 4, 4, 4, 4,
4996 );
4997
4998 assert_eq_m256i(r, e);
4999 }
5000
5001 #[simd_test(enable = "avx2")]
5002 unsafe fn test_mm256_packus_epi32() {
5003 let a = _mm256_set1_epi32(2);
5004 let b = _mm256_set1_epi32(4);
5005 let r = _mm256_packus_epi32(a, b);
5006 let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
5007
5008 assert_eq_m256i(r, e);
5009 }
5010
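// _mm256_sad_epu8 sums the absolute differences of each group of eight bytes into a
// 64-bit lane: 8 * |2 - 4| = 16.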
5011 #[simd_test(enable = "avx2")]
5012 unsafe fn test_mm256_sad_epu8() {
5013 let a = _mm256_set1_epi8(2);
5014 let b = _mm256_set1_epi8(4);
5015 let r = _mm256_sad_epu8(a, b);
5016 let e = _mm256_set1_epi64x(16);
5017 assert_eq_m256i(r, e);
5018 }
5019
5020 #[simd_test(enable = "avx2")]
5021 unsafe fn test_mm256_shufflehi_epi16() {
5022 #[rustfmt::skip]
5023 let a = _mm256_setr_epi16(
5024 0, 1, 2, 3, 11, 22, 33, 44,
5025 4, 5, 6, 7, 55, 66, 77, 88,
5026 );
5027 #[rustfmt::skip]
5028 let e = _mm256_setr_epi16(
5029 0, 1, 2, 3, 44, 22, 22, 11,
5030 4, 5, 6, 7, 88, 66, 66, 55,
5031 );
5032 let r = _mm256_shufflehi_epi16::<0b00_01_01_11>(a);
5033 assert_eq_m256i(r, e);
5034 }
5035
5036 #[simd_test(enable = "avx2")]
5037 unsafe fn test_mm256_shufflelo_epi16() {
5038 #[rustfmt::skip]
5039 let a = _mm256_setr_epi16(
5040 11, 22, 33, 44, 0, 1, 2, 3,
5041 55, 66, 77, 88, 4, 5, 6, 7,
5042 );
5043 #[rustfmt::skip]
5044 let e = _mm256_setr_epi16(
5045 44, 22, 22, 11, 0, 1, 2, 3,
5046 88, 66, 66, 55, 4, 5, 6, 7,
5047 );
5048 let r = _mm256_shufflelo_epi16::<0b00_01_01_11>(a);
5049 assert_eq_m256i(r, e);
5050 }
5051
5052 #[simd_test(enable = "avx2")]
5053 unsafe fn test_mm256_sign_epi16() {
5054 let a = _mm256_set1_epi16(2);
5055 let b = _mm256_set1_epi16(-1);
5056 let r = _mm256_sign_epi16(a, b);
5057 let e = _mm256_set1_epi16(-2);
5058 assert_eq_m256i(r, e);
5059 }
5060
5061 #[simd_test(enable = "avx2")]
5062 unsafe fn test_mm256_sign_epi32() {
5063 let a = _mm256_set1_epi32(2);
5064 let b = _mm256_set1_epi32(-1);
5065 let r = _mm256_sign_epi32(a, b);
5066 let e = _mm256_set1_epi32(-2);
5067 assert_eq_m256i(r, e);
5068 }
5069
5070 #[simd_test(enable = "avx2")]
5071 unsafe fn test_mm256_sign_epi8() {
5072 let a = _mm256_set1_epi8(2);
5073 let b = _mm256_set1_epi8(-1);
5074 let r = _mm256_sign_epi8(a, b);
5075 let e = _mm256_set1_epi8(-2);
5076 assert_eq_m256i(r, e);
5077 }
5078
5079 #[simd_test(enable = "avx2")]
5080 unsafe fn test_mm256_sll_epi16() {
5081 let a = _mm256_set1_epi16(0xFF);
5082 let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
5083 let r = _mm256_sll_epi16(a, b);
5084 assert_eq_m256i(r, _mm256_set1_epi16(0xFF0));
5085 }
5086
5087 #[simd_test(enable = "avx2")]
5088 unsafe fn test_mm256_sll_epi32() {
5089 let a = _mm256_set1_epi32(0xFFFF);
5090 let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
5091 let r = _mm256_sll_epi32(a, b);
5092 assert_eq_m256i(r, _mm256_set1_epi32(0xFFFF0));
5093 }
5094
5095 #[simd_test(enable = "avx2")]
5096 unsafe fn test_mm256_sll_epi64() {
5097 let a = _mm256_set1_epi64x(0xFFFFFFFF);
5098 let b = _mm_insert_epi64::<0>(_mm_set1_epi64x(0), 4);
5099 let r = _mm256_sll_epi64(a, b);
5100 assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF0));
5101 }
5102
5103 #[simd_test(enable = "avx2")]
5104 unsafe fn test_mm256_slli_epi16() {
5105 assert_eq_m256i(
5106 _mm256_slli_epi16::<4>(_mm256_set1_epi16(0xFF)),
5107 _mm256_set1_epi16(0xFF0),
5108 );
5109 }
5110
5111 #[simd_test(enable = "avx2")]
5112 unsafe fn test_mm256_slli_epi32() {
5113 assert_eq_m256i(
5114 _mm256_slli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
5115 _mm256_set1_epi32(0xFFFF0),
5116 );
5117 }
5118
5119 #[simd_test(enable = "avx2")]
5120 unsafe fn test_mm256_slli_epi64() {
5121 assert_eq_m256i(
5122 _mm256_slli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
5123 _mm256_set1_epi64x(0xFFFFFFFF0),
5124 );
5125 }
5126
5127 #[simd_test(enable = "avx2")]
5128 unsafe fn test_mm256_slli_si256() {
5129 let a = _mm256_set1_epi64x(0xFFFFFFFF);
5130 let r = _mm256_slli_si256::<3>(a);
5131 assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF000000));
5132 }
5133
5134 #[simd_test(enable = "avx2")]
5135 unsafe fn test_mm_sllv_epi32() {
5136 let a = _mm_set1_epi32(2);
5137 let b = _mm_set1_epi32(1);
5138 let r = _mm_sllv_epi32(a, b);
5139 let e = _mm_set1_epi32(4);
5140 assert_eq_m128i(r, e);
5141 }
5142
5143 #[simd_test(enable = "avx2")]
5144 unsafe fn test_mm256_sllv_epi32() {
5145 let a = _mm256_set1_epi32(2);
5146 let b = _mm256_set1_epi32(1);
5147 let r = _mm256_sllv_epi32(a, b);
5148 let e = _mm256_set1_epi32(4);
5149 assert_eq_m256i(r, e);
5150 }
5151
5152 #[simd_test(enable = "avx2")]
5153 unsafe fn test_mm_sllv_epi64() {
5154 let a = _mm_set1_epi64x(2);
5155 let b = _mm_set1_epi64x(1);
5156 let r = _mm_sllv_epi64(a, b);
5157 let e = _mm_set1_epi64x(4);
5158 assert_eq_m128i(r, e);
5159 }
5160
5161 #[simd_test(enable = "avx2")]
5162 unsafe fn test_mm256_sllv_epi64() {
5163 let a = _mm256_set1_epi64x(2);
5164 let b = _mm256_set1_epi64x(1);
5165 let r = _mm256_sllv_epi64(a, b);
5166 let e = _mm256_set1_epi64x(4);
5167 assert_eq_m256i(r, e);
5168 }
5169
5170 #[simd_test(enable = "avx2")]
5171 unsafe fn test_mm256_sra_epi16() {
5172 let a = _mm256_set1_epi16(-1);
5173 let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
5174 let r = _mm256_sra_epi16(a, b);
5175 assert_eq_m256i(r, _mm256_set1_epi16(-1));
5176 }
5177
5178 #[simd_test(enable = "avx2")]
5179 unsafe fn test_mm256_sra_epi32() {
5180 let a = _mm256_set1_epi32(-1);
5181 let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 1);
5182 let r = _mm256_sra_epi32(a, b);
5183 assert_eq_m256i(r, _mm256_set1_epi32(-1));
5184 }
5185
5186 #[simd_test(enable = "avx2")]
5187 unsafe fn test_mm256_srai_epi16() {
5188 assert_eq_m256i(
5189 _mm256_srai_epi16::<1>(_mm256_set1_epi16(-1)),
5190 _mm256_set1_epi16(-1),
5191 );
5192 }
5193
5194 #[simd_test(enable = "avx2")]
5195 unsafe fn test_mm256_srai_epi32() {
5196 assert_eq_m256i(
5197 _mm256_srai_epi32::<1>(_mm256_set1_epi32(-1)),
5198 _mm256_set1_epi32(-1),
5199 );
5200 }
5201
5202 #[simd_test(enable = "avx2")]
5203 unsafe fn test_mm_srav_epi32() {
5204 let a = _mm_set1_epi32(4);
5205 let count = _mm_set1_epi32(1);
5206 let r = _mm_srav_epi32(a, count);
5207 let e = _mm_set1_epi32(2);
5208 assert_eq_m128i(r, e);
5209 }
5210
5211 #[simd_test(enable = "avx2")]
5212 unsafe fn test_mm256_srav_epi32() {
5213 let a = _mm256_set1_epi32(4);
5214 let count = _mm256_set1_epi32(1);
5215 let r = _mm256_srav_epi32(a, count);
5216 let e = _mm256_set1_epi32(2);
5217 assert_eq_m256i(r, e);
5218 }
5219
5220 #[simd_test(enable = "avx2")]
5221 unsafe fn test_mm256_srli_si256() {
5222 #[rustfmt::skip]
5223 let a = _mm256_setr_epi8(
5224 1, 2, 3, 4, 5, 6, 7, 8,
5225 9, 10, 11, 12, 13, 14, 15, 16,
5226 17, 18, 19, 20, 21, 22, 23, 24,
5227 25, 26, 27, 28, 29, 30, 31, 32,
5228 );
5229 let r = _mm256_srli_si256::<3>(a);
5230 #[rustfmt::skip]
5231 let e = _mm256_setr_epi8(
5232 4, 5, 6, 7, 8, 9, 10, 11,
5233 12, 13, 14, 15, 16, 0, 0, 0,
5234 20, 21, 22, 23, 24, 25, 26, 27,
5235 28, 29, 30, 31, 32, 0, 0, 0,
5236 );
5237 assert_eq_m256i(r, e);
5238 }
5239
5240 #[simd_test(enable = "avx2")]
5241 unsafe fn test_mm256_srl_epi16() {
5242 let a = _mm256_set1_epi16(0xFF);
5243 let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
5244 let r = _mm256_srl_epi16(a, b);
5245 assert_eq_m256i(r, _mm256_set1_epi16(0xF));
5246 }
5247
5248 #[simd_test(enable = "avx2")]
5249 unsafe fn test_mm256_srl_epi32() {
5250 let a = _mm256_set1_epi32(0xFFFF);
5251 let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
5252 let r = _mm256_srl_epi32(a, b);
5253 assert_eq_m256i(r, _mm256_set1_epi32(0xFFF));
5254 }
5255
5256 #[simd_test(enable = "avx2")]
5257 unsafe fn test_mm256_srl_epi64() {
5258 let a = _mm256_set1_epi64x(0xFFFFFFFF);
5259 let b = _mm_setr_epi64x(4, 0);
5260 let r = _mm256_srl_epi64(a, b);
5261 assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFF));
5262 }
5263
5264 #[simd_test(enable = "avx2")]
5265 unsafe fn test_mm256_srli_epi16() {
5266 assert_eq_m256i(
5267 _mm256_srli_epi16::<4>(_mm256_set1_epi16(0xFF)),
5268 _mm256_set1_epi16(0xF),
5269 );
5270 }
5271
5272 #[simd_test(enable = "avx2")]
5273 unsafe fn test_mm256_srli_epi32() {
5274 assert_eq_m256i(
5275 _mm256_srli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
5276 _mm256_set1_epi32(0xFFF),
5277 );
5278 }
5279
5280 #[simd_test(enable = "avx2")]
5281 unsafe fn test_mm256_srli_epi64() {
5282 assert_eq_m256i(
5283 _mm256_srli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
5284 _mm256_set1_epi64x(0xFFFFFFF),
5285 );
5286 }
5287
5288 #[simd_test(enable = "avx2")]
5289 unsafe fn test_mm_srlv_epi32() {
5290 let a = _mm_set1_epi32(2);
5291 let count = _mm_set1_epi32(1);
5292 let r = _mm_srlv_epi32(a, count);
5293 let e = _mm_set1_epi32(1);
5294 assert_eq_m128i(r, e);
5295 }
5296
5297 #[simd_test(enable = "avx2")]
5298 unsafe fn test_mm256_srlv_epi32() {
5299 let a = _mm256_set1_epi32(2);
5300 let count = _mm256_set1_epi32(1);
5301 let r = _mm256_srlv_epi32(a, count);
5302 let e = _mm256_set1_epi32(1);
5303 assert_eq_m256i(r, e);
5304 }
5305
5306 #[simd_test(enable = "avx2")]
5307 unsafe fn test_mm_srlv_epi64() {
5308 let a = _mm_set1_epi64x(2);
5309 let count = _mm_set1_epi64x(1);
5310 let r = _mm_srlv_epi64(a, count);
5311 let e = _mm_set1_epi64x(1);
5312 assert_eq_m128i(r, e);
5313 }
5314
5315 #[simd_test(enable = "avx2")]
5316 unsafe fn test_mm256_srlv_epi64() {
5317 let a = _mm256_set1_epi64x(2);
5318 let count = _mm256_set1_epi64x(1);
5319 let r = _mm256_srlv_epi64(a, count);
5320 let e = _mm256_set1_epi64x(1);
5321 assert_eq_m256i(r, e);
5322 }
5323
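// _mm256_stream_load_si256 is a non-temporal load and requires a 32-byte-aligned
// pointer; the alignment of __m256i guarantees that here.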
5324 #[simd_test(enable = "avx2")]
5325 unsafe fn test_mm256_stream_load_si256() {
5326 let a = _mm256_set_epi64x(5, 6, 7, 8);
5327 let r = _mm256_stream_load_si256(core::ptr::addr_of!(a) as *const _);
5328 assert_eq_m256i(a, r);
5329 }
5330
5331 #[simd_test(enable = "avx2")]
5332 unsafe fn test_mm256_sub_epi16() {
5333 let a = _mm256_set1_epi16(4);
5334 let b = _mm256_set1_epi16(2);
5335 let r = _mm256_sub_epi16(a, b);
5336 assert_eq_m256i(r, b);
5337 }
5338
5339 #[simd_test(enable = "avx2")]
5340 unsafe fn test_mm256_sub_epi32() {
5341 let a = _mm256_set1_epi32(4);
5342 let b = _mm256_set1_epi32(2);
5343 let r = _mm256_sub_epi32(a, b);
5344 assert_eq_m256i(r, b);
5345 }
5346
5347 #[simd_test(enable = "avx2")]
5348 unsafe fn test_mm256_sub_epi64() {
5349 let a = _mm256_set1_epi64x(4);
5350 let b = _mm256_set1_epi64x(2);
5351 let r = _mm256_sub_epi64(a, b);
5352 assert_eq_m256i(r, b);
5353 }
5354
5355 #[simd_test(enable = "avx2")]
5356 unsafe fn test_mm256_sub_epi8() {
5357 let a = _mm256_set1_epi8(4);
5358 let b = _mm256_set1_epi8(2);
5359 let r = _mm256_sub_epi8(a, b);
5360 assert_eq_m256i(r, b);
5361 }
5362
5363 #[simd_test(enable = "avx2")]
5364 unsafe fn test_mm256_subs_epi16() {
5365 let a = _mm256_set1_epi16(4);
5366 let b = _mm256_set1_epi16(2);
5367 let r = _mm256_subs_epi16(a, b);
5368 assert_eq_m256i(r, b);
5369 }
5370
5371 #[simd_test(enable = "avx2")]
5372 unsafe fn test_mm256_subs_epi8() {
5373 let a = _mm256_set1_epi8(4);
5374 let b = _mm256_set1_epi8(2);
5375 let r = _mm256_subs_epi8(a, b);
5376 assert_eq_m256i(r, b);
5377 }
5378
5379 #[simd_test(enable = "avx2")]
5380 unsafe fn test_mm256_subs_epu16() {
5381 let a = _mm256_set1_epi16(4);
5382 let b = _mm256_set1_epi16(2);
5383 let r = _mm256_subs_epu16(a, b);
5384 assert_eq_m256i(r, b);
5385 }
5386
5387 #[simd_test(enable = "avx2")]
5388 unsafe fn test_mm256_subs_epu8() {
5389 let a = _mm256_set1_epi8(4);
5390 let b = _mm256_set1_epi8(2);
5391 let r = _mm256_subs_epu8(a, b);
5392 assert_eq_m256i(r, b);
5393 }
5394
5395 #[simd_test(enable = "avx2")]
5396 unsafe fn test_mm256_xor_si256() {
5397 let a = _mm256_set1_epi8(5);
5398 let b = _mm256_set1_epi8(3);
5399 let r = _mm256_xor_si256(a, b);
5400 assert_eq_m256i(r, _mm256_set1_epi8(6));
5401 }
5402
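// alignr concatenates each 128-bit lane of `a` (high) and `b` (low) and shifts the
// pair right by IMM8 bytes: shifts of 32 or more yield zero, 16 returns `a`
// unchanged, and 0 returns `b`.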
5403 #[simd_test(enable = "avx2")]
5404 unsafe fn test_mm256_alignr_epi8() {
5405 #[rustfmt::skip]
5406 let a = _mm256_setr_epi8(
5407 1, 2, 3, 4, 5, 6, 7, 8,
5408 9, 10, 11, 12, 13, 14, 15, 16,
5409 17, 18, 19, 20, 21, 22, 23, 24,
5410 25, 26, 27, 28, 29, 30, 31, 32,
5411 );
5412 #[rustfmt::skip]
5413 let b = _mm256_setr_epi8(
5414 -1, -2, -3, -4, -5, -6, -7, -8,
5415 -9, -10, -11, -12, -13, -14, -15, -16,
5416 -17, -18, -19, -20, -21, -22, -23, -24,
5417 -25, -26, -27, -28, -29, -30, -31, -32,
5418 );
5419 let r = _mm256_alignr_epi8::<33>(a, b);
5420 assert_eq_m256i(r, _mm256_set1_epi8(0));
5421
5422 let r = _mm256_alignr_epi8::<17>(a, b);
5423 #[rustfmt::skip]
5424 let expected = _mm256_setr_epi8(
5425 2, 3, 4, 5, 6, 7, 8, 9,
5426 10, 11, 12, 13, 14, 15, 16, 0,
5427 18, 19, 20, 21, 22, 23, 24, 25,
5428 26, 27, 28, 29, 30, 31, 32, 0,
5429 );
5430 assert_eq_m256i(r, expected);
5431
5432 let r = _mm256_alignr_epi8::<4>(a, b);
5433 #[rustfmt::skip]
5434 let expected = _mm256_setr_epi8(
5435 -5, -6, -7, -8, -9, -10, -11, -12,
5436 -13, -14, -15, -16, 1, 2, 3, 4,
5437 -21, -22, -23, -24, -25, -26, -27, -28,
5438 -29, -30, -31, -32, 17, 18, 19, 20,
5439 );
5440 assert_eq_m256i(r, expected);
5441
5442 let r = _mm256_alignr_epi8::<15>(a, b);
5443 #[rustfmt::skip]
5444 let expected = _mm256_setr_epi8(
5445 -16, 1, 2, 3, 4, 5, 6, 7,
5446 8, 9, 10, 11, 12, 13, 14, 15,
5447 -32, 17, 18, 19, 20, 21, 22, 23,
5448 24, 25, 26, 27, 28, 29, 30, 31,
5449 );
5450 assert_eq_m256i(r, expected);
5451
5452 let r = _mm256_alignr_epi8::<0>(a, b);
5453 assert_eq_m256i(r, b);
5454
5455 let r = _mm256_alignr_epi8::<16>(a, b);
5456 assert_eq_m256i(r, a);
5457 }
5458
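// Each control byte in `b` selects a byte from the same 128-bit lane of `a` using its
// low four bits; a control byte with the high bit set (128 here) zeroes that
// destination byte.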
5459 #[simd_test(enable = "avx2")]
5460 unsafe fn test_mm256_shuffle_epi8() {
5461 #[rustfmt::skip]
5462 let a = _mm256_setr_epi8(
5463 1, 2, 3, 4, 5, 6, 7, 8,
5464 9, 10, 11, 12, 13, 14, 15, 16,
5465 17, 18, 19, 20, 21, 22, 23, 24,
5466 25, 26, 27, 28, 29, 30, 31, 32,
5467 );
5468 #[rustfmt::skip]
5469 let b = _mm256_setr_epi8(
5470 4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
5471 12, 5, 5, 10, 4, 1, 8, 0,
5472 4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
5473 12, 5, 5, 10, 4, 1, 8, 0,
5474 );
5475 #[rustfmt::skip]
5476 let expected = _mm256_setr_epi8(
5477 5, 0, 5, 4, 9, 13, 7, 4,
5478 13, 6, 6, 11, 5, 2, 9, 1,
5479 21, 0, 21, 20, 25, 29, 23, 20,
5480 29, 22, 22, 27, 21, 18, 25, 17,
5481 );
5482 let r = _mm256_shuffle_epi8(a, b);
5483 assert_eq_m256i(r, expected);
5484 }
5485
5486 #[simd_test(enable = "avx2")]
5487 unsafe fn test_mm256_permutevar8x32_epi32() {
5488 let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800);
5489 let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
5490 let expected = _mm256_setr_epi32(600, 100, 600, 200, 800, 700, 400, 500);
5491 let r = _mm256_permutevar8x32_epi32(a, b);
5492 assert_eq_m256i(r, expected);
5493 }
5494
5495 #[simd_test(enable = "avx2")]
5496 unsafe fn test_mm256_permute4x64_epi64() {
5497 let a = _mm256_setr_epi64x(100, 200, 300, 400);
5498 let expected = _mm256_setr_epi64x(400, 100, 200, 100);
5499 let r = _mm256_permute4x64_epi64::<0b00010011>(a);
5500 assert_eq_m256i(r, expected);
5501 }
5502
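// For _mm256_permute2x128_si256, bits [1:0] of the immediate select the low 128-bit
// half of the result and bits [5:4] select the high half, each from
// {a.lo, a.hi, b.lo, b.hi}; 0b00_01_00_11 picks b.hi then a.hi.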
5503 #[simd_test(enable = "avx2")]
5504 unsafe fn test_mm256_permute2x128_si256() {
5505 let a = _mm256_setr_epi64x(100, 200, 500, 600);
5506 let b = _mm256_setr_epi64x(300, 400, 700, 800);
5507 let r = _mm256_permute2x128_si256::<0b00_01_00_11>(a, b);
5508 let e = _mm256_setr_epi64x(700, 800, 500, 600);
5509 assert_eq_m256i(r, e);
5510 }
5511
5512 #[simd_test(enable = "avx2")]
5513 unsafe fn test_mm256_permute4x64_pd() {
5514 let a = _mm256_setr_pd(1., 2., 3., 4.);
5515 let r = _mm256_permute4x64_pd::<0b00_01_00_11>(a);
5516 let e = _mm256_setr_pd(4., 1., 2., 1.);
5517 assert_eq_m256d(r, e);
5518 }
5519
5520 #[simd_test(enable = "avx2")]
5521 unsafe fn test_mm256_permutevar8x32_ps() {
5522 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
5523 let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
5524 let r = _mm256_permutevar8x32_ps(a, b);
5525 let e = _mm256_setr_ps(6., 1., 6., 2., 8., 7., 4., 5.);
5526 assert_eq_m256(r, e);
5527 }
5528
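// In the gather intrinsics the const SCALE parameter is a byte multiplier applied to
// each index (4 for i32/f32, 8 for i64/f64 below), so with arr[i] == i an index of 16
// loads the value 16. The mask_ variants keep the `src` element (256 or 256.0 here)
// wherever the mask element's most significant bit is clear.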
5529 #[simd_test(enable = "avx2")]
5530 unsafe fn test_mm_i32gather_epi32() {
5531 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5532 let r = _mm_i32gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5534 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
5535 }
5536
5537 #[simd_test(enable = "avx2")]
5538 unsafe fn test_mm_mask_i32gather_epi32() {
5539 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5540 let r = _mm_mask_i32gather_epi32::<4>(
5542 _mm_set1_epi32(256),
5543 arr.as_ptr(),
5544 _mm_setr_epi32(0, 16, 64, 96),
5545 _mm_setr_epi32(-1, -1, -1, 0),
5546 );
5547 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
5548 }
5549
5550 #[simd_test(enable = "avx2")]
5551 unsafe fn test_mm256_i32gather_epi32() {
5552 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5553 let r =
5555 _mm256_i32gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
5556 assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
5557 }
5558
5559 #[simd_test(enable = "avx2")]
5560 unsafe fn test_mm256_mask_i32gather_epi32() {
5561 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5562 let r = _mm256_mask_i32gather_epi32::<4>(
5564 _mm256_set1_epi32(256),
5565 arr.as_ptr(),
5566 _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
5567 _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0),
5568 );
5569 assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256));
5570 }
5571
5572 #[simd_test(enable = "avx2")]
5573 unsafe fn test_mm_i32gather_ps() {
5574 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5575 let r = _mm_i32gather_ps::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5577 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
5578 }
5579
5580 #[simd_test(enable = "avx2")]
5581 unsafe fn test_mm_mask_i32gather_ps() {
5582 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5583 let r = _mm_mask_i32gather_ps::<4>(
5585 _mm_set1_ps(256.0),
5586 arr.as_ptr(),
5587 _mm_setr_epi32(0, 16, 64, 96),
5588 _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
5589 );
5590 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
5591 }
5592
5593 #[simd_test(enable = "avx2")]
5594 unsafe fn test_mm256_i32gather_ps() {
5595 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5596 let r =
5598 _mm256_i32gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
5599 assert_eq_m256(r, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0));
5600 }
5601
5602 #[simd_test(enable = "avx2")]
5603 unsafe fn test_mm256_mask_i32gather_ps() {
5604 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5605 let r = _mm256_mask_i32gather_ps::<4>(
5607 _mm256_set1_ps(256.0),
5608 arr.as_ptr(),
5609 _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
5610 _mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0),
5611 );
5612 assert_eq_m256(
5613 r,
5614 _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0),
5615 );
5616 }
5617
5618 #[simd_test(enable = "avx2")]
5619 unsafe fn test_mm_i32gather_epi64() {
5620 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5621 let r = _mm_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
5623 assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
5624 }
5625
5626 #[simd_test(enable = "avx2")]
5627 unsafe fn test_mm_mask_i32gather_epi64() {
5628 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5629 let r = _mm_mask_i32gather_epi64::<8>(
5631 _mm_set1_epi64x(256),
5632 arr.as_ptr(),
5633 _mm_setr_epi32(16, 16, 16, 16),
5634 _mm_setr_epi64x(-1, 0),
5635 );
5636 assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
5637 }
5638
5639 #[simd_test(enable = "avx2")]
5640 unsafe fn test_mm256_i32gather_epi64() {
5641 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5642 let r = _mm256_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5644 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
5645 }
5646
5647 #[simd_test(enable = "avx2")]
5648 unsafe fn test_mm256_mask_i32gather_epi64() {
5649 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5650 let r = _mm256_mask_i32gather_epi64::<8>(
5652 _mm256_set1_epi64x(256),
5653 arr.as_ptr(),
5654 _mm_setr_epi32(0, 16, 64, 96),
5655 _mm256_setr_epi64x(-1, -1, -1, 0),
5656 );
5657 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
5658 }
5659
5660 #[simd_test(enable = "avx2")]
5661 unsafe fn test_mm_i32gather_pd() {
5662 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5663 let r = _mm_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
5665 assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
5666 }
5667
5668 #[simd_test(enable = "avx2")]
5669 unsafe fn test_mm_mask_i32gather_pd() {
5670 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5671 let r = _mm_mask_i32gather_pd::<8>(
5673 _mm_set1_pd(256.0),
5674 arr.as_ptr(),
5675 _mm_setr_epi32(16, 16, 16, 16),
5676 _mm_setr_pd(-1.0, 0.0),
5677 );
5678 assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
5679 }
5680
5681 #[simd_test(enable = "avx2")]
5682 unsafe fn test_mm256_i32gather_pd() {
5683 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5684 let r = _mm256_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5686 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
5687 }
5688
5689 #[simd_test(enable = "avx2")]
5690 unsafe fn test_mm256_mask_i32gather_pd() {
5691 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5692 let r = _mm256_mask_i32gather_pd::<8>(
5694 _mm256_set1_pd(256.0),
5695 arr.as_ptr(),
5696 _mm_setr_epi32(0, 16, 64, 96),
5697 _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
5698 );
5699 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
5700 }
5701
5702 #[simd_test(enable = "avx2")]
5703 unsafe fn test_mm_i64gather_epi32() {
5704 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5705 let r = _mm_i64gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5707 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 0, 0));
5708 }
5709
5710 #[simd_test(enable = "avx2")]
5711 unsafe fn test_mm_mask_i64gather_epi32() {
5712 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5713 let r = _mm_mask_i64gather_epi32::<4>(
5715 _mm_set1_epi32(256),
5716 arr.as_ptr(),
5717 _mm_setr_epi64x(0, 16),
5718 _mm_setr_epi32(-1, 0, -1, 0),
5719 );
5720 assert_eq_m128i(r, _mm_setr_epi32(0, 256, 0, 0));
5721 }
5722
5723 #[simd_test(enable = "avx2")]
5724 unsafe fn test_mm256_i64gather_epi32() {
5725 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5726 let r = _mm256_i64gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5728 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
5729 }
5730
5731 #[simd_test(enable = "avx2")]
5732 unsafe fn test_mm256_mask_i64gather_epi32() {
5733 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5734 let r = _mm256_mask_i64gather_epi32::<4>(
5736 _mm_set1_epi32(256),
5737 arr.as_ptr(),
5738 _mm256_setr_epi64x(0, 16, 64, 96),
5739 _mm_setr_epi32(-1, -1, -1, 0),
5740 );
5741 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
5742 }
5743
5744 #[simd_test(enable = "avx2")]
5745 unsafe fn test_mm_i64gather_ps() {
5746 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5747 let r = _mm_i64gather_ps::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5749 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 0.0, 0.0));
5750 }
5751
5752 #[simd_test(enable = "avx2")]
5753 unsafe fn test_mm_mask_i64gather_ps() {
5754 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5755 let r = _mm_mask_i64gather_ps::<4>(
5757 _mm_set1_ps(256.0),
5758 arr.as_ptr(),
5759 _mm_setr_epi64x(0, 16),
5760 _mm_setr_ps(-1.0, 0.0, -1.0, 0.0),
5761 );
5762 assert_eq_m128(r, _mm_setr_ps(0.0, 256.0, 0.0, 0.0));
5763 }
5764
5765 #[simd_test(enable = "avx2")]
5766 unsafe fn test_mm256_i64gather_ps() {
5767 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5768 let r = _mm256_i64gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5770 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
5771 }
5772
5773 #[simd_test(enable = "avx2")]
5774 unsafe fn test_mm256_mask_i64gather_ps() {
5775 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5776 let r = _mm256_mask_i64gather_ps::<4>(
5778 _mm_set1_ps(256.0),
5779 arr.as_ptr(),
5780 _mm256_setr_epi64x(0, 16, 64, 96),
5781 _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
5782 );
5783 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
5784 }
5785
5786 #[simd_test(enable = "avx2")]
5787 unsafe fn test_mm_i64gather_epi64() {
5788 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5789 let r = _mm_i64gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5791 assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
5792 }
5793
5794 #[simd_test(enable = "avx2")]
5795 unsafe fn test_mm_mask_i64gather_epi64() {
5796 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5797 let r = _mm_mask_i64gather_epi64::<8>(
5799 _mm_set1_epi64x(256),
5800 arr.as_ptr(),
5801 _mm_setr_epi64x(16, 16),
5802 _mm_setr_epi64x(-1, 0),
5803 );
5804 assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
5805 }
5806
5807 #[simd_test(enable = "avx2")]
5808 unsafe fn test_mm256_i64gather_epi64() {
5809 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5810 let r = _mm256_i64gather_epi64::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5812 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
5813 }
5814
5815 #[simd_test(enable = "avx2")]
5816 unsafe fn test_mm256_mask_i64gather_epi64() {
5817 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5818 let r = _mm256_mask_i64gather_epi64::<8>(
5820 _mm256_set1_epi64x(256),
5821 arr.as_ptr(),
5822 _mm256_setr_epi64x(0, 16, 64, 96),
5823 _mm256_setr_epi64x(-1, -1, -1, 0),
5824 );
5825 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
5826 }
5827
5828 #[simd_test(enable = "avx2")]
5829 unsafe fn test_mm_i64gather_pd() {
5830 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5831 let r = _mm_i64gather_pd::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5833 assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
5834 }
5835
5836 #[simd_test(enable = "avx2")]
5837 unsafe fn test_mm_mask_i64gather_pd() {
5838 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5839 let r = _mm_mask_i64gather_pd::<8>(
5841 _mm_set1_pd(256.0),
5842 arr.as_ptr(),
5843 _mm_setr_epi64x(16, 16),
5844 _mm_setr_pd(-1.0, 0.0),
5845 );
5846 assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
5847 }
5848
5849 #[simd_test(enable = "avx2")]
5850 unsafe fn test_mm256_i64gather_pd() {
5851 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5852 let r = _mm256_i64gather_pd::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5854 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
5855 }
5856
5857 #[simd_test(enable = "avx2")]
5858 unsafe fn test_mm256_mask_i64gather_pd() {
5859 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5860 let r = _mm256_mask_i64gather_pd::<8>(
5862 _mm256_set1_pd(256.0),
5863 arr.as_ptr(),
5864 _mm256_setr_epi64x(0, 16, 64, 96),
5865 _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
5866 );
5867 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
5868 }
5869
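// The extract intrinsics return the selected lane zero-extended to i32, so the -1
// byte reads back as 0xFF and the -1 word in the next test as 0xFFFF.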
5870 #[simd_test(enable = "avx2")]
5871 unsafe fn test_mm256_extract_epi8() {
5872 #[rustfmt::skip]
5873 let a = _mm256_setr_epi8(
5874 -1, 1, 2, 3, 4, 5, 6, 7,
5875 8, 9, 10, 11, 12, 13, 14, 15,
5876 16, 17, 18, 19, 20, 21, 22, 23,
5877 24, 25, 26, 27, 28, 29, 30, 31
5878 );
5879 let r1 = _mm256_extract_epi8::<0>(a);
5880 let r2 = _mm256_extract_epi8::<3>(a);
5881 assert_eq!(r1, 0xFF);
5882 assert_eq!(r2, 3);
5883 }
5884
5885 #[simd_test(enable = "avx2")]
5886 unsafe fn test_mm256_extract_epi16() {
5887 #[rustfmt::skip]
5888 let a = _mm256_setr_epi16(
5889 -1, 1, 2, 3, 4, 5, 6, 7,
5890 8, 9, 10, 11, 12, 13, 14, 15,
5891 );
5892 let r1 = _mm256_extract_epi16::<0>(a);
5893 let r2 = _mm256_extract_epi16::<3>(a);
5894 assert_eq!(r1, 0xFFFF);
5895 assert_eq!(r2, 3);
5896 }
5897}