1use crate::core_arch::{simd::*, x86::*};
4use crate::intrinsics::simd::*;
5
6#[cfg(test)]
7use stdarch_test::assert_instr;
8
// Rounding-control immediates. `_MM_FROUND_FLOOR` and `_MM_FROUND_CEIL` are
// passed to the `roundsd`/`roundss` intrinsics later in this file; the
// descriptions below follow the constant names and their numeric values.

/// Rounding-direction selector `0x00`; the name indicates round-to-nearest.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_NEAREST_INT: i32 = 0x00;
/// Rounding-direction selector `0x01`; the name indicates rounding toward
/// negative infinity.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_NEG_INF: i32 = 0x01;
/// Rounding-direction selector `0x02`; the name indicates rounding toward
/// positive infinity.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_POS_INF: i32 = 0x02;
/// Rounding-direction selector `0x03`; the name indicates rounding toward zero.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_ZERO: i32 = 0x03;
/// Selector `0x04`; the name indicates "use the current rounding direction".
/// NOTE(review): presumably the mode held in MXCSR — confirm against Intel's
/// documentation.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_CUR_DIRECTION: i32 = 0x04;
/// Exception flag value `0x00`; the name indicates exceptions are raised
/// (i.e. not suppressed).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_RAISE_EXC: i32 = 0x00;
/// Exception flag bit `0x08`; the name indicates exceptions are suppressed.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NO_EXC: i32 = 0x08;
/// `0x00`; numerically equal to
/// `_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEAREST_INT`.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NINT: i32 = 0x00;
/// Floor mode: `_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF` (evaluates to `0x01`).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_FLOOR: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF;
/// Ceiling mode: `_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF` (evaluates to `0x02`).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_CEIL: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF;
/// Truncation mode: `_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO` (evaluates to `0x03`).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TRUNC: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO;
/// `_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION` (evaluates to `0x04`):
/// current rounding direction without exception suppression.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_RINT: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION;
/// `_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION` (evaluates to `0x0C`):
/// current rounding direction with exception suppression.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NEARBYINT: i32 = _MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION;
50
51#[inline]
59#[target_feature(enable = "sse4.1")]
60#[cfg_attr(test, assert_instr(pblendvb))]
61#[stable(feature = "simd_x86", since = "1.27.0")]
62#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
63pub const fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i {
64 unsafe {
65 let mask: i8x16 = simd_lt(mask.as_i8x16(), i8x16::ZERO);
66 transmute(simd_select(mask, b.as_i8x16(), a.as_i8x16()))
67 }
68}
69
70#[inline]
78#[target_feature(enable = "sse4.1")]
79#[cfg_attr(test, assert_instr(pblendw, IMM8 = 0xB1))]
80#[rustc_legacy_const_generics(2)]
81#[stable(feature = "simd_x86", since = "1.27.0")]
82#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
83pub const fn _mm_blend_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
84 static_assert_uimm_bits!(IMM8, 8);
85 unsafe {
86 transmute::<i16x8, _>(simd_shuffle!(
87 a.as_i16x8(),
88 b.as_i16x8(),
89 [
90 [0, 8][IMM8 as usize & 1],
91 [1, 9][(IMM8 >> 1) as usize & 1],
92 [2, 10][(IMM8 >> 2) as usize & 1],
93 [3, 11][(IMM8 >> 3) as usize & 1],
94 [4, 12][(IMM8 >> 4) as usize & 1],
95 [5, 13][(IMM8 >> 5) as usize & 1],
96 [6, 14][(IMM8 >> 6) as usize & 1],
97 [7, 15][(IMM8 >> 7) as usize & 1],
98 ]
99 ))
100 }
101}
102
103#[inline]
108#[target_feature(enable = "sse4.1")]
109#[cfg_attr(test, assert_instr(blendvpd))]
110#[stable(feature = "simd_x86", since = "1.27.0")]
111#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
112pub const fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d {
113 unsafe {
114 let mask: i64x2 = simd_lt(transmute::<_, i64x2>(mask), i64x2::ZERO);
115 transmute(simd_select(mask, b.as_f64x2(), a.as_f64x2()))
116 }
117}
118
119#[inline]
124#[target_feature(enable = "sse4.1")]
125#[cfg_attr(test, assert_instr(blendvps))]
126#[stable(feature = "simd_x86", since = "1.27.0")]
127#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
128pub const fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
129 unsafe {
130 let mask: i32x4 = simd_lt(transmute::<_, i32x4>(mask), i32x4::ZERO);
131 transmute(simd_select(mask, b.as_f32x4(), a.as_f32x4()))
132 }
133}
134
135#[inline]
140#[target_feature(enable = "sse4.1")]
141#[cfg_attr(test, assert_instr(blendps, IMM2 = 0b10))]
145#[rustc_legacy_const_generics(2)]
146#[stable(feature = "simd_x86", since = "1.27.0")]
147#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
148pub const fn _mm_blend_pd<const IMM2: i32>(a: __m128d, b: __m128d) -> __m128d {
149 static_assert_uimm_bits!(IMM2, 2);
150 unsafe {
151 transmute::<f64x2, _>(simd_shuffle!(
152 a.as_f64x2(),
153 b.as_f64x2(),
154 [[0, 2][IMM2 as usize & 1], [1, 3][(IMM2 >> 1) as usize & 1]]
155 ))
156 }
157}
158
159#[inline]
164#[target_feature(enable = "sse4.1")]
165#[cfg_attr(test, assert_instr(blendps, IMM4 = 0b0101))]
166#[rustc_legacy_const_generics(2)]
167#[stable(feature = "simd_x86", since = "1.27.0")]
168#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
169pub const fn _mm_blend_ps<const IMM4: i32>(a: __m128, b: __m128) -> __m128 {
170 static_assert_uimm_bits!(IMM4, 4);
171 unsafe {
172 transmute::<f32x4, _>(simd_shuffle!(
173 a.as_f32x4(),
174 b.as_f32x4(),
175 [
176 [0, 4][IMM4 as usize & 1],
177 [1, 5][(IMM4 >> 1) as usize & 1],
178 [2, 6][(IMM4 >> 2) as usize & 1],
179 [3, 7][(IMM4 >> 3) as usize & 1],
180 ]
181 ))
182 }
183}
184
185#[inline]
212#[target_feature(enable = "sse4.1")]
213#[cfg_attr(test, assert_instr(extractps, IMM8 = 0))]
214#[rustc_legacy_const_generics(1)]
215#[stable(feature = "simd_x86", since = "1.27.0")]
216#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
217pub const fn _mm_extract_ps<const IMM8: i32>(a: __m128) -> i32 {
218 static_assert_uimm_bits!(IMM8, 2);
219 unsafe { simd_extract!(a, IMM8 as u32, f32).to_bits() as i32 }
220}
221
222#[inline]
229#[target_feature(enable = "sse4.1")]
230#[cfg_attr(test, assert_instr(pextrb, IMM8 = 0))]
231#[rustc_legacy_const_generics(1)]
232#[stable(feature = "simd_x86", since = "1.27.0")]
233#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
234pub const fn _mm_extract_epi8<const IMM8: i32>(a: __m128i) -> i32 {
235 static_assert_uimm_bits!(IMM8, 4);
236 unsafe { simd_extract!(a.as_u8x16(), IMM8 as u32, u8) as i32 }
237}
238
239#[inline]
243#[target_feature(enable = "sse4.1")]
244#[cfg_attr(test, assert_instr(extractps, IMM8 = 1))]
245#[rustc_legacy_const_generics(1)]
246#[stable(feature = "simd_x86", since = "1.27.0")]
247#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
248pub const fn _mm_extract_epi32<const IMM8: i32>(a: __m128i) -> i32 {
249 static_assert_uimm_bits!(IMM8, 2);
250 unsafe { simd_extract!(a.as_i32x4(), IMM8 as u32, i32) }
251}
252
/// Inserts a single-precision lane of `b` into `a` as directed by `IMM8`, by
/// calling the `insertps` LLVM intrinsic.
///
/// `IMM8` must fit in 8 unsigned bits (checked at compile time). The tests in
/// this file show that the low four bits act as a zeroing mask that takes
/// precedence over the inserted value. NOTE(review): the source/destination
/// lane selector bits are defined by the `insertps` instruction — confirm the
/// exact bit layout against the Intel documentation.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(insertps, IMM8 = 0b1010))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_insert_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    // The control byte is passed as a constant expression directly at the call.
    unsafe { insertps(a, b, IMM8 as u8) }
}
286
287#[inline]
292#[target_feature(enable = "sse4.1")]
293#[cfg_attr(test, assert_instr(pinsrb, IMM8 = 0))]
294#[rustc_legacy_const_generics(2)]
295#[stable(feature = "simd_x86", since = "1.27.0")]
296#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
297pub const fn _mm_insert_epi8<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
298 static_assert_uimm_bits!(IMM8, 4);
299 unsafe { transmute(simd_insert!(a.as_i8x16(), IMM8 as u32, i as i8)) }
300}
301
302#[inline]
307#[target_feature(enable = "sse4.1")]
308#[cfg_attr(test, assert_instr(pinsrd, IMM8 = 0))]
309#[rustc_legacy_const_generics(2)]
310#[stable(feature = "simd_x86", since = "1.27.0")]
311#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
312pub const fn _mm_insert_epi32<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
313 static_assert_uimm_bits!(IMM8, 2);
314 unsafe { transmute(simd_insert!(a.as_i32x4(), IMM8 as u32, i)) }
315}
316
317#[inline]
322#[target_feature(enable = "sse4.1")]
323#[cfg_attr(test, assert_instr(pmaxsb))]
324#[stable(feature = "simd_x86", since = "1.27.0")]
325#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
326pub const fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
327 unsafe { simd_imax(a.as_i8x16(), b.as_i8x16()).as_m128i() }
328}
329
330#[inline]
335#[target_feature(enable = "sse4.1")]
336#[cfg_attr(test, assert_instr(pmaxuw))]
337#[stable(feature = "simd_x86", since = "1.27.0")]
338#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
339pub const fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i {
340 unsafe { simd_imax(a.as_u16x8(), b.as_u16x8()).as_m128i() }
341}
342
343#[inline]
348#[target_feature(enable = "sse4.1")]
349#[cfg_attr(test, assert_instr(pmaxsd))]
350#[stable(feature = "simd_x86", since = "1.27.0")]
351#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
352pub const fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i {
353 unsafe { simd_imax(a.as_i32x4(), b.as_i32x4()).as_m128i() }
354}
355
356#[inline]
361#[target_feature(enable = "sse4.1")]
362#[cfg_attr(test, assert_instr(pmaxud))]
363#[stable(feature = "simd_x86", since = "1.27.0")]
364#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
365pub const fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i {
366 unsafe { simd_imax(a.as_u32x4(), b.as_u32x4()).as_m128i() }
367}
368
369#[inline]
374#[target_feature(enable = "sse4.1")]
375#[cfg_attr(test, assert_instr(pminsb))]
376#[stable(feature = "simd_x86", since = "1.27.0")]
377#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
378pub const fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i {
379 unsafe { simd_imin(a.as_i8x16(), b.as_i8x16()).as_m128i() }
380}
381
382#[inline]
387#[target_feature(enable = "sse4.1")]
388#[cfg_attr(test, assert_instr(pminuw))]
389#[stable(feature = "simd_x86", since = "1.27.0")]
390#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
391pub const fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i {
392 unsafe { simd_imin(a.as_u16x8(), b.as_u16x8()).as_m128i() }
393}
394
395#[inline]
400#[target_feature(enable = "sse4.1")]
401#[cfg_attr(test, assert_instr(pminsd))]
402#[stable(feature = "simd_x86", since = "1.27.0")]
403#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
404pub const fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i {
405 unsafe { simd_imin(a.as_i32x4(), b.as_i32x4()).as_m128i() }
406}
407
408#[inline]
413#[target_feature(enable = "sse4.1")]
414#[cfg_attr(test, assert_instr(pminud))]
415#[stable(feature = "simd_x86", since = "1.27.0")]
416#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
417pub const fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
418 unsafe { simd_imin(a.as_u32x4(), b.as_u32x4()).as_m128i() }
419}
420
421#[inline]
426#[target_feature(enable = "sse4.1")]
427#[cfg_attr(test, assert_instr(packusdw))]
428#[stable(feature = "simd_x86", since = "1.27.0")]
429#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
430pub const fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
431 unsafe {
432 let max = simd_splat(u16::MAX as i32);
433 let min = simd_splat(u16::MIN as i32);
434
435 let clamped_a = simd_imax(simd_imin(a.as_i32x4(), max), min)
436 .as_m128i()
437 .as_i16x8();
438 let clamped_b = simd_imax(simd_imin(b.as_i32x4(), max), min)
439 .as_m128i()
440 .as_i16x8();
441
442 const IDXS: [u32; 8] = [0, 2, 4, 6, 8, 10, 12, 14];
445 let result: i16x8 = simd_shuffle!(clamped_a, clamped_b, IDXS);
446
447 result.as_m128i()
448 }
449}
450
451#[inline]
455#[target_feature(enable = "sse4.1")]
456#[cfg_attr(test, assert_instr(pcmpeqq))]
457#[stable(feature = "simd_x86", since = "1.27.0")]
458#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
459pub const fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i {
460 unsafe { transmute(simd_eq::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) }
461}
462
463#[inline]
467#[target_feature(enable = "sse4.1")]
468#[cfg_attr(test, assert_instr(pmovsxbw))]
469#[stable(feature = "simd_x86", since = "1.27.0")]
470#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
471pub const fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
472 unsafe {
473 let a = a.as_i8x16();
474 let a: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
475 transmute(simd_cast::<_, i16x8>(a))
476 }
477}
478
479#[inline]
483#[target_feature(enable = "sse4.1")]
484#[cfg_attr(test, assert_instr(pmovsxbd))]
485#[stable(feature = "simd_x86", since = "1.27.0")]
486#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
487pub const fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
488 unsafe {
489 let a = a.as_i8x16();
490 let a: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
491 transmute(simd_cast::<_, i32x4>(a))
492 }
493}
494
495#[inline]
500#[target_feature(enable = "sse4.1")]
501#[cfg_attr(test, assert_instr(pmovsxbq))]
502#[stable(feature = "simd_x86", since = "1.27.0")]
503#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
504pub const fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
505 unsafe {
506 let a = a.as_i8x16();
507 let a: i8x2 = simd_shuffle!(a, a, [0, 1]);
508 transmute(simd_cast::<_, i64x2>(a))
509 }
510}
511
512#[inline]
516#[target_feature(enable = "sse4.1")]
517#[cfg_attr(test, assert_instr(pmovsxwd))]
518#[stable(feature = "simd_x86", since = "1.27.0")]
519#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
520pub const fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
521 unsafe {
522 let a = a.as_i16x8();
523 let a: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
524 transmute(simd_cast::<_, i32x4>(a))
525 }
526}
527
528#[inline]
532#[target_feature(enable = "sse4.1")]
533#[cfg_attr(test, assert_instr(pmovsxwq))]
534#[stable(feature = "simd_x86", since = "1.27.0")]
535#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
536pub const fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
537 unsafe {
538 let a = a.as_i16x8();
539 let a: i16x2 = simd_shuffle!(a, a, [0, 1]);
540 transmute(simd_cast::<_, i64x2>(a))
541 }
542}
543
544#[inline]
548#[target_feature(enable = "sse4.1")]
549#[cfg_attr(test, assert_instr(pmovsxdq))]
550#[stable(feature = "simd_x86", since = "1.27.0")]
551#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
552pub const fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
553 unsafe {
554 let a = a.as_i32x4();
555 let a: i32x2 = simd_shuffle!(a, a, [0, 1]);
556 transmute(simd_cast::<_, i64x2>(a))
557 }
558}
559
560#[inline]
564#[target_feature(enable = "sse4.1")]
565#[cfg_attr(test, assert_instr(pmovzxbw))]
566#[stable(feature = "simd_x86", since = "1.27.0")]
567#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
568pub const fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
569 unsafe {
570 let a = a.as_u8x16();
571 let a: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
572 transmute(simd_cast::<_, i16x8>(a))
573 }
574}
575
576#[inline]
580#[target_feature(enable = "sse4.1")]
581#[cfg_attr(test, assert_instr(pmovzxbd))]
582#[stable(feature = "simd_x86", since = "1.27.0")]
583#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
584pub const fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
585 unsafe {
586 let a = a.as_u8x16();
587 let a: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
588 transmute(simd_cast::<_, i32x4>(a))
589 }
590}
591
592#[inline]
596#[target_feature(enable = "sse4.1")]
597#[cfg_attr(test, assert_instr(pmovzxbq))]
598#[stable(feature = "simd_x86", since = "1.27.0")]
599#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
600pub const fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
601 unsafe {
602 let a = a.as_u8x16();
603 let a: u8x2 = simd_shuffle!(a, a, [0, 1]);
604 transmute(simd_cast::<_, i64x2>(a))
605 }
606}
607
608#[inline]
613#[target_feature(enable = "sse4.1")]
614#[cfg_attr(test, assert_instr(pmovzxwd))]
615#[stable(feature = "simd_x86", since = "1.27.0")]
616#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
617pub const fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
618 unsafe {
619 let a = a.as_u16x8();
620 let a: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
621 transmute(simd_cast::<_, i32x4>(a))
622 }
623}
624
625#[inline]
630#[target_feature(enable = "sse4.1")]
631#[cfg_attr(test, assert_instr(pmovzxwq))]
632#[stable(feature = "simd_x86", since = "1.27.0")]
633#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
634pub const fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
635 unsafe {
636 let a = a.as_u16x8();
637 let a: u16x2 = simd_shuffle!(a, a, [0, 1]);
638 transmute(simd_cast::<_, i64x2>(a))
639 }
640}
641
642#[inline]
647#[target_feature(enable = "sse4.1")]
648#[cfg_attr(test, assert_instr(pmovzxdq))]
649#[stable(feature = "simd_x86", since = "1.27.0")]
650#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
651pub const fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
652 unsafe {
653 let a = a.as_u32x4();
654 let a: u32x2 = simd_shuffle!(a, a, [0, 1]);
655 transmute(simd_cast::<_, i64x2>(a))
656 }
657}
658
659#[inline]
669#[target_feature(enable = "sse4.1")]
670#[cfg_attr(test, assert_instr(dppd, IMM8 = 0))]
671#[rustc_legacy_const_generics(2)]
672#[stable(feature = "simd_x86", since = "1.27.0")]
673pub fn _mm_dp_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
674 unsafe {
675 static_assert_uimm_bits!(IMM8, 8);
676 dppd(a, b, IMM8 as u8)
677 }
678}
679
/// Computes a conditional dot product of the `f32` lanes of `a` and `b` by
/// calling the `dpps` LLVM intrinsic with `IMM8` as its control byte.
///
/// `IMM8` must fit in 8 unsigned bits, which is checked at compile time.
/// NOTE(review): which control bits select the multiplied lanes and which
/// select the lanes receiving the sum is defined by the `dpps` instruction —
/// confirm against the Intel documentation.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(dpps, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_dp_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    // The control byte is passed as a constant expression directly at the call.
    unsafe { dpps(a, b, IMM8 as u8) }
}
698
/// Applies `simd_floor` to the packed double-precision lanes of `a`, rounding
/// each lane down to an integral value.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_floor_pd(a: __m128d) -> __m128d {
    unsafe { simd_floor(a) }
}
712
/// Applies `simd_floor` to the packed single-precision lanes of `a`, rounding
/// each lane down to an integral value.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_floor_ps(a: __m128) -> __m128 {
    unsafe { simd_floor(a) }
}
726
/// Scalar double-precision floor: calls the `roundsd` intrinsic on `a` and `b`
/// with the `_MM_FROUND_FLOOR` rounding mode.
///
/// NOTE(review): presumably the low lane of the result is the floored low lane
/// of `b` and the upper lane is copied from `a` — confirm against the Intel
/// documentation for `roundsd`.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { roundsd(a, b, _MM_FROUND_FLOOR) }
}
741
/// Scalar single-precision floor: calls the `roundss` intrinsic on `a` and `b`
/// with the `_MM_FROUND_FLOOR` rounding mode.
///
/// NOTE(review): presumably the low lane of the result is the floored low lane
/// of `b` and the upper lanes are copied from `a` — confirm against the Intel
/// documentation for `roundss`.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { roundss(a, b, _MM_FROUND_FLOOR) }
}
756
/// Applies `simd_ceil` to the packed double-precision lanes of `a`, rounding
/// each lane up to an integral value.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_ceil_pd(a: __m128d) -> __m128d {
    unsafe { simd_ceil(a) }
}
770
/// Applies `simd_ceil` to the packed single-precision lanes of `a`, rounding
/// each lane up to an integral value.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_ceil_ps(a: __m128) -> __m128 {
    unsafe { simd_ceil(a) }
}
784
/// Scalar double-precision ceiling: calls the `roundsd` intrinsic on `a` and
/// `b` with the `_MM_FROUND_CEIL` rounding mode.
///
/// NOTE(review): presumably the low lane of the result is the rounded-up low
/// lane of `b` and the upper lane is copied from `a` — confirm against the
/// Intel documentation for `roundsd`.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { roundsd(a, b, _MM_FROUND_CEIL) }
}
799
/// Scalar single-precision ceiling: calls the `roundss` intrinsic on `a` and
/// `b` with the `_MM_FROUND_CEIL` rounding mode.
///
/// NOTE(review): presumably the low lane of the result is the rounded-up low
/// lane of `b` and the upper lanes are copied from `a` — confirm against the
/// Intel documentation for `roundss`.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { roundss(a, b, _MM_FROUND_CEIL) }
}
814
/// Rounds the packed double-precision lanes of `a` by calling the `roundpd`
/// intrinsic with the `ROUNDING` immediate.
///
/// `ROUNDING` must fit in 4 unsigned bits (checked at compile time); the
/// `_MM_FROUND_*` constants defined in this module all fit that range.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd, ROUNDING = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_round_pd<const ROUNDING: i32>(a: __m128d) -> __m128d {
    static_assert_uimm_bits!(ROUNDING, 4);
    unsafe { roundpd(a, ROUNDING) }
}
836
/// Rounds the packed single-precision lanes of `a` by calling the `roundps`
/// intrinsic with the `ROUNDING` immediate.
///
/// `ROUNDING` must fit in 4 unsigned bits (checked at compile time); the
/// `_MM_FROUND_*` constants defined in this module all fit that range.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps, ROUNDING = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_round_ps<const ROUNDING: i32>(a: __m128) -> __m128 {
    static_assert_uimm_bits!(ROUNDING, 4);
    unsafe { roundps(a, ROUNDING) }
}
858
/// Scalar double-precision rounding: calls the `roundsd` intrinsic on `a` and
/// `b` with the `ROUNDING` immediate.
///
/// `ROUNDING` must fit in 4 unsigned bits (checked at compile time).
/// NOTE(review): presumably the low lane of the result is the rounded low lane
/// of `b` and the upper lane is copied from `a` — confirm against the Intel
/// documentation for `roundsd`.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd, ROUNDING = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(ROUNDING, 4);
    unsafe { roundsd(a, b, ROUNDING) }
}
882
/// Scalar single-precision rounding: calls the `roundss` intrinsic on `a` and
/// `b` with the `ROUNDING` immediate.
///
/// `ROUNDING` must fit in 4 unsigned bits (checked at compile time).
/// NOTE(review): presumably the low lane of the result is the rounded low lane
/// of `b` and the upper lanes are copied from `a` — confirm against the Intel
/// documentation for `roundss`.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss, ROUNDING = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(ROUNDING, 4);
    unsafe { roundss(a, b, ROUNDING) }
}
906
907#[inline]
929#[target_feature(enable = "sse4.1")]
930#[cfg_attr(test, assert_instr(phminposuw))]
931#[stable(feature = "simd_x86", since = "1.27.0")]
932pub fn _mm_minpos_epu16(a: __m128i) -> __m128i {
933 unsafe { transmute(phminposuw(a.as_u16x8())) }
934}
935
936#[inline]
941#[target_feature(enable = "sse4.1")]
942#[cfg_attr(test, assert_instr(pmuldq))]
943#[stable(feature = "simd_x86", since = "1.27.0")]
944#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
945pub const fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i {
946 unsafe {
947 let a = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(a.as_i64x2()));
948 let b = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(b.as_i64x2()));
949 transmute(simd_mul(a, b))
950 }
951}
952
953#[inline]
962#[target_feature(enable = "sse4.1")]
963#[cfg_attr(test, assert_instr(pmulld))]
964#[stable(feature = "simd_x86", since = "1.27.0")]
965#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
966pub const fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i {
967 unsafe { transmute(simd_mul(a.as_i32x4(), b.as_i32x4())) }
968}
969
/// Calls the `mpsadbw` intrinsic on the unsigned 8-bit lanes of `a` and `b`
/// with `IMM8` as the control byte, returning its eight 16-bit results.
///
/// `IMM8` must fit in 3 unsigned bits, which is checked at compile time.
/// NOTE(review): per the instruction name this computes sums of absolute
/// differences over byte groups chosen by `IMM8` — confirm the exact offset
/// encoding against the Intel documentation.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(mpsadbw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mpsadbw_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 3);
    // The intrinsic yields a `u16x8`, reinterpreted here as `__m128i`.
    unsafe { transmute(mpsadbw(a.as_u8x16(), b.as_u8x16(), IMM8 as u8)) }
}
1012
1013#[inline]
1029#[target_feature(enable = "sse4.1")]
1030#[cfg_attr(test, assert_instr(ptest))]
1031#[stable(feature = "simd_x86", since = "1.27.0")]
1032#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1033pub const fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
1034 unsafe {
1035 let r = simd_reduce_or(simd_and(a.as_i64x2(), mask.as_i64x2()));
1036 (0i64 == r) as i32
1037 }
1038}
1039
1040#[inline]
1056#[target_feature(enable = "sse4.1")]
1057#[cfg_attr(test, assert_instr(ptest))]
1058#[stable(feature = "simd_x86", since = "1.27.0")]
1059#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1060pub const fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
1061 unsafe {
1062 let r = simd_reduce_or(simd_and(
1063 simd_xor(a.as_i64x2(), i64x2::splat(!0)),
1064 mask.as_i64x2(),
1065 ));
1066 (0i64 == r) as i32
1067 }
1068}
1069
1070#[inline]
1086#[target_feature(enable = "sse4.1")]
1087#[cfg_attr(test, assert_instr(ptest))]
1088#[stable(feature = "simd_x86", since = "1.27.0")]
1089pub fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
1090 unsafe { ptestnzc(a.as_i64x2(), mask.as_i64x2()) }
1091}
1092
/// Returns `1` when `a & mask` is all zeros and `0` otherwise.
///
/// This is a thin alias that forwards to `_mm_testz_si128`.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
    _mm_testz_si128(a, mask)
}
1116
1117#[inline]
1131#[target_feature(enable = "sse4.1")]
1132#[cfg_attr(test, assert_instr(pcmpeqd))]
1133#[cfg_attr(test, assert_instr(ptest))]
1134#[stable(feature = "simd_x86", since = "1.27.0")]
1135#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1136pub const fn _mm_test_all_ones(a: __m128i) -> i32 {
1137 _mm_testc_si128(a, _mm_cmpeq_epi32(a, a))
1138}
1139
/// Thin alias that forwards to `_mm_testnzc_si128(a, mask)` and returns its
/// result unchanged.
///
/// NOTE(review): per the name, the result is presumably `1` when the bits of
/// `a` selected by `mask` are a mix of ones and zeros — confirm against the
/// Intel documentation.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
    _mm_testnzc_si128(a, mask)
}
1162
/// Loads 128 bits from `mem_addr` with the `movntdqa` instruction and returns
/// them.
///
/// # Safety
///
/// The pointer is handed straight to the instruction, so it must be valid for
/// a 16-byte read. NOTE(review): `movntdqa` presumably also requires the
/// address to be 16-byte aligned — confirm against the Intel documentation.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(movntdqa))]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub unsafe fn _mm_stream_load_si128(mem_addr: *const __m128i) -> __m128i {
    let dst: __m128i;
    crate::arch::asm!(
        // `vpl!` is defined outside this view; presumably it appends the
        // `{p}` memory operand to the instruction template — TODO confirm.
        vpl!("movntdqa {a}"),
        a = out(xmm_reg) dst,
        p = in(reg) mem_addr,
        // Declared as a pure, read-only operation that neither touches the
        // stack nor modifies flags.
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}
1182
// Bindings to the LLVM SSE4.1 intrinsics that the public wrappers in this file
// call directly. Each declaration is tied to its LLVM name through
// `#[link_name]`.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // Used by `_mm_insert_ps`.
    #[link_name = "llvm.x86.sse41.insertps"]
    fn insertps(a: __m128, b: __m128, imm8: u8) -> __m128;
    // Used by `_mm_dp_pd`.
    #[link_name = "llvm.x86.sse41.dppd"]
    fn dppd(a: __m128d, b: __m128d, imm8: u8) -> __m128d;
    // Used by `_mm_dp_ps`.
    #[link_name = "llvm.x86.sse41.dpps"]
    fn dpps(a: __m128, b: __m128, imm8: u8) -> __m128;
    // Used by `_mm_round_pd`.
    #[link_name = "llvm.x86.sse41.round.pd"]
    fn roundpd(a: __m128d, rounding: i32) -> __m128d;
    // Used by `_mm_round_ps`.
    #[link_name = "llvm.x86.sse41.round.ps"]
    fn roundps(a: __m128, rounding: i32) -> __m128;
    // Used by `_mm_floor_sd`, `_mm_ceil_sd` and `_mm_round_sd`.
    #[link_name = "llvm.x86.sse41.round.sd"]
    fn roundsd(a: __m128d, b: __m128d, rounding: i32) -> __m128d;
    // Used by `_mm_floor_ss`, `_mm_ceil_ss` and `_mm_round_ss`.
    #[link_name = "llvm.x86.sse41.round.ss"]
    fn roundss(a: __m128, b: __m128, rounding: i32) -> __m128;
    // Used by `_mm_minpos_epu16`.
    #[link_name = "llvm.x86.sse41.phminposuw"]
    fn phminposuw(a: u16x8) -> u16x8;
    // Used by `_mm_mpsadbw_epu8`.
    #[link_name = "llvm.x86.sse41.mpsadbw"]
    fn mpsadbw(a: u8x16, b: u8x16, imm8: u8) -> u16x8;
    // Used by `_mm_testnzc_si128`.
    #[link_name = "llvm.x86.sse41.ptestnzc"]
    fn ptestnzc(a: i64x2, mask: i64x2) -> i32;
}
1206
1207#[cfg(test)]
1208mod tests {
1209 use crate::core_arch::assert_eq_const as assert_eq;
1210 use crate::core_arch::x86::*;
1211 use std::mem;
1212 use stdarch_test::simd_test;
1213
1214 #[simd_test(enable = "sse4.1")]
1215 const fn test_mm_blendv_epi8() {
1216 #[rustfmt::skip]
1217 let a = _mm_setr_epi8(
1218 0, 1, 2, 3, 4, 5, 6, 7,
1219 8, 9, 10, 11, 12, 13, 14, 15,
1220 );
1221 #[rustfmt::skip]
1222 let b = _mm_setr_epi8(
1223 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
1224 );
1225 #[rustfmt::skip]
1226 let mask = _mm_setr_epi8(
1227 0, -1, 0, -1, 0, -1, 0, -1,
1228 0, -1, 0, -1, 0, -1, 0, -1,
1229 );
1230 #[rustfmt::skip]
1231 let e = _mm_setr_epi8(
1232 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31,
1233 );
1234 assert_eq_m128i(_mm_blendv_epi8(a, b, mask), e);
1235 }
1236
1237 #[simd_test(enable = "sse4.1")]
1238 const fn test_mm_blendv_pd() {
1239 let a = _mm_set1_pd(0.0);
1240 let b = _mm_set1_pd(1.0);
1241 let mask = _mm_castsi128_pd(_mm_setr_epi64x(0, -1));
1242 let r = _mm_blendv_pd(a, b, mask);
1243 let e = _mm_setr_pd(0.0, 1.0);
1244 assert_eq_m128d(r, e);
1245 }
1246
1247 #[simd_test(enable = "sse4.1")]
1248 const fn test_mm_blendv_ps() {
1249 let a = _mm_set1_ps(0.0);
1250 let b = _mm_set1_ps(1.0);
1251 let mask = _mm_castsi128_ps(_mm_setr_epi32(0, -1, 0, -1));
1252 let r = _mm_blendv_ps(a, b, mask);
1253 let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0);
1254 assert_eq_m128(r, e);
1255 }
1256
1257 #[simd_test(enable = "sse4.1")]
1258 const fn test_mm_blend_pd() {
1259 let a = _mm_set1_pd(0.0);
1260 let b = _mm_set1_pd(1.0);
1261 let r = _mm_blend_pd::<0b10>(a, b);
1262 let e = _mm_setr_pd(0.0, 1.0);
1263 assert_eq_m128d(r, e);
1264 }
1265
1266 #[simd_test(enable = "sse4.1")]
1267 const fn test_mm_blend_ps() {
1268 let a = _mm_set1_ps(0.0);
1269 let b = _mm_set1_ps(1.0);
1270 let r = _mm_blend_ps::<0b1010>(a, b);
1271 let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0);
1272 assert_eq_m128(r, e);
1273 }
1274
1275 #[simd_test(enable = "sse4.1")]
1276 const fn test_mm_blend_epi16() {
1277 let a = _mm_set1_epi16(0);
1278 let b = _mm_set1_epi16(1);
1279 let r = _mm_blend_epi16::<0b1010_1100>(a, b);
1280 let e = _mm_setr_epi16(0, 0, 1, 1, 0, 1, 0, 1);
1281 assert_eq_m128i(r, e);
1282 }
1283
1284 #[simd_test(enable = "sse4.1")]
1285 const fn test_mm_extract_ps() {
1286 let a = _mm_setr_ps(0.0, 1.0, 2.0, 3.0);
1287 let r: f32 = f32::from_bits(_mm_extract_ps::<1>(a) as u32);
1288 assert_eq!(r, 1.0);
1289 let r: f32 = f32::from_bits(_mm_extract_ps::<3>(a) as u32);
1290 assert_eq!(r, 3.0);
1291 }
1292
1293 #[simd_test(enable = "sse4.1")]
1294 const fn test_mm_extract_epi8() {
1295 #[rustfmt::skip]
1296 let a = _mm_setr_epi8(
1297 -1, 1, 2, 3, 4, 5, 6, 7,
1298 8, 9, 10, 11, 12, 13, 14, 15
1299 );
1300 let r1 = _mm_extract_epi8::<0>(a);
1301 let r2 = _mm_extract_epi8::<3>(a);
1302 assert_eq!(r1, 0xFF);
1303 assert_eq!(r2, 3);
1304 }
1305
1306 #[simd_test(enable = "sse4.1")]
1307 const fn test_mm_extract_epi32() {
1308 let a = _mm_setr_epi32(0, 1, 2, 3);
1309 let r = _mm_extract_epi32::<1>(a);
1310 assert_eq!(r, 1);
1311 let r = _mm_extract_epi32::<3>(a);
1312 assert_eq!(r, 3);
1313 }
1314
1315 #[simd_test(enable = "sse4.1")]
1316 fn test_mm_insert_ps() {
1317 let a = _mm_set1_ps(1.0);
1318 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
1319 let r = _mm_insert_ps::<0b11_00_1100>(a, b);
1320 let e = _mm_setr_ps(4.0, 1.0, 0.0, 0.0);
1321 assert_eq_m128(r, e);
1322
1323 let a = _mm_set1_ps(1.0);
1325 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
1326 let r = _mm_insert_ps::<0b11_00_0001>(a, b);
1327 let e = _mm_setr_ps(0.0, 1.0, 1.0, 1.0);
1328 assert_eq_m128(r, e);
1329 }
1330
1331 #[simd_test(enable = "sse4.1")]
1332 const fn test_mm_insert_epi8() {
1333 let a = _mm_set1_epi8(0);
1334 let e = _mm_setr_epi8(0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1335 let r = _mm_insert_epi8::<1>(a, 32);
1336 assert_eq_m128i(r, e);
1337 let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0);
1338 let r = _mm_insert_epi8::<14>(a, 32);
1339 assert_eq_m128i(r, e);
1340 }
1341
1342 #[simd_test(enable = "sse4.1")]
1343 const fn test_mm_insert_epi32() {
1344 let a = _mm_set1_epi32(0);
1345 let e = _mm_setr_epi32(0, 32, 0, 0);
1346 let r = _mm_insert_epi32::<1>(a, 32);
1347 assert_eq_m128i(r, e);
1348 let e = _mm_setr_epi32(0, 0, 0, 32);
1349 let r = _mm_insert_epi32::<3>(a, 32);
1350 assert_eq_m128i(r, e);
1351 }
1352
1353 #[simd_test(enable = "sse4.1")]
1354 const fn test_mm_max_epi8() {
1355 #[rustfmt::skip]
1356 let a = _mm_setr_epi8(
1357 1, 4, 5, 8, 9, 12, 13, 16,
1358 17, 20, 21, 24, 25, 28, 29, 32,
1359 );
1360 #[rustfmt::skip]
1361 let b = _mm_setr_epi8(
1362 2, 3, 6, 7, 10, 11, 14, 15,
1363 18, 19, 22, 23, 26, 27, 30, 31,
1364 );
1365 let r = _mm_max_epi8(a, b);
1366 #[rustfmt::skip]
1367 let e = _mm_setr_epi8(
1368 2, 4, 6, 8, 10, 12, 14, 16,
1369 18, 20, 22, 24, 26, 28, 30, 32,
1370 );
1371 assert_eq_m128i(r, e);
1372 }
1373
1374 #[simd_test(enable = "sse4.1")]
1375 const fn test_mm_max_epu16() {
1376 let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16);
1377 let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15);
1378 let r = _mm_max_epu16(a, b);
1379 let e = _mm_setr_epi16(2, 4, 6, 8, 10, 12, 14, 16);
1380 assert_eq_m128i(r, e);
1381 }
1382
1383 #[simd_test(enable = "sse4.1")]
1384 const fn test_mm_max_epi32() {
1385 let a = _mm_setr_epi32(1, 4, 5, 8);
1386 let b = _mm_setr_epi32(2, 3, 6, 7);
1387 let r = _mm_max_epi32(a, b);
1388 let e = _mm_setr_epi32(2, 4, 6, 8);
1389 assert_eq_m128i(r, e);
1390 }
1391
1392 #[simd_test(enable = "sse4.1")]
1393 const fn test_mm_max_epu32() {
1394 let a = _mm_setr_epi32(1, 4, 5, 8);
1395 let b = _mm_setr_epi32(2, 3, 6, 7);
1396 let r = _mm_max_epu32(a, b);
1397 let e = _mm_setr_epi32(2, 4, 6, 8);
1398 assert_eq_m128i(r, e);
1399 }
1400
1401 #[simd_test(enable = "sse4.1")]
1402 const fn test_mm_min_epi8() {
1403 #[rustfmt::skip]
1404 let a = _mm_setr_epi8(
1405 1, 4, 5, 8, 9, 12, 13, 16,
1406 17, 20, 21, 24, 25, 28, 29, 32,
1407 );
1408 #[rustfmt::skip]
1409 let b = _mm_setr_epi8(
1410 2, 3, 6, 7, 10, 11, 14, 15,
1411 18, 19, 22, 23, 26, 27, 30, 31,
1412 );
1413 let r = _mm_min_epi8(a, b);
1414 #[rustfmt::skip]
1415 let e = _mm_setr_epi8(
1416 1, 3, 5, 7, 9, 11, 13, 15,
1417 17, 19, 21, 23, 25, 27, 29, 31,
1418 );
1419 assert_eq_m128i(r, e);
1420
1421 #[rustfmt::skip]
1422 let a = _mm_setr_epi8(
1423 1, -4, -5, 8, -9, -12, 13, -16,
1424 17, 20, 21, 24, 25, 28, 29, 32,
1425 );
1426 #[rustfmt::skip]
1427 let b = _mm_setr_epi8(
1428 2, -3, -6, 7, -10, -11, 14, -15,
1429 18, 19, 22, 23, 26, 27, 30, 31,
1430 );
1431 let r = _mm_min_epi8(a, b);
1432 #[rustfmt::skip]
1433 let e = _mm_setr_epi8(
1434 1, -4, -6, 7, -10, -12, 13, -16,
1435 17, 19, 21, 23, 25, 27, 29, 31,
1436 );
1437 assert_eq_m128i(r, e);
1438 }
1439
1440 #[simd_test(enable = "sse4.1")]
1441 const fn test_mm_min_epu16() {
1442 let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16);
1443 let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15);
1444 let r = _mm_min_epu16(a, b);
1445 let e = _mm_setr_epi16(1, 3, 5, 7, 9, 11, 13, 15);
1446 assert_eq_m128i(r, e);
1447 }
1448
1449 #[simd_test(enable = "sse4.1")]
1450 const fn test_mm_min_epi32() {
1451 let a = _mm_setr_epi32(1, 4, 5, 8);
1452 let b = _mm_setr_epi32(2, 3, 6, 7);
1453 let r = _mm_min_epi32(a, b);
1454 let e = _mm_setr_epi32(1, 3, 5, 7);
1455 assert_eq_m128i(r, e);
1456
1457 let a = _mm_setr_epi32(-1, 4, 5, -7);
1458 let b = _mm_setr_epi32(-2, 3, -6, 8);
1459 let r = _mm_min_epi32(a, b);
1460 let e = _mm_setr_epi32(-2, 3, -6, -7);
1461 assert_eq_m128i(r, e);
1462 }
1463
1464 #[simd_test(enable = "sse4.1")]
1465 const fn test_mm_min_epu32() {
1466 let a = _mm_setr_epi32(1, 4, 5, 8);
1467 let b = _mm_setr_epi32(2, 3, 6, 7);
1468 let r = _mm_min_epu32(a, b);
1469 let e = _mm_setr_epi32(1, 3, 5, 7);
1470 assert_eq_m128i(r, e);
1471 }
1472
1473 #[simd_test(enable = "sse4.1")]
1474 const fn test_mm_packus_epi32() {
1475 let a = _mm_setr_epi32(1, 2, 3, 4);
1476 let b = _mm_setr_epi32(-1, -2, -3, -4);
1477 let r = _mm_packus_epi32(a, b);
1478 let e = _mm_setr_epi16(1, 2, 3, 4, 0, 0, 0, 0);
1479 assert_eq_m128i(r, e);
1480 }
1481
1482 #[simd_test(enable = "sse4.1")]
1483 const fn test_mm_cmpeq_epi64() {
1484 let a = _mm_setr_epi64x(0, 1);
1485 let b = _mm_setr_epi64x(0, 0);
1486 let r = _mm_cmpeq_epi64(a, b);
1487 let e = _mm_setr_epi64x(-1, 0);
1488 assert_eq_m128i(r, e);
1489 }
1490
1491 #[simd_test(enable = "sse4.1")]
1492 const fn test_mm_cvtepi8_epi16() {
1493 let a = _mm_set1_epi8(10);
1494 let r = _mm_cvtepi8_epi16(a);
1495 let e = _mm_set1_epi16(10);
1496 assert_eq_m128i(r, e);
1497 let a = _mm_set1_epi8(-10);
1498 let r = _mm_cvtepi8_epi16(a);
1499 let e = _mm_set1_epi16(-10);
1500 assert_eq_m128i(r, e);
1501 }
1502
1503 #[simd_test(enable = "sse4.1")]
1504 const fn test_mm_cvtepi8_epi32() {
1505 let a = _mm_set1_epi8(10);
1506 let r = _mm_cvtepi8_epi32(a);
1507 let e = _mm_set1_epi32(10);
1508 assert_eq_m128i(r, e);
1509 let a = _mm_set1_epi8(-10);
1510 let r = _mm_cvtepi8_epi32(a);
1511 let e = _mm_set1_epi32(-10);
1512 assert_eq_m128i(r, e);
1513 }
1514
1515 #[simd_test(enable = "sse4.1")]
1516 const fn test_mm_cvtepi8_epi64() {
1517 let a = _mm_set1_epi8(10);
1518 let r = _mm_cvtepi8_epi64(a);
1519 let e = _mm_set1_epi64x(10);
1520 assert_eq_m128i(r, e);
1521 let a = _mm_set1_epi8(-10);
1522 let r = _mm_cvtepi8_epi64(a);
1523 let e = _mm_set1_epi64x(-10);
1524 assert_eq_m128i(r, e);
1525 }
1526
1527 #[simd_test(enable = "sse4.1")]
1528 const fn test_mm_cvtepi16_epi32() {
1529 let a = _mm_set1_epi16(10);
1530 let r = _mm_cvtepi16_epi32(a);
1531 let e = _mm_set1_epi32(10);
1532 assert_eq_m128i(r, e);
1533 let a = _mm_set1_epi16(-10);
1534 let r = _mm_cvtepi16_epi32(a);
1535 let e = _mm_set1_epi32(-10);
1536 assert_eq_m128i(r, e);
1537 }
1538
1539 #[simd_test(enable = "sse4.1")]
1540 const fn test_mm_cvtepi16_epi64() {
1541 let a = _mm_set1_epi16(10);
1542 let r = _mm_cvtepi16_epi64(a);
1543 let e = _mm_set1_epi64x(10);
1544 assert_eq_m128i(r, e);
1545 let a = _mm_set1_epi16(-10);
1546 let r = _mm_cvtepi16_epi64(a);
1547 let e = _mm_set1_epi64x(-10);
1548 assert_eq_m128i(r, e);
1549 }
1550
1551 #[simd_test(enable = "sse4.1")]
1552 const fn test_mm_cvtepi32_epi64() {
1553 let a = _mm_set1_epi32(10);
1554 let r = _mm_cvtepi32_epi64(a);
1555 let e = _mm_set1_epi64x(10);
1556 assert_eq_m128i(r, e);
1557 let a = _mm_set1_epi32(-10);
1558 let r = _mm_cvtepi32_epi64(a);
1559 let e = _mm_set1_epi64x(-10);
1560 assert_eq_m128i(r, e);
1561 }
1562
1563 #[simd_test(enable = "sse4.1")]
1564 const fn test_mm_cvtepu8_epi16() {
1565 let a = _mm_set1_epi8(10);
1566 let r = _mm_cvtepu8_epi16(a);
1567 let e = _mm_set1_epi16(10);
1568 assert_eq_m128i(r, e);
1569 }
1570
1571 #[simd_test(enable = "sse4.1")]
1572 const fn test_mm_cvtepu8_epi32() {
1573 let a = _mm_set1_epi8(10);
1574 let r = _mm_cvtepu8_epi32(a);
1575 let e = _mm_set1_epi32(10);
1576 assert_eq_m128i(r, e);
1577 }
1578
1579 #[simd_test(enable = "sse4.1")]
1580 const fn test_mm_cvtepu8_epi64() {
1581 let a = _mm_set1_epi8(10);
1582 let r = _mm_cvtepu8_epi64(a);
1583 let e = _mm_set1_epi64x(10);
1584 assert_eq_m128i(r, e);
1585 }
1586
1587 #[simd_test(enable = "sse4.1")]
1588 const fn test_mm_cvtepu16_epi32() {
1589 let a = _mm_set1_epi16(10);
1590 let r = _mm_cvtepu16_epi32(a);
1591 let e = _mm_set1_epi32(10);
1592 assert_eq_m128i(r, e);
1593 }
1594
1595 #[simd_test(enable = "sse4.1")]
1596 const fn test_mm_cvtepu16_epi64() {
1597 let a = _mm_set1_epi16(10);
1598 let r = _mm_cvtepu16_epi64(a);
1599 let e = _mm_set1_epi64x(10);
1600 assert_eq_m128i(r, e);
1601 }
1602
1603 #[simd_test(enable = "sse4.1")]
1604 const fn test_mm_cvtepu32_epi64() {
1605 let a = _mm_set1_epi32(10);
1606 let r = _mm_cvtepu32_epi64(a);
1607 let e = _mm_set1_epi64x(10);
1608 assert_eq_m128i(r, e);
1609 }
1610
1611 #[simd_test(enable = "sse4.1")]
1612 fn test_mm_dp_pd() {
1613 let a = _mm_setr_pd(2.0, 3.0);
1614 let b = _mm_setr_pd(1.0, 4.0);
1615 let e = _mm_setr_pd(14.0, 0.0);
1616 assert_eq_m128d(_mm_dp_pd::<0b00110001>(a, b), e);
1617 }
1618
1619 #[simd_test(enable = "sse4.1")]
1620 fn test_mm_dp_ps() {
1621 let a = _mm_setr_ps(2.0, 3.0, 1.0, 10.0);
1622 let b = _mm_setr_ps(1.0, 4.0, 0.5, 10.0);
1623 let e = _mm_setr_ps(14.5, 0.0, 14.5, 0.0);
1624 assert_eq_m128(_mm_dp_ps::<0b01110101>(a, b), e);
1625 }
1626
1627 #[simd_test(enable = "sse4.1")]
1628 const fn test_mm_floor_pd() {
1629 let a = _mm_setr_pd(2.5, 4.5);
1630 let r = _mm_floor_pd(a);
1631 let e = _mm_setr_pd(2.0, 4.0);
1632 assert_eq_m128d(r, e);
1633 }
1634
1635 #[simd_test(enable = "sse4.1")]
1636 const fn test_mm_floor_ps() {
1637 let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5);
1638 let r = _mm_floor_ps(a);
1639 let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0);
1640 assert_eq_m128(r, e);
1641 }
1642
1643 #[simd_test(enable = "sse4.1")]
1644 fn test_mm_floor_sd() {
1645 let a = _mm_setr_pd(2.5, 4.5);
1646 let b = _mm_setr_pd(-1.5, -3.5);
1647 let r = _mm_floor_sd(a, b);
1648 let e = _mm_setr_pd(-2.0, 4.5);
1649 assert_eq_m128d(r, e);
1650 }
1651
1652 #[simd_test(enable = "sse4.1")]
1653 fn test_mm_floor_ss() {
1654 let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5);
1655 let b = _mm_setr_ps(-1.5, -3.5, -7.5, -15.5);
1656 let r = _mm_floor_ss(a, b);
1657 let e = _mm_setr_ps(-2.0, 4.5, 8.5, 16.5);
1658 assert_eq_m128(r, e);
1659 }
1660
1661 #[simd_test(enable = "sse4.1")]
1662 const fn test_mm_ceil_pd() {
1663 let a = _mm_setr_pd(1.5, 3.5);
1664 let r = _mm_ceil_pd(a);
1665 let e = _mm_setr_pd(2.0, 4.0);
1666 assert_eq_m128d(r, e);
1667 }
1668
1669 #[simd_test(enable = "sse4.1")]
1670 const fn test_mm_ceil_ps() {
1671 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1672 let r = _mm_ceil_ps(a);
1673 let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0);
1674 assert_eq_m128(r, e);
1675 }
1676
1677 #[simd_test(enable = "sse4.1")]
1678 fn test_mm_ceil_sd() {
1679 let a = _mm_setr_pd(1.5, 3.5);
1680 let b = _mm_setr_pd(-2.5, -4.5);
1681 let r = _mm_ceil_sd(a, b);
1682 let e = _mm_setr_pd(-2.0, 3.5);
1683 assert_eq_m128d(r, e);
1684 }
1685
1686 #[simd_test(enable = "sse4.1")]
1687 fn test_mm_ceil_ss() {
1688 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1689 let b = _mm_setr_ps(-2.5, -4.5, -8.5, -16.5);
1690 let r = _mm_ceil_ss(a, b);
1691 let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
1692 assert_eq_m128(r, e);
1693 }
1694
1695 #[simd_test(enable = "sse4.1")]
1696 fn test_mm_round_pd() {
1697 let a = _mm_setr_pd(1.25, 3.75);
1698 let r = _mm_round_pd::<_MM_FROUND_TO_NEAREST_INT>(a);
1699 let e = _mm_setr_pd(1.0, 4.0);
1700 assert_eq_m128d(r, e);
1701 }
1702
1703 #[simd_test(enable = "sse4.1")]
1704 fn test_mm_round_ps() {
1705 let a = _mm_setr_ps(2.25, 4.75, -1.75, -4.25);
1706 let r = _mm_round_ps::<_MM_FROUND_TO_ZERO>(a);
1707 let e = _mm_setr_ps(2.0, 4.0, -1.0, -4.0);
1708 assert_eq_m128(r, e);
1709 }
1710
1711 #[simd_test(enable = "sse4.1")]
1712 fn test_mm_round_sd() {
1713 let a = _mm_setr_pd(1.5, 3.5);
1714 let b = _mm_setr_pd(-2.5, -4.5);
1715 let r = _mm_round_sd::<_MM_FROUND_TO_NEAREST_INT>(a, b);
1716 let e = _mm_setr_pd(-2.0, 3.5);
1717 assert_eq_m128d(r, e);
1718
1719 let a = _mm_setr_pd(1.5, 3.5);
1720 let b = _mm_setr_pd(-2.5, -4.5);
1721 let r = _mm_round_sd::<_MM_FROUND_TO_NEG_INF>(a, b);
1722 let e = _mm_setr_pd(-3.0, 3.5);
1723 assert_eq_m128d(r, e);
1724
1725 let a = _mm_setr_pd(1.5, 3.5);
1726 let b = _mm_setr_pd(-2.5, -4.5);
1727 let r = _mm_round_sd::<_MM_FROUND_TO_POS_INF>(a, b);
1728 let e = _mm_setr_pd(-2.0, 3.5);
1729 assert_eq_m128d(r, e);
1730
1731 let a = _mm_setr_pd(1.5, 3.5);
1732 let b = _mm_setr_pd(-2.5, -4.5);
1733 let r = _mm_round_sd::<_MM_FROUND_TO_ZERO>(a, b);
1734 let e = _mm_setr_pd(-2.0, 3.5);
1735 assert_eq_m128d(r, e);
1736 }
1737
1738 #[simd_test(enable = "sse4.1")]
1739 fn test_mm_round_ss() {
1740 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1741 let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
1742 let r = _mm_round_ss::<_MM_FROUND_TO_NEAREST_INT>(a, b);
1743 let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
1744 assert_eq_m128(r, e);
1745
1746 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1747 let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
1748 let r = _mm_round_ss::<_MM_FROUND_TO_NEG_INF>(a, b);
1749 let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
1750 assert_eq_m128(r, e);
1751
1752 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1753 let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
1754 let r = _mm_round_ss::<_MM_FROUND_TO_POS_INF>(a, b);
1755 let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5);
1756 assert_eq_m128(r, e);
1757
1758 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1759 let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
1760 let r = _mm_round_ss::<_MM_FROUND_TO_ZERO>(a, b);
1761 let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5);
1762 assert_eq_m128(r, e);
1763 }
1764
1765 #[simd_test(enable = "sse4.1")]
1766 fn test_mm_minpos_epu16_1() {
1767 let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 66);
1768 let r = _mm_minpos_epu16(a);
1769 let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0);
1770 assert_eq_m128i(r, e);
1771 }
1772
1773 #[simd_test(enable = "sse4.1")]
1774 fn test_mm_minpos_epu16_2() {
1775 let a = _mm_setr_epi16(0, 18, 44, 97, 50, 13, 67, 66);
1776 let r = _mm_minpos_epu16(a);
1777 let e = _mm_setr_epi16(0, 0, 0, 0, 0, 0, 0, 0);
1778 assert_eq_m128i(r, e);
1779 }
1780
1781 #[simd_test(enable = "sse4.1")]
1782 fn test_mm_minpos_epu16_3() {
1783 let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 13);
1785 let r = _mm_minpos_epu16(a);
1786 let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0);
1787 assert_eq_m128i(r, e);
1788 }
1789
1790 #[simd_test(enable = "sse4.1")]
1791 const fn test_mm_mul_epi32() {
1792 {
1793 let a = _mm_setr_epi32(1, 1, 1, 1);
1794 let b = _mm_setr_epi32(1, 2, 3, 4);
1795 let r = _mm_mul_epi32(a, b);
1796 let e = _mm_setr_epi64x(1, 3);
1797 assert_eq_m128i(r, e);
1798 }
1799 {
1800 let a = _mm_setr_epi32(15, 2 , 1234567, 4 );
1801 let b = _mm_setr_epi32(
1802 -20, -256, 666666, 666666, );
1805 let r = _mm_mul_epi32(a, b);
1806 let e = _mm_setr_epi64x(-300, 823043843622);
1807 assert_eq_m128i(r, e);
1808 }
1809 }
1810
1811 #[simd_test(enable = "sse4.1")]
1812 const fn test_mm_mullo_epi32() {
1813 {
1814 let a = _mm_setr_epi32(1, 1, 1, 1);
1815 let b = _mm_setr_epi32(1, 2, 3, 4);
1816 let r = _mm_mullo_epi32(a, b);
1817 let e = _mm_setr_epi32(1, 2, 3, 4);
1818 assert_eq_m128i(r, e);
1819 }
1820 {
1821 let a = _mm_setr_epi32(15, -2, 1234567, 99999);
1822 let b = _mm_setr_epi32(-20, -256, 666666, -99999);
1823 let r = _mm_mullo_epi32(a, b);
1824 let e = _mm_setr_epi32(-300, 512, -1589877210, -1409865409);
1828 assert_eq_m128i(r, e);
1829 }
1830 }
1831
1832 #[simd_test(enable = "sse4.1")]
1833 fn test_mm_minpos_epu16() {
1834 let a = _mm_setr_epi16(8, 7, 6, 5, 4, 1, 2, 3);
1835 let r = _mm_minpos_epu16(a);
1836 let e = _mm_setr_epi16(1, 5, 0, 0, 0, 0, 0, 0);
1837 assert_eq_m128i(r, e);
1838 }
1839
1840 #[simd_test(enable = "sse4.1")]
1841 fn test_mm_mpsadbw_epu8() {
1842 #[rustfmt::skip]
1843 let a = _mm_setr_epi8(
1844 0, 1, 2, 3, 4, 5, 6, 7,
1845 8, 9, 10, 11, 12, 13, 14, 15,
1846 );
1847
1848 let r = _mm_mpsadbw_epu8::<0b000>(a, a);
1849 let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
1850 assert_eq_m128i(r, e);
1851
1852 let r = _mm_mpsadbw_epu8::<0b001>(a, a);
1853 let e = _mm_setr_epi16(16, 12, 8, 4, 0, 4, 8, 12);
1854 assert_eq_m128i(r, e);
1855
1856 let r = _mm_mpsadbw_epu8::<0b100>(a, a);
1857 let e = _mm_setr_epi16(16, 20, 24, 28, 32, 36, 40, 44);
1858 assert_eq_m128i(r, e);
1859
1860 let r = _mm_mpsadbw_epu8::<0b101>(a, a);
1861 let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
1862 assert_eq_m128i(r, e);
1863
1864 let r = _mm_mpsadbw_epu8::<0b111>(a, a);
1865 let e = _mm_setr_epi16(32, 28, 24, 20, 16, 12, 8, 4);
1866 assert_eq_m128i(r, e);
1867 }
1868
1869 #[simd_test(enable = "sse4.1")]
1870 const fn test_mm_testz_si128() {
1871 let a = _mm_set1_epi8(1);
1872 let mask = _mm_set1_epi8(0);
1873 let r = _mm_testz_si128(a, mask);
1874 assert_eq!(r, 1);
1875 let a = _mm_set1_epi8(0b101);
1876 let mask = _mm_set1_epi8(0b110);
1877 let r = _mm_testz_si128(a, mask);
1878 assert_eq!(r, 0);
1879 let a = _mm_set1_epi8(0b011);
1880 let mask = _mm_set1_epi8(0b100);
1881 let r = _mm_testz_si128(a, mask);
1882 assert_eq!(r, 1);
1883 }
1884
1885 #[simd_test(enable = "sse4.1")]
1886 const fn test_mm_testc_si128() {
1887 let a = _mm_set1_epi8(-1);
1888 let mask = _mm_set1_epi8(0);
1889 let r = _mm_testc_si128(a, mask);
1890 assert_eq!(r, 1);
1891 let a = _mm_set1_epi8(0b101);
1892 let mask = _mm_set1_epi8(0b110);
1893 let r = _mm_testc_si128(a, mask);
1894 assert_eq!(r, 0);
1895 let a = _mm_set1_epi8(0b101);
1896 let mask = _mm_set1_epi8(0b100);
1897 let r = _mm_testc_si128(a, mask);
1898 assert_eq!(r, 1);
1899 }
1900
1901 #[simd_test(enable = "sse4.1")]
1902 fn test_mm_testnzc_si128() {
1903 let a = _mm_set1_epi8(0);
1904 let mask = _mm_set1_epi8(1);
1905 let r = _mm_testnzc_si128(a, mask);
1906 assert_eq!(r, 0);
1907 let a = _mm_set1_epi8(-1);
1908 let mask = _mm_set1_epi8(0);
1909 let r = _mm_testnzc_si128(a, mask);
1910 assert_eq!(r, 0);
1911 let a = _mm_set1_epi8(0b101);
1912 let mask = _mm_set1_epi8(0b110);
1913 let r = _mm_testnzc_si128(a, mask);
1914 assert_eq!(r, 1);
1915 let a = _mm_set1_epi8(0b101);
1916 let mask = _mm_set1_epi8(0b101);
1917 let r = _mm_testnzc_si128(a, mask);
1918 assert_eq!(r, 0);
1919 }
1920
1921 #[simd_test(enable = "sse4.1")]
1922 const fn test_mm_test_all_zeros() {
1923 let a = _mm_set1_epi8(1);
1924 let mask = _mm_set1_epi8(0);
1925 let r = _mm_test_all_zeros(a, mask);
1926 assert_eq!(r, 1);
1927 let a = _mm_set1_epi8(0b101);
1928 let mask = _mm_set1_epi8(0b110);
1929 let r = _mm_test_all_zeros(a, mask);
1930 assert_eq!(r, 0);
1931 let a = _mm_set1_epi8(0b011);
1932 let mask = _mm_set1_epi8(0b100);
1933 let r = _mm_test_all_zeros(a, mask);
1934 assert_eq!(r, 1);
1935 }
1936
1937 #[simd_test(enable = "sse4.1")]
1938 const fn test_mm_test_all_ones() {
1939 let a = _mm_set1_epi8(-1);
1940 let r = _mm_test_all_ones(a);
1941 assert_eq!(r, 1);
1942 let a = _mm_set1_epi8(0b101);
1943 let r = _mm_test_all_ones(a);
1944 assert_eq!(r, 0);
1945 }
1946
1947 #[simd_test(enable = "sse4.1")]
1948 fn test_mm_test_mix_ones_zeros() {
1949 let a = _mm_set1_epi8(0);
1950 let mask = _mm_set1_epi8(1);
1951 let r = _mm_test_mix_ones_zeros(a, mask);
1952 assert_eq!(r, 0);
1953 let a = _mm_set1_epi8(-1);
1954 let mask = _mm_set1_epi8(0);
1955 let r = _mm_test_mix_ones_zeros(a, mask);
1956 assert_eq!(r, 0);
1957 let a = _mm_set1_epi8(0b101);
1958 let mask = _mm_set1_epi8(0b110);
1959 let r = _mm_test_mix_ones_zeros(a, mask);
1960 assert_eq!(r, 1);
1961 let a = _mm_set1_epi8(0b101);
1962 let mask = _mm_set1_epi8(0b101);
1963 let r = _mm_test_mix_ones_zeros(a, mask);
1964 assert_eq!(r, 0);
1965 }
1966
1967 #[simd_test(enable = "sse4.1")]
1968 fn test_mm_stream_load_si128() {
1969 let a = _mm_set_epi64x(5, 6);
1970 let r = unsafe { _mm_stream_load_si128(core::ptr::addr_of!(a) as *const _) };
1971 assert_eq_m128i(a, r);
1972 }
1973}