1use crate::{
4 core_arch::{simd::*, x86::*},
5 intrinsics::simd::*,
6 intrinsics::sqrtf32,
7 mem, ptr,
8};
9
10#[cfg(test)]
11use stdarch_test::assert_instr;
12
13#[inline]
18#[target_feature(enable = "sse")]
19#[cfg_attr(test, assert_instr(addss))]
20#[stable(feature = "simd_x86", since = "1.27.0")]
21pub fn _mm_add_ss(a: __m128, b: __m128) -> __m128 {
22 unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) + _mm_cvtss_f32(b)) }
23}
24
25#[inline]
30#[target_feature(enable = "sse")]
31#[cfg_attr(test, assert_instr(addps))]
32#[stable(feature = "simd_x86", since = "1.27.0")]
33pub fn _mm_add_ps(a: __m128, b: __m128) -> __m128 {
34 unsafe { simd_add(a, b) }
35}
36
37#[inline]
42#[target_feature(enable = "sse")]
43#[cfg_attr(test, assert_instr(subss))]
44#[stable(feature = "simd_x86", since = "1.27.0")]
45pub fn _mm_sub_ss(a: __m128, b: __m128) -> __m128 {
46 unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) - _mm_cvtss_f32(b)) }
47}
48
49#[inline]
54#[target_feature(enable = "sse")]
55#[cfg_attr(test, assert_instr(subps))]
56#[stable(feature = "simd_x86", since = "1.27.0")]
57pub fn _mm_sub_ps(a: __m128, b: __m128) -> __m128 {
58 unsafe { simd_sub(a, b) }
59}
60
61#[inline]
66#[target_feature(enable = "sse")]
67#[cfg_attr(test, assert_instr(mulss))]
68#[stable(feature = "simd_x86", since = "1.27.0")]
69pub fn _mm_mul_ss(a: __m128, b: __m128) -> __m128 {
70 unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) * _mm_cvtss_f32(b)) }
71}
72
73#[inline]
78#[target_feature(enable = "sse")]
79#[cfg_attr(test, assert_instr(mulps))]
80#[stable(feature = "simd_x86", since = "1.27.0")]
81pub fn _mm_mul_ps(a: __m128, b: __m128) -> __m128 {
82 unsafe { simd_mul(a, b) }
83}
84
85#[inline]
90#[target_feature(enable = "sse")]
91#[cfg_attr(test, assert_instr(divss))]
92#[stable(feature = "simd_x86", since = "1.27.0")]
93pub fn _mm_div_ss(a: __m128, b: __m128) -> __m128 {
94 unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) / _mm_cvtss_f32(b)) }
95}
96
97#[inline]
102#[target_feature(enable = "sse")]
103#[cfg_attr(test, assert_instr(divps))]
104#[stable(feature = "simd_x86", since = "1.27.0")]
105pub fn _mm_div_ps(a: __m128, b: __m128) -> __m128 {
106 unsafe { simd_div(a, b) }
107}
108
109#[inline]
114#[target_feature(enable = "sse")]
115#[cfg_attr(test, assert_instr(sqrtss))]
116#[stable(feature = "simd_x86", since = "1.27.0")]
117pub fn _mm_sqrt_ss(a: __m128) -> __m128 {
118 unsafe { simd_insert!(a, 0, sqrtf32(_mm_cvtss_f32(a))) }
119}
120
121#[inline]
126#[target_feature(enable = "sse")]
127#[cfg_attr(test, assert_instr(sqrtps))]
128#[stable(feature = "simd_x86", since = "1.27.0")]
129pub fn _mm_sqrt_ps(a: __m128) -> __m128 {
130 unsafe { simd_fsqrt(a) }
131}
132
133#[inline]
138#[target_feature(enable = "sse")]
139#[cfg_attr(test, assert_instr(rcpss))]
140#[stable(feature = "simd_x86", since = "1.27.0")]
141pub fn _mm_rcp_ss(a: __m128) -> __m128 {
142 unsafe { rcpss(a) }
143}
144
145#[inline]
150#[target_feature(enable = "sse")]
151#[cfg_attr(test, assert_instr(rcpps))]
152#[stable(feature = "simd_x86", since = "1.27.0")]
153pub fn _mm_rcp_ps(a: __m128) -> __m128 {
154 unsafe { rcpps(a) }
155}
156
157#[inline]
162#[target_feature(enable = "sse")]
163#[cfg_attr(test, assert_instr(rsqrtss))]
164#[stable(feature = "simd_x86", since = "1.27.0")]
165pub fn _mm_rsqrt_ss(a: __m128) -> __m128 {
166 unsafe { rsqrtss(a) }
167}
168
169#[inline]
174#[target_feature(enable = "sse")]
175#[cfg_attr(test, assert_instr(rsqrtps))]
176#[stable(feature = "simd_x86", since = "1.27.0")]
177pub fn _mm_rsqrt_ps(a: __m128) -> __m128 {
178 unsafe { rsqrtps(a) }
179}
180
181#[inline]
187#[target_feature(enable = "sse")]
188#[cfg_attr(test, assert_instr(minss))]
189#[stable(feature = "simd_x86", since = "1.27.0")]
190pub fn _mm_min_ss(a: __m128, b: __m128) -> __m128 {
191 unsafe { minss(a, b) }
192}
193
194#[inline]
199#[target_feature(enable = "sse")]
200#[cfg_attr(test, assert_instr(minps))]
201#[stable(feature = "simd_x86", since = "1.27.0")]
202pub fn _mm_min_ps(a: __m128, b: __m128) -> __m128 {
203 unsafe { minps(a, b) }
205}
206
207#[inline]
213#[target_feature(enable = "sse")]
214#[cfg_attr(test, assert_instr(maxss))]
215#[stable(feature = "simd_x86", since = "1.27.0")]
216pub fn _mm_max_ss(a: __m128, b: __m128) -> __m128 {
217 unsafe { maxss(a, b) }
218}
219
220#[inline]
225#[target_feature(enable = "sse")]
226#[cfg_attr(test, assert_instr(maxps))]
227#[stable(feature = "simd_x86", since = "1.27.0")]
228pub fn _mm_max_ps(a: __m128, b: __m128) -> __m128 {
229 unsafe { maxps(a, b) }
231}
232
233#[inline]
237#[target_feature(enable = "sse")]
238#[cfg_attr(
240 all(test, any(target_arch = "x86_64", target_feature = "sse2")),
241 assert_instr(andps)
242)]
243#[stable(feature = "simd_x86", since = "1.27.0")]
244pub fn _mm_and_ps(a: __m128, b: __m128) -> __m128 {
245 unsafe {
246 let a: __m128i = mem::transmute(a);
247 let b: __m128i = mem::transmute(b);
248 mem::transmute(simd_and(a, b))
249 }
250}
251
252#[inline]
259#[target_feature(enable = "sse")]
260#[cfg_attr(
263 all(test, any(target_arch = "x86_64", target_feature = "sse2")),
264 assert_instr(andnps)
265)]
266#[stable(feature = "simd_x86", since = "1.27.0")]
267pub fn _mm_andnot_ps(a: __m128, b: __m128) -> __m128 {
268 unsafe {
269 let a: __m128i = mem::transmute(a);
270 let b: __m128i = mem::transmute(b);
271 let mask: __m128i = mem::transmute(i32x4::splat(-1));
272 mem::transmute(simd_and(simd_xor(mask, a), b))
273 }
274}
275
276#[inline]
280#[target_feature(enable = "sse")]
281#[cfg_attr(
283 all(test, any(target_arch = "x86_64", target_feature = "sse2")),
284 assert_instr(orps)
285)]
286#[stable(feature = "simd_x86", since = "1.27.0")]
287pub fn _mm_or_ps(a: __m128, b: __m128) -> __m128 {
288 unsafe {
289 let a: __m128i = mem::transmute(a);
290 let b: __m128i = mem::transmute(b);
291 mem::transmute(simd_or(a, b))
292 }
293}
294
295#[inline]
300#[target_feature(enable = "sse")]
301#[cfg_attr(
303 all(test, any(target_arch = "x86_64", target_feature = "sse2")),
304 assert_instr(xorps)
305)]
306#[stable(feature = "simd_x86", since = "1.27.0")]
307pub fn _mm_xor_ps(a: __m128, b: __m128) -> __m128 {
308 unsafe {
309 let a: __m128i = mem::transmute(a);
310 let b: __m128i = mem::transmute(b);
311 mem::transmute(simd_xor(a, b))
312 }
313}
314
315#[inline]
321#[target_feature(enable = "sse")]
322#[cfg_attr(test, assert_instr(cmpeqss))]
323#[stable(feature = "simd_x86", since = "1.27.0")]
324pub fn _mm_cmpeq_ss(a: __m128, b: __m128) -> __m128 {
325 unsafe { cmpss(a, b, 0) }
326}
327
328#[inline]
335#[target_feature(enable = "sse")]
336#[cfg_attr(test, assert_instr(cmpltss))]
337#[stable(feature = "simd_x86", since = "1.27.0")]
338pub fn _mm_cmplt_ss(a: __m128, b: __m128) -> __m128 {
339 unsafe { cmpss(a, b, 1) }
340}
341
342#[inline]
349#[target_feature(enable = "sse")]
350#[cfg_attr(test, assert_instr(cmpless))]
351#[stable(feature = "simd_x86", since = "1.27.0")]
352pub fn _mm_cmple_ss(a: __m128, b: __m128) -> __m128 {
353 unsafe { cmpss(a, b, 2) }
354}
355
356#[inline]
363#[target_feature(enable = "sse")]
364#[cfg_attr(test, assert_instr(cmpltss))]
365#[stable(feature = "simd_x86", since = "1.27.0")]
366pub fn _mm_cmpgt_ss(a: __m128, b: __m128) -> __m128 {
367 unsafe { simd_shuffle!(a, cmpss(b, a, 1), [4, 1, 2, 3]) }
368}
369
370#[inline]
377#[target_feature(enable = "sse")]
378#[cfg_attr(test, assert_instr(cmpless))]
379#[stable(feature = "simd_x86", since = "1.27.0")]
380pub fn _mm_cmpge_ss(a: __m128, b: __m128) -> __m128 {
381 unsafe { simd_shuffle!(a, cmpss(b, a, 2), [4, 1, 2, 3]) }
382}
383
384#[inline]
391#[target_feature(enable = "sse")]
392#[cfg_attr(test, assert_instr(cmpneqss))]
393#[stable(feature = "simd_x86", since = "1.27.0")]
394pub fn _mm_cmpneq_ss(a: __m128, b: __m128) -> __m128 {
395 unsafe { cmpss(a, b, 4) }
396}
397
398#[inline]
405#[target_feature(enable = "sse")]
406#[cfg_attr(test, assert_instr(cmpnltss))]
407#[stable(feature = "simd_x86", since = "1.27.0")]
408pub fn _mm_cmpnlt_ss(a: __m128, b: __m128) -> __m128 {
409 unsafe { cmpss(a, b, 5) }
410}
411
412#[inline]
419#[target_feature(enable = "sse")]
420#[cfg_attr(test, assert_instr(cmpnless))]
421#[stable(feature = "simd_x86", since = "1.27.0")]
422pub fn _mm_cmpnle_ss(a: __m128, b: __m128) -> __m128 {
423 unsafe { cmpss(a, b, 6) }
424}
425
426#[inline]
433#[target_feature(enable = "sse")]
434#[cfg_attr(test, assert_instr(cmpnltss))]
435#[stable(feature = "simd_x86", since = "1.27.0")]
436pub fn _mm_cmpngt_ss(a: __m128, b: __m128) -> __m128 {
437 unsafe { simd_shuffle!(a, cmpss(b, a, 5), [4, 1, 2, 3]) }
438}
439
440#[inline]
447#[target_feature(enable = "sse")]
448#[cfg_attr(test, assert_instr(cmpnless))]
449#[stable(feature = "simd_x86", since = "1.27.0")]
450pub fn _mm_cmpnge_ss(a: __m128, b: __m128) -> __m128 {
451 unsafe { simd_shuffle!(a, cmpss(b, a, 6), [4, 1, 2, 3]) }
452}
453
454#[inline]
461#[target_feature(enable = "sse")]
462#[cfg_attr(test, assert_instr(cmpordss))]
463#[stable(feature = "simd_x86", since = "1.27.0")]
464pub fn _mm_cmpord_ss(a: __m128, b: __m128) -> __m128 {
465 unsafe { cmpss(a, b, 7) }
466}
467
468#[inline]
475#[target_feature(enable = "sse")]
476#[cfg_attr(test, assert_instr(cmpunordss))]
477#[stable(feature = "simd_x86", since = "1.27.0")]
478pub fn _mm_cmpunord_ss(a: __m128, b: __m128) -> __m128 {
479 unsafe { cmpss(a, b, 3) }
480}
481
482#[inline]
488#[target_feature(enable = "sse")]
489#[cfg_attr(test, assert_instr(cmpeqps))]
490#[stable(feature = "simd_x86", since = "1.27.0")]
491pub fn _mm_cmpeq_ps(a: __m128, b: __m128) -> __m128 {
492 unsafe { cmpps(a, b, 0) }
493}
494
495#[inline]
501#[target_feature(enable = "sse")]
502#[cfg_attr(test, assert_instr(cmpltps))]
503#[stable(feature = "simd_x86", since = "1.27.0")]
504pub fn _mm_cmplt_ps(a: __m128, b: __m128) -> __m128 {
505 unsafe { cmpps(a, b, 1) }
506}
507
508#[inline]
515#[target_feature(enable = "sse")]
516#[cfg_attr(test, assert_instr(cmpleps))]
517#[stable(feature = "simd_x86", since = "1.27.0")]
518pub fn _mm_cmple_ps(a: __m128, b: __m128) -> __m128 {
519 unsafe { cmpps(a, b, 2) }
520}
521
522#[inline]
528#[target_feature(enable = "sse")]
529#[cfg_attr(test, assert_instr(cmpltps))]
530#[stable(feature = "simd_x86", since = "1.27.0")]
531pub fn _mm_cmpgt_ps(a: __m128, b: __m128) -> __m128 {
532 unsafe { cmpps(b, a, 1) }
533}
534
535#[inline]
542#[target_feature(enable = "sse")]
543#[cfg_attr(test, assert_instr(cmpleps))]
544#[stable(feature = "simd_x86", since = "1.27.0")]
545pub fn _mm_cmpge_ps(a: __m128, b: __m128) -> __m128 {
546 unsafe { cmpps(b, a, 2) }
547}
548
549#[inline]
555#[target_feature(enable = "sse")]
556#[cfg_attr(test, assert_instr(cmpneqps))]
557#[stable(feature = "simd_x86", since = "1.27.0")]
558pub fn _mm_cmpneq_ps(a: __m128, b: __m128) -> __m128 {
559 unsafe { cmpps(a, b, 4) }
560}
561
562#[inline]
569#[target_feature(enable = "sse")]
570#[cfg_attr(test, assert_instr(cmpnltps))]
571#[stable(feature = "simd_x86", since = "1.27.0")]
572pub fn _mm_cmpnlt_ps(a: __m128, b: __m128) -> __m128 {
573 unsafe { cmpps(a, b, 5) }
574}
575
576#[inline]
583#[target_feature(enable = "sse")]
584#[cfg_attr(test, assert_instr(cmpnleps))]
585#[stable(feature = "simd_x86", since = "1.27.0")]
586pub fn _mm_cmpnle_ps(a: __m128, b: __m128) -> __m128 {
587 unsafe { cmpps(a, b, 6) }
588}
589
590#[inline]
597#[target_feature(enable = "sse")]
598#[cfg_attr(test, assert_instr(cmpnltps))]
599#[stable(feature = "simd_x86", since = "1.27.0")]
600pub fn _mm_cmpngt_ps(a: __m128, b: __m128) -> __m128 {
601 unsafe { cmpps(b, a, 5) }
602}
603
604#[inline]
611#[target_feature(enable = "sse")]
612#[cfg_attr(test, assert_instr(cmpnleps))]
613#[stable(feature = "simd_x86", since = "1.27.0")]
614pub fn _mm_cmpnge_ps(a: __m128, b: __m128) -> __m128 {
615 unsafe { cmpps(b, a, 6) }
616}
617
618#[inline]
625#[target_feature(enable = "sse")]
626#[cfg_attr(test, assert_instr(cmpordps))]
627#[stable(feature = "simd_x86", since = "1.27.0")]
628pub fn _mm_cmpord_ps(a: __m128, b: __m128) -> __m128 {
629 unsafe { cmpps(b, a, 7) }
630}
631
632#[inline]
639#[target_feature(enable = "sse")]
640#[cfg_attr(test, assert_instr(cmpunordps))]
641#[stable(feature = "simd_x86", since = "1.27.0")]
642pub fn _mm_cmpunord_ps(a: __m128, b: __m128) -> __m128 {
643 unsafe { cmpps(b, a, 3) }
644}
645
646#[inline]
651#[target_feature(enable = "sse")]
652#[cfg_attr(test, assert_instr(comiss))]
653#[stable(feature = "simd_x86", since = "1.27.0")]
654pub fn _mm_comieq_ss(a: __m128, b: __m128) -> i32 {
655 unsafe { comieq_ss(a, b) }
656}
657
658#[inline]
663#[target_feature(enable = "sse")]
664#[cfg_attr(test, assert_instr(comiss))]
665#[stable(feature = "simd_x86", since = "1.27.0")]
666pub fn _mm_comilt_ss(a: __m128, b: __m128) -> i32 {
667 unsafe { comilt_ss(a, b) }
668}
669
670#[inline]
676#[target_feature(enable = "sse")]
677#[cfg_attr(test, assert_instr(comiss))]
678#[stable(feature = "simd_x86", since = "1.27.0")]
679pub fn _mm_comile_ss(a: __m128, b: __m128) -> i32 {
680 unsafe { comile_ss(a, b) }
681}
682
683#[inline]
689#[target_feature(enable = "sse")]
690#[cfg_attr(test, assert_instr(comiss))]
691#[stable(feature = "simd_x86", since = "1.27.0")]
692pub fn _mm_comigt_ss(a: __m128, b: __m128) -> i32 {
693 unsafe { comigt_ss(a, b) }
694}
695
696#[inline]
702#[target_feature(enable = "sse")]
703#[cfg_attr(test, assert_instr(comiss))]
704#[stable(feature = "simd_x86", since = "1.27.0")]
705pub fn _mm_comige_ss(a: __m128, b: __m128) -> i32 {
706 unsafe { comige_ss(a, b) }
707}
708
709#[inline]
714#[target_feature(enable = "sse")]
715#[cfg_attr(test, assert_instr(comiss))]
716#[stable(feature = "simd_x86", since = "1.27.0")]
717pub fn _mm_comineq_ss(a: __m128, b: __m128) -> i32 {
718 unsafe { comineq_ss(a, b) }
719}
720
721#[inline]
727#[target_feature(enable = "sse")]
728#[cfg_attr(test, assert_instr(ucomiss))]
729#[stable(feature = "simd_x86", since = "1.27.0")]
730pub fn _mm_ucomieq_ss(a: __m128, b: __m128) -> i32 {
731 unsafe { ucomieq_ss(a, b) }
732}
733
734#[inline]
741#[target_feature(enable = "sse")]
742#[cfg_attr(test, assert_instr(ucomiss))]
743#[stable(feature = "simd_x86", since = "1.27.0")]
744pub fn _mm_ucomilt_ss(a: __m128, b: __m128) -> i32 {
745 unsafe { ucomilt_ss(a, b) }
746}
747
748#[inline]
755#[target_feature(enable = "sse")]
756#[cfg_attr(test, assert_instr(ucomiss))]
757#[stable(feature = "simd_x86", since = "1.27.0")]
758pub fn _mm_ucomile_ss(a: __m128, b: __m128) -> i32 {
759 unsafe { ucomile_ss(a, b) }
760}
761
762#[inline]
769#[target_feature(enable = "sse")]
770#[cfg_attr(test, assert_instr(ucomiss))]
771#[stable(feature = "simd_x86", since = "1.27.0")]
772pub fn _mm_ucomigt_ss(a: __m128, b: __m128) -> i32 {
773 unsafe { ucomigt_ss(a, b) }
774}
775
776#[inline]
783#[target_feature(enable = "sse")]
784#[cfg_attr(test, assert_instr(ucomiss))]
785#[stable(feature = "simd_x86", since = "1.27.0")]
786pub fn _mm_ucomige_ss(a: __m128, b: __m128) -> i32 {
787 unsafe { ucomige_ss(a, b) }
788}
789
790#[inline]
796#[target_feature(enable = "sse")]
797#[cfg_attr(test, assert_instr(ucomiss))]
798#[stable(feature = "simd_x86", since = "1.27.0")]
799pub fn _mm_ucomineq_ss(a: __m128, b: __m128) -> i32 {
800 unsafe { ucomineq_ss(a, b) }
801}
802
803#[inline]
813#[target_feature(enable = "sse")]
814#[cfg_attr(test, assert_instr(cvtss2si))]
815#[stable(feature = "simd_x86", since = "1.27.0")]
816pub fn _mm_cvtss_si32(a: __m128) -> i32 {
817 unsafe { cvtss2si(a) }
818}
819
820#[inline]
824#[target_feature(enable = "sse")]
825#[cfg_attr(test, assert_instr(cvtss2si))]
826#[stable(feature = "simd_x86", since = "1.27.0")]
827pub fn _mm_cvt_ss2si(a: __m128) -> i32 {
828 _mm_cvtss_si32(a)
829}
830
831#[inline]
843#[target_feature(enable = "sse")]
844#[cfg_attr(test, assert_instr(cvttss2si))]
845#[stable(feature = "simd_x86", since = "1.27.0")]
846pub fn _mm_cvttss_si32(a: __m128) -> i32 {
847 unsafe { cvttss2si(a) }
848}
849
850#[inline]
854#[target_feature(enable = "sse")]
855#[cfg_attr(test, assert_instr(cvttss2si))]
856#[stable(feature = "simd_x86", since = "1.27.0")]
857pub fn _mm_cvtt_ss2si(a: __m128) -> i32 {
858 _mm_cvttss_si32(a)
859}
860
861#[inline]
865#[target_feature(enable = "sse")]
866#[stable(feature = "simd_x86", since = "1.27.0")]
869pub fn _mm_cvtss_f32(a: __m128) -> f32 {
870 unsafe { simd_extract!(a, 0) }
871}
872
873#[inline]
881#[target_feature(enable = "sse")]
882#[cfg_attr(test, assert_instr(cvtsi2ss))]
883#[stable(feature = "simd_x86", since = "1.27.0")]
884pub fn _mm_cvtsi32_ss(a: __m128, b: i32) -> __m128 {
885 unsafe { cvtsi2ss(a, b) }
886}
887
888#[inline]
892#[target_feature(enable = "sse")]
893#[cfg_attr(test, assert_instr(cvtsi2ss))]
894#[stable(feature = "simd_x86", since = "1.27.0")]
895pub fn _mm_cvt_si2ss(a: __m128, b: i32) -> __m128 {
896 _mm_cvtsi32_ss(a, b)
897}
898
899#[inline]
904#[target_feature(enable = "sse")]
905#[cfg_attr(test, assert_instr(movss))]
906#[stable(feature = "simd_x86", since = "1.27.0")]
907pub fn _mm_set_ss(a: f32) -> __m128 {
908 __m128([a, 0.0, 0.0, 0.0])
909}
910
911#[inline]
915#[target_feature(enable = "sse")]
916#[cfg_attr(test, assert_instr(shufps))]
917#[stable(feature = "simd_x86", since = "1.27.0")]
918pub fn _mm_set1_ps(a: f32) -> __m128 {
919 __m128([a, a, a, a])
920}
921
922#[inline]
926#[target_feature(enable = "sse")]
927#[cfg_attr(test, assert_instr(shufps))]
928#[stable(feature = "simd_x86", since = "1.27.0")]
929pub fn _mm_set_ps1(a: f32) -> __m128 {
930 _mm_set1_ps(a)
931}
932
933#[inline]
953#[target_feature(enable = "sse")]
954#[cfg_attr(test, assert_instr(unpcklps))]
955#[stable(feature = "simd_x86", since = "1.27.0")]
956pub fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 {
957 __m128([d, c, b, a])
958}
959
960#[inline]
971#[target_feature(enable = "sse")]
972#[cfg_attr(
973 all(test, any(target_env = "msvc", target_arch = "x86_64")),
974 assert_instr(unpcklps)
975)]
976#[cfg_attr(
978 all(test, all(not(target_env = "msvc"), target_arch = "x86")),
979 assert_instr(movaps)
980)]
981#[stable(feature = "simd_x86", since = "1.27.0")]
982pub fn _mm_setr_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 {
983 __m128([a, b, c, d])
984}
985
986#[inline]
990#[target_feature(enable = "sse")]
991#[cfg_attr(test, assert_instr(xorps))]
992#[stable(feature = "simd_x86", since = "1.27.0")]
993pub fn _mm_setzero_ps() -> __m128 {
994 const { unsafe { mem::zeroed() } }
995}
996
997#[inline]
1000#[allow(non_snake_case)]
1001#[unstable(feature = "stdarch_x86_mm_shuffle", issue = "111147")]
1002pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 {
1003 ((z << 6) | (y << 4) | (x << 2) | w) as i32
1004}
1005
1006#[inline]
1020#[target_feature(enable = "sse")]
1021#[cfg_attr(test, assert_instr(shufps, MASK = 3))]
1022#[rustc_legacy_const_generics(2)]
1023#[stable(feature = "simd_x86", since = "1.27.0")]
1024pub fn _mm_shuffle_ps<const MASK: i32>(a: __m128, b: __m128) -> __m128 {
1025 static_assert_uimm_bits!(MASK, 8);
1026 unsafe {
1027 simd_shuffle!(
1028 a,
1029 b,
1030 [
1031 MASK as u32 & 0b11,
1032 (MASK as u32 >> 2) & 0b11,
1033 ((MASK as u32 >> 4) & 0b11) + 4,
1034 ((MASK as u32 >> 6) & 0b11) + 4,
1035 ],
1036 )
1037 }
1038}
1039
1040#[inline]
1045#[target_feature(enable = "sse")]
1046#[cfg_attr(test, assert_instr(unpckhps))]
1047#[stable(feature = "simd_x86", since = "1.27.0")]
1048pub fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128 {
1049 unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
1050}
1051
1052#[inline]
1057#[target_feature(enable = "sse")]
1058#[cfg_attr(test, assert_instr(unpcklps))]
1059#[stable(feature = "simd_x86", since = "1.27.0")]
1060pub fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128 {
1061 unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
1062}
1063
1064#[inline]
1069#[target_feature(enable = "sse")]
1070#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movhlps))]
1071#[stable(feature = "simd_x86", since = "1.27.0")]
1072pub fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 {
1073 unsafe { simd_shuffle!(a, b, [6, 7, 2, 3]) }
1075}
1076
1077#[inline]
1082#[target_feature(enable = "sse")]
1083#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlhps))]
1084#[stable(feature = "simd_x86", since = "1.27.0")]
1085pub fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 {
1086 unsafe { simd_shuffle!(a, b, [0, 1, 4, 5]) }
1087}
1088
1089#[inline]
1096#[target_feature(enable = "sse")]
1097#[cfg_attr(test, assert_instr(movmskps))]
1098#[stable(feature = "simd_x86", since = "1.27.0")]
1099pub fn _mm_movemask_ps(a: __m128) -> i32 {
1100 unsafe {
1103 let mask: i32x4 = simd_lt(transmute(a), i32x4::ZERO);
1104 simd_bitmask::<i32x4, u8>(mask).into()
1105 }
1106}
1107
1108#[inline]
1115#[target_feature(enable = "sse")]
1116#[cfg_attr(test, assert_instr(movss))]
1117#[stable(feature = "simd_x86", since = "1.27.0")]
1118pub unsafe fn _mm_load_ss(p: *const f32) -> __m128 {
1119 __m128([*p, 0.0, 0.0, 0.0])
1120}
1121
1122#[inline]
1130#[target_feature(enable = "sse")]
1131#[cfg_attr(test, assert_instr(movss))]
1132#[stable(feature = "simd_x86", since = "1.27.0")]
1133pub unsafe fn _mm_load1_ps(p: *const f32) -> __m128 {
1134 let a = *p;
1135 __m128([a, a, a, a])
1136}
1137
1138#[inline]
1142#[target_feature(enable = "sse")]
1143#[cfg_attr(test, assert_instr(movss))]
1144#[stable(feature = "simd_x86", since = "1.27.0")]
1145pub unsafe fn _mm_load_ps1(p: *const f32) -> __m128 {
1146 _mm_load1_ps(p)
1147}
1148
1149#[inline]
1160#[target_feature(enable = "sse")]
1161#[cfg_attr(test, assert_instr(movaps))]
1162#[stable(feature = "simd_x86", since = "1.27.0")]
1163#[allow(clippy::cast_ptr_alignment)]
1164pub unsafe fn _mm_load_ps(p: *const f32) -> __m128 {
1165 *(p as *const __m128)
1166}
1167
1168#[inline]
1178#[target_feature(enable = "sse")]
1179#[cfg_attr(test, assert_instr(movups))]
1180#[stable(feature = "simd_x86", since = "1.27.0")]
1181pub unsafe fn _mm_loadu_ps(p: *const f32) -> __m128 {
1182 let mut dst = _mm_undefined_ps();
1185 ptr::copy_nonoverlapping(
1186 p as *const u8,
1187 ptr::addr_of_mut!(dst) as *mut u8,
1188 mem::size_of::<__m128>(),
1189 );
1190 dst
1191}
1192
1193#[inline]
1215#[target_feature(enable = "sse")]
1216#[cfg_attr(test, assert_instr(movaps))]
1217#[stable(feature = "simd_x86", since = "1.27.0")]
1218pub unsafe fn _mm_loadr_ps(p: *const f32) -> __m128 {
1219 let a = _mm_load_ps(p);
1220 simd_shuffle!(a, a, [3, 2, 1, 0])
1221}
1222
1223#[inline]
1229#[target_feature(enable = "sse")]
1230#[cfg_attr(test, assert_instr(movss))]
1231#[stable(feature = "simd_x86", since = "1.27.0")]
1232pub unsafe fn _mm_store_ss(p: *mut f32, a: __m128) {
1233 *p = simd_extract!(a, 0);
1234}
1235
1236#[inline]
1255#[target_feature(enable = "sse")]
1256#[cfg_attr(test, assert_instr(movaps))]
1257#[stable(feature = "simd_x86", since = "1.27.0")]
1258#[allow(clippy::cast_ptr_alignment)]
1259pub unsafe fn _mm_store1_ps(p: *mut f32, a: __m128) {
1260 let b: __m128 = simd_shuffle!(a, a, [0, 0, 0, 0]);
1261 *(p as *mut __m128) = b;
1262}
1263
1264#[inline]
1268#[target_feature(enable = "sse")]
1269#[cfg_attr(test, assert_instr(movaps))]
1270#[stable(feature = "simd_x86", since = "1.27.0")]
1271pub unsafe fn _mm_store_ps1(p: *mut f32, a: __m128) {
1272 _mm_store1_ps(p, a);
1273}
1274
1275#[inline]
1287#[target_feature(enable = "sse")]
1288#[cfg_attr(test, assert_instr(movaps))]
1289#[stable(feature = "simd_x86", since = "1.27.0")]
1290#[allow(clippy::cast_ptr_alignment)]
1291pub unsafe fn _mm_store_ps(p: *mut f32, a: __m128) {
1292 *(p as *mut __m128) = a;
1293}
1294
1295#[inline]
1303#[target_feature(enable = "sse")]
1304#[cfg_attr(test, assert_instr(movups))]
1305#[stable(feature = "simd_x86", since = "1.27.0")]
1306pub unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) {
1307 ptr::copy_nonoverlapping(
1308 ptr::addr_of!(a) as *const u8,
1309 p as *mut u8,
1310 mem::size_of::<__m128>(),
1311 );
1312}
1313
1314#[inline]
1331#[target_feature(enable = "sse")]
1332#[cfg_attr(test, assert_instr(movaps))]
1333#[stable(feature = "simd_x86", since = "1.27.0")]
1334#[allow(clippy::cast_ptr_alignment)]
1335pub unsafe fn _mm_storer_ps(p: *mut f32, a: __m128) {
1336 let b: __m128 = simd_shuffle!(a, a, [3, 2, 1, 0]);
1337 *(p as *mut __m128) = b;
1338}
1339
1340#[inline]
1350#[target_feature(enable = "sse")]
1351#[cfg_attr(test, assert_instr(movss))]
1352#[stable(feature = "simd_x86", since = "1.27.0")]
1353pub fn _mm_move_ss(a: __m128, b: __m128) -> __m128 {
1354 unsafe { simd_shuffle!(a, b, [4, 1, 2, 3]) }
1355}
1356
1357#[inline]
1425#[target_feature(enable = "sse")]
1426#[cfg_attr(test, assert_instr(sfence))]
1427#[stable(feature = "simd_x86", since = "1.27.0")]
1428pub unsafe fn _mm_sfence() {
1429 sfence()
1430}
1431
1432#[inline]
1447#[target_feature(enable = "sse")]
1448#[cfg_attr(test, assert_instr(stmxcsr))]
1449#[stable(feature = "simd_x86", since = "1.27.0")]
1450#[deprecated(
1451 since = "1.75.0",
1452 note = "see `_mm_getcsr` documentation - use inline assembly instead"
1453)]
1454pub unsafe fn _mm_getcsr() -> u32 {
1455 unsafe {
1456 let mut result = 0_i32;
1457 stmxcsr(ptr::addr_of_mut!(result) as *mut i8);
1458 result as u32
1459 }
1460}
1461
1462#[inline]
1596#[target_feature(enable = "sse")]
1597#[cfg_attr(test, assert_instr(ldmxcsr))]
1598#[stable(feature = "simd_x86", since = "1.27.0")]
1599#[deprecated(
1600 since = "1.75.0",
1601 note = "see `_mm_setcsr` documentation - use inline assembly instead"
1602)]
1603pub unsafe fn _mm_setcsr(val: u32) {
1604 ldmxcsr(ptr::addr_of!(val) as *const i8);
1605}
1606
1607#[stable(feature = "simd_x86", since = "1.27.0")]
1609pub const _MM_EXCEPT_INVALID: u32 = 0x0001;
1610#[stable(feature = "simd_x86", since = "1.27.0")]
1612pub const _MM_EXCEPT_DENORM: u32 = 0x0002;
1613#[stable(feature = "simd_x86", since = "1.27.0")]
1615pub const _MM_EXCEPT_DIV_ZERO: u32 = 0x0004;
1616#[stable(feature = "simd_x86", since = "1.27.0")]
1618pub const _MM_EXCEPT_OVERFLOW: u32 = 0x0008;
1619#[stable(feature = "simd_x86", since = "1.27.0")]
1621pub const _MM_EXCEPT_UNDERFLOW: u32 = 0x0010;
1622#[stable(feature = "simd_x86", since = "1.27.0")]
1624pub const _MM_EXCEPT_INEXACT: u32 = 0x0020;
1625#[stable(feature = "simd_x86", since = "1.27.0")]
1627pub const _MM_EXCEPT_MASK: u32 = 0x003f;
1628
1629#[stable(feature = "simd_x86", since = "1.27.0")]
1631pub const _MM_MASK_INVALID: u32 = 0x0080;
1632#[stable(feature = "simd_x86", since = "1.27.0")]
1634pub const _MM_MASK_DENORM: u32 = 0x0100;
1635#[stable(feature = "simd_x86", since = "1.27.0")]
1637pub const _MM_MASK_DIV_ZERO: u32 = 0x0200;
1638#[stable(feature = "simd_x86", since = "1.27.0")]
1640pub const _MM_MASK_OVERFLOW: u32 = 0x0400;
1641#[stable(feature = "simd_x86", since = "1.27.0")]
1643pub const _MM_MASK_UNDERFLOW: u32 = 0x0800;
1644#[stable(feature = "simd_x86", since = "1.27.0")]
1646pub const _MM_MASK_INEXACT: u32 = 0x1000;
1647#[stable(feature = "simd_x86", since = "1.27.0")]
1649pub const _MM_MASK_MASK: u32 = 0x1f80;
1650
1651#[stable(feature = "simd_x86", since = "1.27.0")]
1653pub const _MM_ROUND_NEAREST: u32 = 0x0000;
1654#[stable(feature = "simd_x86", since = "1.27.0")]
1656pub const _MM_ROUND_DOWN: u32 = 0x2000;
1657#[stable(feature = "simd_x86", since = "1.27.0")]
1659pub const _MM_ROUND_UP: u32 = 0x4000;
1660#[stable(feature = "simd_x86", since = "1.27.0")]
1662pub const _MM_ROUND_TOWARD_ZERO: u32 = 0x6000;
1663
1664#[stable(feature = "simd_x86", since = "1.27.0")]
1666pub const _MM_ROUND_MASK: u32 = 0x6000;
1667
1668#[stable(feature = "simd_x86", since = "1.27.0")]
1670pub const _MM_FLUSH_ZERO_MASK: u32 = 0x8000;
1671#[stable(feature = "simd_x86", since = "1.27.0")]
1673pub const _MM_FLUSH_ZERO_ON: u32 = 0x8000;
1674#[stable(feature = "simd_x86", since = "1.27.0")]
1676pub const _MM_FLUSH_ZERO_OFF: u32 = 0x0000;
1677
1678#[inline]
1682#[allow(deprecated)] #[allow(non_snake_case)]
1684#[target_feature(enable = "sse")]
1685#[stable(feature = "simd_x86", since = "1.27.0")]
1686#[deprecated(
1687 since = "1.75.0",
1688 note = "see `_mm_getcsr` documentation - use inline assembly instead"
1689)]
1690pub unsafe fn _MM_GET_EXCEPTION_MASK() -> u32 {
1691 _mm_getcsr() & _MM_MASK_MASK
1692}
1693
1694#[inline]
1698#[allow(deprecated)] #[allow(non_snake_case)]
1700#[target_feature(enable = "sse")]
1701#[stable(feature = "simd_x86", since = "1.27.0")]
1702#[deprecated(
1703 since = "1.75.0",
1704 note = "see `_mm_getcsr` documentation - use inline assembly instead"
1705)]
1706pub unsafe fn _MM_GET_EXCEPTION_STATE() -> u32 {
1707 _mm_getcsr() & _MM_EXCEPT_MASK
1708}
1709
1710#[inline]
1714#[allow(deprecated)] #[allow(non_snake_case)]
1716#[target_feature(enable = "sse")]
1717#[stable(feature = "simd_x86", since = "1.27.0")]
1718#[deprecated(
1719 since = "1.75.0",
1720 note = "see `_mm_getcsr` documentation - use inline assembly instead"
1721)]
1722pub unsafe fn _MM_GET_FLUSH_ZERO_MODE() -> u32 {
1723 _mm_getcsr() & _MM_FLUSH_ZERO_MASK
1724}
1725
1726#[inline]
1730#[allow(deprecated)] #[allow(non_snake_case)]
1732#[target_feature(enable = "sse")]
1733#[stable(feature = "simd_x86", since = "1.27.0")]
1734#[deprecated(
1735 since = "1.75.0",
1736 note = "see `_mm_getcsr` documentation - use inline assembly instead"
1737)]
1738pub unsafe fn _MM_GET_ROUNDING_MODE() -> u32 {
1739 _mm_getcsr() & _MM_ROUND_MASK
1740}
1741
1742#[inline]
1746#[allow(deprecated)] #[allow(non_snake_case)]
1748#[target_feature(enable = "sse")]
1749#[stable(feature = "simd_x86", since = "1.27.0")]
1750#[deprecated(
1751 since = "1.75.0",
1752 note = "see `_mm_setcsr` documentation - use inline assembly instead"
1753)]
1754pub unsafe fn _MM_SET_EXCEPTION_MASK(x: u32) {
1755 _mm_setcsr((_mm_getcsr() & !_MM_MASK_MASK) | (x & _MM_MASK_MASK))
1756}
1757
1758#[inline]
1762#[allow(deprecated)] #[allow(non_snake_case)]
1764#[target_feature(enable = "sse")]
1765#[stable(feature = "simd_x86", since = "1.27.0")]
1766#[deprecated(
1767 since = "1.75.0",
1768 note = "see `_mm_setcsr` documentation - use inline assembly instead"
1769)]
1770pub unsafe fn _MM_SET_EXCEPTION_STATE(x: u32) {
1771 _mm_setcsr((_mm_getcsr() & !_MM_EXCEPT_MASK) | (x & _MM_EXCEPT_MASK))
1772}
1773
1774#[inline]
1778#[allow(deprecated)] #[allow(non_snake_case)]
1780#[target_feature(enable = "sse")]
1781#[stable(feature = "simd_x86", since = "1.27.0")]
1782#[deprecated(
1783 since = "1.75.0",
1784 note = "see `_mm_setcsr` documentation - use inline assembly instead"
1785)]
1786pub unsafe fn _MM_SET_FLUSH_ZERO_MODE(x: u32) {
1787 _mm_setcsr((_mm_getcsr() & !_MM_FLUSH_ZERO_MASK) | (x & _MM_FLUSH_ZERO_MASK))
1788}
1789
1790#[inline]
1794#[allow(deprecated)] #[allow(non_snake_case)]
1796#[target_feature(enable = "sse")]
1797#[stable(feature = "simd_x86", since = "1.27.0")]
1798#[deprecated(
1799 since = "1.75.0",
1800 note = "see `_mm_setcsr` documentation - use inline assembly instead"
1801)]
1802pub unsafe fn _MM_SET_ROUNDING_MODE(x: u32) {
1803 _mm_setcsr((_mm_getcsr() & !_MM_ROUND_MASK) | (x & _MM_ROUND_MASK))
1804}
1805
1806#[stable(feature = "simd_x86", since = "1.27.0")]
1808pub const _MM_HINT_T0: i32 = 3;
1809
1810#[stable(feature = "simd_x86", since = "1.27.0")]
1812pub const _MM_HINT_T1: i32 = 2;
1813
1814#[stable(feature = "simd_x86", since = "1.27.0")]
1816pub const _MM_HINT_T2: i32 = 1;
1817
1818#[stable(feature = "simd_x86", since = "1.27.0")]
1820pub const _MM_HINT_NTA: i32 = 0;
1821
1822#[stable(feature = "simd_x86", since = "1.27.0")]
1824pub const _MM_HINT_ET0: i32 = 7;
1825
1826#[stable(feature = "simd_x86", since = "1.27.0")]
1828pub const _MM_HINT_ET1: i32 = 6;
1829
/// Issues a prefetch hint for the address `p` through the `llvm.prefetch` intrinsic.
///
/// `STRATEGY` must fit in three unsigned bits (checked at compile time) and is
/// normally one of the `_MM_HINT_*` constants. Bit 2 is forwarded as the
/// intrinsic's `rw` argument and bits 0-1 as its `loc` argument.
///
/// # Safety
///
/// NOTE(review): the function is `unsafe` and takes a raw pointer; the exact
/// requirements on `p` are not visible here — confirm before relying on them.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(prefetcht0, STRATEGY = _MM_HINT_T0))]
#[cfg_attr(test, assert_instr(prefetcht1, STRATEGY = _MM_HINT_T1))]
#[cfg_attr(test, assert_instr(prefetcht2, STRATEGY = _MM_HINT_T2))]
#[cfg_attr(test, assert_instr(prefetchnta, STRATEGY = _MM_HINT_NTA))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_prefetch<const STRATEGY: i32>(p: *const i8) {
    // Reject any STRATEGY that does not fit in 3 unsigned bits.
    static_assert_uimm_bits!(STRATEGY, 3);
    // `rw` = bit 2, `loc` = bits 0-1, `ty` fixed to 1.
    // NOTE(review): per the LLVM LangRef, cache type 1 selects the data cache — confirm.
    prefetch(p, (STRATEGY >> 2) & 1, STRATEGY & 3, 1);
}
1886
1887#[inline]
1894#[target_feature(enable = "sse")]
1895#[stable(feature = "simd_x86", since = "1.27.0")]
1896pub fn _mm_undefined_ps() -> __m128 {
1897 const { unsafe { mem::zeroed() } }
1898}
1899
1900#[inline]
1904#[allow(non_snake_case)]
1905#[target_feature(enable = "sse")]
1906#[stable(feature = "simd_x86", since = "1.27.0")]
1907pub fn _MM_TRANSPOSE4_PS(
1908 row0: &mut __m128,
1909 row1: &mut __m128,
1910 row2: &mut __m128,
1911 row3: &mut __m128,
1912) {
1913 let tmp0 = _mm_unpacklo_ps(*row0, *row1);
1914 let tmp2 = _mm_unpacklo_ps(*row2, *row3);
1915 let tmp1 = _mm_unpackhi_ps(*row0, *row1);
1916 let tmp3 = _mm_unpackhi_ps(*row2, *row3);
1917
1918 *row0 = _mm_movelh_ps(tmp0, tmp2);
1919 *row1 = _mm_movehl_ps(tmp2, tmp0);
1920 *row2 = _mm_movelh_ps(tmp1, tmp3);
1921 *row3 = _mm_movehl_ps(tmp3, tmp1);
1922}
1923
// Declarations of the LLVM intrinsics (`llvm.x86.sse.*` and `llvm.prefetch`)
// that the public functions in this module are built on. Each `link_name`
// names the LLVM intrinsic; the Rust identifier is the module-private alias.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // Approximate reciprocal / reciprocal square root.
    #[link_name = "llvm.x86.sse.rcp.ss"]
    fn rcpss(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.rcp.ps"]
    fn rcpps(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.rsqrt.ss"]
    fn rsqrtss(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.rsqrt.ps"]
    fn rsqrtps(a: __m128) -> __m128;
    // Minimum / maximum.
    #[link_name = "llvm.x86.sse.min.ss"]
    fn minss(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.min.ps"]
    fn minps(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.max.ss"]
    fn maxss(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.max.ps"]
    fn maxps(a: __m128, b: __m128) -> __m128;
    // Packed comparison; `imm8` selects the predicate.
    #[link_name = "llvm.x86.sse.cmp.ps"]
    fn cmpps(a: __m128, b: __m128, imm8: i8) -> __m128;
    // `comi*` scalar comparisons returning an `i32`.
    #[link_name = "llvm.x86.sse.comieq.ss"]
    fn comieq_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comilt.ss"]
    fn comilt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comile.ss"]
    fn comile_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comigt.ss"]
    fn comigt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comige.ss"]
    fn comige_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comineq.ss"]
    fn comineq_ss(a: __m128, b: __m128) -> i32;
    // `ucomi*` scalar comparisons returning an `i32`.
    #[link_name = "llvm.x86.sse.ucomieq.ss"]
    fn ucomieq_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomilt.ss"]
    fn ucomilt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomile.ss"]
    fn ucomile_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomigt.ss"]
    fn ucomigt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomige.ss"]
    fn ucomige_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomineq.ss"]
    fn ucomineq_ss(a: __m128, b: __m128) -> i32;
    // Conversions between the low `f32` lane and `i32`.
    #[link_name = "llvm.x86.sse.cvtss2si"]
    fn cvtss2si(a: __m128) -> i32;
    #[link_name = "llvm.x86.sse.cvttss2si"]
    fn cvttss2si(a: __m128) -> i32;
    #[link_name = "llvm.x86.sse.cvtsi2ss"]
    fn cvtsi2ss(a: __m128, b: i32) -> __m128;
    // Store fence and control/status register access.
    #[link_name = "llvm.x86.sse.sfence"]
    fn sfence();
    #[link_name = "llvm.x86.sse.stmxcsr"]
    fn stmxcsr(p: *mut i8);
    #[link_name = "llvm.x86.sse.ldmxcsr"]
    fn ldmxcsr(p: *const i8);
    // Generic LLVM prefetch; used by `_mm_prefetch`.
    #[link_name = "llvm.prefetch"]
    fn prefetch(p: *const i8, rw: i32, loc: i32, ty: i32);
    // Scalar comparison; `imm8` selects the predicate.
    #[link_name = "llvm.x86.sse.cmp.ss"]
    fn cmpss(a: __m128, b: __m128, imm8: i8) -> __m128;
}
1985
/// Stores `a` to the memory at `mem_addr` with the `movntps` instruction,
/// emitted through inline assembly.
///
/// # Safety
///
/// NOTE(review): `mem_addr` is written through a raw pointer, so it must be
/// valid for a 16-byte write; `movntps` is documented by Intel as requiring
/// 16-byte alignment and as a non-temporal store that may need a later fence
/// — confirm against the instruction reference.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movntps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) {
    crate::arch::asm!(
        // `vps!` builds the instruction template; presumably it supplies the `{p}` memory operand.
        vps!("movntps", ",{a}"),
        p = in(reg) mem_addr,
        a = in(xmm_reg) a,
        // The asm does not touch the stack and leaves the flags untouched.
        options(nostack, preserves_flags),
    );
}
2014
2015#[cfg(test)]
2016mod tests {
2017 use crate::{hint::black_box, mem::transmute, ptr};
2018 use std::boxed;
2019 use stdarch_test::simd_test;
2020
2021 use crate::core_arch::{simd::*, x86::*};
2022
    // Shorthand for `f32::NAN`, used as the unordered operand in the tests below.
    const NAN: f32 = f32::NAN;
2024
2025 #[simd_test(enable = "sse")]
2026 unsafe fn test_mm_add_ps() {
2027 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2028 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2029 let r = _mm_add_ps(a, b);
2030 assert_eq_m128(r, _mm_setr_ps(-101.0, 25.0, 0.0, -15.0));
2031 }
2032
2033 #[simd_test(enable = "sse")]
2034 unsafe fn test_mm_add_ss() {
2035 let a = _mm_set_ps(-1.0, 5.0, 0.0, -10.0);
2036 let b = _mm_set_ps(-100.0, 20.0, 0.0, -5.0);
2037 let r = _mm_add_ss(a, b);
2038 assert_eq_m128(r, _mm_set_ps(-1.0, 5.0, 0.0, -15.0));
2039 }
2040
2041 #[simd_test(enable = "sse")]
2042 unsafe fn test_mm_sub_ps() {
2043 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2044 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2045 let r = _mm_sub_ps(a, b);
2046 assert_eq_m128(r, _mm_setr_ps(99.0, -15.0, 0.0, -5.0));
2047 }
2048
2049 #[simd_test(enable = "sse")]
2050 unsafe fn test_mm_sub_ss() {
2051 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2052 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2053 let r = _mm_sub_ss(a, b);
2054 assert_eq_m128(r, _mm_setr_ps(99.0, 5.0, 0.0, -10.0));
2055 }
2056
2057 #[simd_test(enable = "sse")]
2058 unsafe fn test_mm_mul_ps() {
2059 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2060 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2061 let r = _mm_mul_ps(a, b);
2062 assert_eq_m128(r, _mm_setr_ps(100.0, 100.0, 0.0, 50.0));
2063 }
2064
2065 #[simd_test(enable = "sse")]
2066 unsafe fn test_mm_mul_ss() {
2067 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2068 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2069 let r = _mm_mul_ss(a, b);
2070 assert_eq_m128(r, _mm_setr_ps(100.0, 5.0, 0.0, -10.0));
2071 }
2072
2073 #[simd_test(enable = "sse")]
2074 unsafe fn test_mm_div_ps() {
2075 let a = _mm_setr_ps(-1.0, 5.0, 2.0, -10.0);
2076 let b = _mm_setr_ps(-100.0, 20.0, 0.2, -5.0);
2077 let r = _mm_div_ps(a, b);
2078 assert_eq_m128(r, _mm_setr_ps(0.01, 0.25, 10.0, 2.0));
2079 }
2080
2081 #[simd_test(enable = "sse")]
2082 unsafe fn test_mm_div_ss() {
2083 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2084 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2085 let r = _mm_div_ss(a, b);
2086 assert_eq_m128(r, _mm_setr_ps(0.01, 5.0, 0.0, -10.0));
2087 }
2088
2089 #[simd_test(enable = "sse")]
2090 unsafe fn test_mm_sqrt_ss() {
2091 let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2092 let r = _mm_sqrt_ss(a);
2093 let e = _mm_setr_ps(2.0, 13.0, 16.0, 100.0);
2094 assert_eq_m128(r, e);
2095 }
2096
2097 #[simd_test(enable = "sse")]
2098 unsafe fn test_mm_sqrt_ps() {
2099 let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2100 let r = _mm_sqrt_ps(a);
2101 let e = _mm_setr_ps(2.0, 3.6055512, 4.0, 10.0);
2102 assert_eq_m128(r, e);
2103 }
2104
2105 #[simd_test(enable = "sse")]
2106 unsafe fn test_mm_rcp_ss() {
2107 let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2108 let r = _mm_rcp_ss(a);
2109 let e = _mm_setr_ps(0.24993896, 13.0, 16.0, 100.0);
2110 let rel_err = 0.00048828125;
2111 assert_approx_eq!(get_m128(r, 0), get_m128(e, 0), 2. * rel_err);
2112 for i in 1..4 {
2113 assert_eq!(get_m128(r, i), get_m128(e, i));
2114 }
2115 }
2116
2117 #[simd_test(enable = "sse")]
2118 unsafe fn test_mm_rcp_ps() {
2119 let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2120 let r = _mm_rcp_ps(a);
2121 let e = _mm_setr_ps(0.24993896, 0.0769043, 0.06248474, 0.0099983215);
2122 let rel_err = 0.00048828125;
2123 for i in 0..4 {
2124 assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err);
2125 }
2126 }
2127
2128 #[simd_test(enable = "sse")]
2129 unsafe fn test_mm_rsqrt_ss() {
2130 let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2131 let r = _mm_rsqrt_ss(a);
2132 let e = _mm_setr_ps(0.49987793, 13.0, 16.0, 100.0);
2133 let rel_err = 0.00048828125;
2134 for i in 0..4 {
2135 assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err);
2136 }
2137 }
2138
2139 #[simd_test(enable = "sse")]
2140 unsafe fn test_mm_rsqrt_ps() {
2141 let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2142 let r = _mm_rsqrt_ps(a);
2143 let e = _mm_setr_ps(0.49987793, 0.2772827, 0.24993896, 0.099990845);
2144 let rel_err = 0.00048828125;
2145 for i in 0..4 {
2146 assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err);
2147 }
2148 }
2149
2150 #[simd_test(enable = "sse")]
2151 unsafe fn test_mm_min_ss() {
2152 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2153 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2154 let r = _mm_min_ss(a, b);
2155 assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0));
2156 }
2157
2158 #[simd_test(enable = "sse")]
2159 unsafe fn test_mm_min_ps() {
2160 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2161 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2162 let r = _mm_min_ps(a, b);
2163 assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0));
2164
2165 let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
2171 let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
2172 let r1: [u8; 16] = transmute(_mm_min_ps(a, b));
2173 let r2: [u8; 16] = transmute(_mm_min_ps(b, a));
2174 let a: [u8; 16] = transmute(a);
2175 let b: [u8; 16] = transmute(b);
2176 assert_eq!(r1, b);
2177 assert_eq!(r2, a);
2178 assert_ne!(a, b); }
2180
2181 #[simd_test(enable = "sse")]
2182 unsafe fn test_mm_max_ss() {
2183 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2184 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2185 let r = _mm_max_ss(a, b);
2186 assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, -10.0));
2187 }
2188
2189 #[simd_test(enable = "sse")]
2190 unsafe fn test_mm_max_ps() {
2191 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2192 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2193 let r = _mm_max_ps(a, b);
2194 assert_eq_m128(r, _mm_setr_ps(-1.0, 20.0, 0.0, -5.0));
2195
2196 let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
2198 let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
2199 let r1: [u8; 16] = transmute(_mm_max_ps(a, b));
2200 let r2: [u8; 16] = transmute(_mm_max_ps(b, a));
2201 let a: [u8; 16] = transmute(a);
2202 let b: [u8; 16] = transmute(b);
2203 assert_eq!(r1, b);
2204 assert_eq!(r2, a);
2205 assert_ne!(a, b); }
2207
2208 #[simd_test(enable = "sse")]
2209 unsafe fn test_mm_and_ps() {
2210 let a = transmute(u32x4::splat(0b0011));
2211 let b = transmute(u32x4::splat(0b0101));
2212 let r = _mm_and_ps(*black_box(&a), *black_box(&b));
2213 let e = transmute(u32x4::splat(0b0001));
2214 assert_eq_m128(r, e);
2215 }
2216
2217 #[simd_test(enable = "sse")]
2218 unsafe fn test_mm_andnot_ps() {
2219 let a = transmute(u32x4::splat(0b0011));
2220 let b = transmute(u32x4::splat(0b0101));
2221 let r = _mm_andnot_ps(*black_box(&a), *black_box(&b));
2222 let e = transmute(u32x4::splat(0b0100));
2223 assert_eq_m128(r, e);
2224 }
2225
2226 #[simd_test(enable = "sse")]
2227 unsafe fn test_mm_or_ps() {
2228 let a = transmute(u32x4::splat(0b0011));
2229 let b = transmute(u32x4::splat(0b0101));
2230 let r = _mm_or_ps(*black_box(&a), *black_box(&b));
2231 let e = transmute(u32x4::splat(0b0111));
2232 assert_eq_m128(r, e);
2233 }
2234
2235 #[simd_test(enable = "sse")]
2236 unsafe fn test_mm_xor_ps() {
2237 let a = transmute(u32x4::splat(0b0011));
2238 let b = transmute(u32x4::splat(0b0101));
2239 let r = _mm_xor_ps(*black_box(&a), *black_box(&b));
2240 let e = transmute(u32x4::splat(0b0110));
2241 assert_eq_m128(r, e);
2242 }
2243
2244 #[simd_test(enable = "sse")]
2245 unsafe fn test_mm_cmpeq_ss() {
2246 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2247 let b = _mm_setr_ps(-1.0, 5.0, 6.0, 7.0);
2248 let r: u32x4 = transmute(_mm_cmpeq_ss(a, b));
2249 let e: u32x4 = transmute(_mm_setr_ps(f32::from_bits(0), 2.0, 3.0, 4.0));
2250 assert_eq!(r, e);
2251
2252 let b2 = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2253 let r2: u32x4 = transmute(_mm_cmpeq_ss(a, b2));
2254 let e2: u32x4 = transmute(_mm_setr_ps(f32::from_bits(0xffffffff), 2.0, 3.0, 4.0));
2255 assert_eq!(r2, e2);
2256 }
2257
2258 #[simd_test(enable = "sse")]
2259 unsafe fn test_mm_cmplt_ss() {
2260 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2261 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2262 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2263 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2264
2265 let b1 = 0u32; let c1 = 0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmplt_ss(a, b));
2270 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2271 assert_eq!(rb, eb);
2272
2273 let rc: u32x4 = transmute(_mm_cmplt_ss(a, c));
2274 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2275 assert_eq!(rc, ec);
2276
2277 let rd: u32x4 = transmute(_mm_cmplt_ss(a, d));
2278 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2279 assert_eq!(rd, ed);
2280 }
2281
2282 #[simd_test(enable = "sse")]
2283 unsafe fn test_mm_cmple_ss() {
2284 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2285 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2286 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2287 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2288
2289 let b1 = 0u32; let c1 = !0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmple_ss(a, b));
2294 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2295 assert_eq!(rb, eb);
2296
2297 let rc: u32x4 = transmute(_mm_cmple_ss(a, c));
2298 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2299 assert_eq!(rc, ec);
2300
2301 let rd: u32x4 = transmute(_mm_cmple_ss(a, d));
2302 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2303 assert_eq!(rd, ed);
2304 }
2305
2306 #[simd_test(enable = "sse")]
2307 unsafe fn test_mm_cmpgt_ss() {
2308 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2309 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2310 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2311 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2312
2313 let b1 = !0u32; let c1 = 0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpgt_ss(a, b));
2318 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2319 assert_eq!(rb, eb);
2320
2321 let rc: u32x4 = transmute(_mm_cmpgt_ss(a, c));
2322 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2323 assert_eq!(rc, ec);
2324
2325 let rd: u32x4 = transmute(_mm_cmpgt_ss(a, d));
2326 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2327 assert_eq!(rd, ed);
2328 }
2329
2330 #[simd_test(enable = "sse")]
2331 unsafe fn test_mm_cmpge_ss() {
2332 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2333 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2334 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2335 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2336
2337 let b1 = !0u32; let c1 = !0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpge_ss(a, b));
2342 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2343 assert_eq!(rb, eb);
2344
2345 let rc: u32x4 = transmute(_mm_cmpge_ss(a, c));
2346 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2347 assert_eq!(rc, ec);
2348
2349 let rd: u32x4 = transmute(_mm_cmpge_ss(a, d));
2350 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2351 assert_eq!(rd, ed);
2352 }
2353
2354 #[simd_test(enable = "sse")]
2355 unsafe fn test_mm_cmpneq_ss() {
2356 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2357 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2358 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2359 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2360
2361 let b1 = !0u32; let c1 = 0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmpneq_ss(a, b));
2366 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2367 assert_eq!(rb, eb);
2368
2369 let rc: u32x4 = transmute(_mm_cmpneq_ss(a, c));
2370 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2371 assert_eq!(rc, ec);
2372
2373 let rd: u32x4 = transmute(_mm_cmpneq_ss(a, d));
2374 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2375 assert_eq!(rd, ed);
2376 }
2377
2378 #[simd_test(enable = "sse")]
2379 unsafe fn test_mm_cmpnlt_ss() {
2380 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2386 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2387 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2388 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2389
2390 let b1 = !0u32; let c1 = !0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpnlt_ss(a, b));
2395 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2396 assert_eq!(rb, eb);
2397
2398 let rc: u32x4 = transmute(_mm_cmpnlt_ss(a, c));
2399 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2400 assert_eq!(rc, ec);
2401
2402 let rd: u32x4 = transmute(_mm_cmpnlt_ss(a, d));
2403 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2404 assert_eq!(rd, ed);
2405 }
2406
2407 #[simd_test(enable = "sse")]
2408 unsafe fn test_mm_cmpnle_ss() {
2409 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2415 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2416 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2417 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2418
2419 let b1 = !0u32; let c1 = 0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpnle_ss(a, b));
2424 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2425 assert_eq!(rb, eb);
2426
2427 let rc: u32x4 = transmute(_mm_cmpnle_ss(a, c));
2428 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2429 assert_eq!(rc, ec);
2430
2431 let rd: u32x4 = transmute(_mm_cmpnle_ss(a, d));
2432 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2433 assert_eq!(rd, ed);
2434 }
2435
2436 #[simd_test(enable = "sse")]
2437 unsafe fn test_mm_cmpngt_ss() {
2438 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2444 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2445 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2446 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2447
2448 let b1 = 0u32; let c1 = !0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmpngt_ss(a, b));
2453 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2454 assert_eq!(rb, eb);
2455
2456 let rc: u32x4 = transmute(_mm_cmpngt_ss(a, c));
2457 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2458 assert_eq!(rc, ec);
2459
2460 let rd: u32x4 = transmute(_mm_cmpngt_ss(a, d));
2461 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2462 assert_eq!(rd, ed);
2463 }
2464
2465 #[simd_test(enable = "sse")]
2466 unsafe fn test_mm_cmpnge_ss() {
2467 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2473 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2474 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2475 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2476
2477 let b1 = 0u32; let c1 = 0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmpnge_ss(a, b));
2482 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2483 assert_eq!(rb, eb);
2484
2485 let rc: u32x4 = transmute(_mm_cmpnge_ss(a, c));
2486 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2487 assert_eq!(rc, ec);
2488
2489 let rd: u32x4 = transmute(_mm_cmpnge_ss(a, d));
2490 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2491 assert_eq!(rd, ed);
2492 }
2493
2494 #[simd_test(enable = "sse")]
2495 unsafe fn test_mm_cmpord_ss() {
2496 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2497 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2498 let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
2499 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2500
2501 let b1 = !0u32; let c1 = 0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmpord_ss(a, b));
2506 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2507 assert_eq!(rb, eb);
2508
2509 let rc: u32x4 = transmute(_mm_cmpord_ss(a, c));
2510 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2511 assert_eq!(rc, ec);
2512
2513 let rd: u32x4 = transmute(_mm_cmpord_ss(a, d));
2514 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2515 assert_eq!(rd, ed);
2516 }
2517
2518 #[simd_test(enable = "sse")]
2519 unsafe fn test_mm_cmpunord_ss() {
2520 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2521 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2522 let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
2523 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2524
2525 let b1 = 0u32; let c1 = !0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpunord_ss(a, b));
2530 let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2531 assert_eq!(rb, eb);
2532
2533 let rc: u32x4 = transmute(_mm_cmpunord_ss(a, c));
2534 let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2535 assert_eq!(rc, ec);
2536
2537 let rd: u32x4 = transmute(_mm_cmpunord_ss(a, d));
2538 let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2539 assert_eq!(rd, ed);
2540 }
2541
2542 #[simd_test(enable = "sse")]
2543 unsafe fn test_mm_cmpeq_ps() {
2544 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2545 let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
2546 let tru = !0u32;
2547 let fls = 0u32;
2548
2549 let e = u32x4::new(fls, fls, tru, fls);
2550 let r: u32x4 = transmute(_mm_cmpeq_ps(a, b));
2551 assert_eq!(r, e);
2552 }
2553
2554 #[simd_test(enable = "sse")]
2555 unsafe fn test_mm_cmplt_ps() {
2556 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2557 let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
2558 let tru = !0u32;
2559 let fls = 0u32;
2560
2561 let e = u32x4::new(tru, fls, fls, fls);
2562 let r: u32x4 = transmute(_mm_cmplt_ps(a, b));
2563 assert_eq!(r, e);
2564 }
2565
2566 #[simd_test(enable = "sse")]
2567 unsafe fn test_mm_cmple_ps() {
2568 let a = _mm_setr_ps(10.0, 50.0, 1.0, 4.0);
2569 let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
2570 let tru = !0u32;
2571 let fls = 0u32;
2572
2573 let e = u32x4::new(tru, fls, tru, fls);
2574 let r: u32x4 = transmute(_mm_cmple_ps(a, b));
2575 assert_eq!(r, e);
2576 }
2577
2578 #[simd_test(enable = "sse")]
2579 unsafe fn test_mm_cmpgt_ps() {
2580 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2581 let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
2582 let tru = !0u32;
2583 let fls = 0u32;
2584
2585 let e = u32x4::new(fls, tru, fls, fls);
2586 let r: u32x4 = transmute(_mm_cmpgt_ps(a, b));
2587 assert_eq!(r, e);
2588 }
2589
2590 #[simd_test(enable = "sse")]
2591 unsafe fn test_mm_cmpge_ps() {
2592 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2593 let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
2594 let tru = !0u32;
2595 let fls = 0u32;
2596
2597 let e = u32x4::new(fls, tru, tru, fls);
2598 let r: u32x4 = transmute(_mm_cmpge_ps(a, b));
2599 assert_eq!(r, e);
2600 }
2601
2602 #[simd_test(enable = "sse")]
2603 unsafe fn test_mm_cmpneq_ps() {
2604 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2605 let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
2606 let tru = !0u32;
2607 let fls = 0u32;
2608
2609 let e = u32x4::new(tru, tru, fls, tru);
2610 let r: u32x4 = transmute(_mm_cmpneq_ps(a, b));
2611 assert_eq!(r, e);
2612 }
2613
2614 #[simd_test(enable = "sse")]
2615 unsafe fn test_mm_cmpnlt_ps() {
2616 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2617 let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
2618 let tru = !0u32;
2619 let fls = 0u32;
2620
2621 let e = u32x4::new(fls, tru, tru, tru);
2622 let r: u32x4 = transmute(_mm_cmpnlt_ps(a, b));
2623 assert_eq!(r, e);
2624 }
2625
2626 #[simd_test(enable = "sse")]
2627 unsafe fn test_mm_cmpnle_ps() {
2628 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2629 let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
2630 let tru = !0u32;
2631 let fls = 0u32;
2632
2633 let e = u32x4::new(fls, tru, fls, tru);
2634 let r: u32x4 = transmute(_mm_cmpnle_ps(a, b));
2635 assert_eq!(r, e);
2636 }
2637
2638 #[simd_test(enable = "sse")]
2639 unsafe fn test_mm_cmpngt_ps() {
2640 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2641 let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
2642 let tru = !0u32;
2643 let fls = 0u32;
2644
2645 let e = u32x4::new(tru, fls, tru, tru);
2646 let r: u32x4 = transmute(_mm_cmpngt_ps(a, b));
2647 assert_eq!(r, e);
2648 }
2649
2650 #[simd_test(enable = "sse")]
2651 unsafe fn test_mm_cmpnge_ps() {
2652 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2653 let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
2654 let tru = !0u32;
2655 let fls = 0u32;
2656
2657 let e = u32x4::new(tru, fls, fls, tru);
2658 let r: u32x4 = transmute(_mm_cmpnge_ps(a, b));
2659 assert_eq!(r, e);
2660 }
2661
2662 #[simd_test(enable = "sse")]
2663 unsafe fn test_mm_cmpord_ps() {
2664 let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
2665 let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
2666 let tru = !0u32;
2667 let fls = 0u32;
2668
2669 let e = u32x4::new(tru, fls, fls, fls);
2670 let r: u32x4 = transmute(_mm_cmpord_ps(a, b));
2671 assert_eq!(r, e);
2672 }
2673
2674 #[simd_test(enable = "sse")]
2675 unsafe fn test_mm_cmpunord_ps() {
2676 let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
2677 let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
2678 let tru = !0u32;
2679 let fls = 0u32;
2680
2681 let e = u32x4::new(fls, tru, tru, tru);
2682 let r: u32x4 = transmute(_mm_cmpunord_ps(a, b));
2683 assert_eq!(r, e);
2684 }
2685
2686 #[simd_test(enable = "sse")]
2687 unsafe fn test_mm_comieq_ss() {
2688 let aa = &[3.0f32, 12.0, 23.0, NAN];
2689 let bb = &[3.0f32, 47.5, 1.5, NAN];
2690
2691 let ee = &[1i32, 0, 0, 0];
2692
2693 for i in 0..4 {
2694 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2695 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2696
2697 let r = _mm_comieq_ss(a, b);
2698
2699 assert_eq!(
2700 ee[i], r,
2701 "_mm_comieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2702 a, b, r, ee[i], i
2703 );
2704 }
2705 }
2706
2707 #[simd_test(enable = "sse")]
2708 unsafe fn test_mm_comilt_ss() {
2709 let aa = &[3.0f32, 12.0, 23.0, NAN];
2710 let bb = &[3.0f32, 47.5, 1.5, NAN];
2711
2712 let ee = &[0i32, 1, 0, 0];
2713
2714 for i in 0..4 {
2715 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2716 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2717
2718 let r = _mm_comilt_ss(a, b);
2719
2720 assert_eq!(
2721 ee[i], r,
2722 "_mm_comilt_ss({:?}, {:?}) = {}, expected: {} (i={})",
2723 a, b, r, ee[i], i
2724 );
2725 }
2726 }
2727
2728 #[simd_test(enable = "sse")]
2729 unsafe fn test_mm_comile_ss() {
2730 let aa = &[3.0f32, 12.0, 23.0, NAN];
2731 let bb = &[3.0f32, 47.5, 1.5, NAN];
2732
2733 let ee = &[1i32, 1, 0, 0];
2734
2735 for i in 0..4 {
2736 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2737 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2738
2739 let r = _mm_comile_ss(a, b);
2740
2741 assert_eq!(
2742 ee[i], r,
2743 "_mm_comile_ss({:?}, {:?}) = {}, expected: {} (i={})",
2744 a, b, r, ee[i], i
2745 );
2746 }
2747 }
2748
2749 #[simd_test(enable = "sse")]
2750 unsafe fn test_mm_comigt_ss() {
2751 let aa = &[3.0f32, 12.0, 23.0, NAN];
2752 let bb = &[3.0f32, 47.5, 1.5, NAN];
2753
2754 let ee = &[1i32, 0, 1, 0];
2755
2756 for i in 0..4 {
2757 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2758 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2759
2760 let r = _mm_comige_ss(a, b);
2761
2762 assert_eq!(
2763 ee[i], r,
2764 "_mm_comige_ss({:?}, {:?}) = {}, expected: {} (i={})",
2765 a, b, r, ee[i], i
2766 );
2767 }
2768 }
2769
2770 #[simd_test(enable = "sse")]
2771 unsafe fn test_mm_comineq_ss() {
2772 let aa = &[3.0f32, 12.0, 23.0, NAN];
2773 let bb = &[3.0f32, 47.5, 1.5, NAN];
2774
2775 let ee = &[0i32, 1, 1, 1];
2776
2777 for i in 0..4 {
2778 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2779 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2780
2781 let r = _mm_comineq_ss(a, b);
2782
2783 assert_eq!(
2784 ee[i], r,
2785 "_mm_comineq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2786 a, b, r, ee[i], i
2787 );
2788 }
2789 }
2790
2791 #[simd_test(enable = "sse")]
2792 unsafe fn test_mm_ucomieq_ss() {
2793 let aa = &[3.0f32, 12.0, 23.0, NAN];
2794 let bb = &[3.0f32, 47.5, 1.5, NAN];
2795
2796 let ee = &[1i32, 0, 0, 0];
2797
2798 for i in 0..4 {
2799 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2800 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2801
2802 let r = _mm_ucomieq_ss(a, b);
2803
2804 assert_eq!(
2805 ee[i], r,
2806 "_mm_ucomieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2807 a, b, r, ee[i], i
2808 );
2809 }
2810 }
2811
2812 #[simd_test(enable = "sse")]
2813 unsafe fn test_mm_ucomilt_ss() {
2814 let aa = &[3.0f32, 12.0, 23.0, NAN];
2815 let bb = &[3.0f32, 47.5, 1.5, NAN];
2816
2817 let ee = &[0i32, 1, 0, 0];
2818
2819 for i in 0..4 {
2820 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2821 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2822
2823 let r = _mm_ucomilt_ss(a, b);
2824
2825 assert_eq!(
2826 ee[i], r,
2827 "_mm_ucomilt_ss({:?}, {:?}) = {}, expected: {} (i={})",
2828 a, b, r, ee[i], i
2829 );
2830 }
2831 }
2832
2833 #[simd_test(enable = "sse")]
2834 unsafe fn test_mm_ucomile_ss() {
2835 let aa = &[3.0f32, 12.0, 23.0, NAN];
2836 let bb = &[3.0f32, 47.5, 1.5, NAN];
2837
2838 let ee = &[1i32, 1, 0, 0];
2839
2840 for i in 0..4 {
2841 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2842 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2843
2844 let r = _mm_ucomile_ss(a, b);
2845
2846 assert_eq!(
2847 ee[i], r,
2848 "_mm_ucomile_ss({:?}, {:?}) = {}, expected: {} (i={})",
2849 a, b, r, ee[i], i
2850 );
2851 }
2852 }
2853
2854 #[simd_test(enable = "sse")]
2855 unsafe fn test_mm_ucomigt_ss() {
2856 let aa = &[3.0f32, 12.0, 23.0, NAN];
2857 let bb = &[3.0f32, 47.5, 1.5, NAN];
2858
2859 let ee = &[0i32, 0, 1, 0];
2860
2861 for i in 0..4 {
2862 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2863 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2864
2865 let r = _mm_ucomigt_ss(a, b);
2866
2867 assert_eq!(
2868 ee[i], r,
2869 "_mm_ucomigt_ss({:?}, {:?}) = {}, expected: {} (i={})",
2870 a, b, r, ee[i], i
2871 );
2872 }
2873 }
2874
2875 #[simd_test(enable = "sse")]
2876 unsafe fn test_mm_ucomige_ss() {
2877 let aa = &[3.0f32, 12.0, 23.0, NAN];
2878 let bb = &[3.0f32, 47.5, 1.5, NAN];
2879
2880 let ee = &[1i32, 0, 1, 0];
2881
2882 for i in 0..4 {
2883 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2884 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2885
2886 let r = _mm_ucomige_ss(a, b);
2887
2888 assert_eq!(
2889 ee[i], r,
2890 "_mm_ucomige_ss({:?}, {:?}) = {}, expected: {} (i={})",
2891 a, b, r, ee[i], i
2892 );
2893 }
2894 }
2895
2896 #[simd_test(enable = "sse")]
2897 unsafe fn test_mm_ucomineq_ss() {
2898 let aa = &[3.0f32, 12.0, 23.0, NAN];
2899 let bb = &[3.0f32, 47.5, 1.5, NAN];
2900
2901 let ee = &[0i32, 1, 1, 1];
2902
2903 for i in 0..4 {
2904 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2905 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2906
2907 let r = _mm_ucomineq_ss(a, b);
2908
2909 assert_eq!(
2910 ee[i], r,
2911 "_mm_ucomineq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2912 a, b, r, ee[i], i
2913 );
2914 }
2915 }
2916
2917 #[simd_test(enable = "sse")]
2918 unsafe fn test_mm_cvtss_si32() {
2919 let inputs = &[42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1];
2920 let result = &[42i32, -3, i32::MIN, 0, i32::MIN, 2147483520];
2921 for i in 0..inputs.len() {
2922 let x = _mm_setr_ps(inputs[i], 1.0, 3.0, 4.0);
2923 let e = result[i];
2924 let r = _mm_cvtss_si32(x);
2925 assert_eq!(
2926 e, r,
2927 "TestCase #{} _mm_cvtss_si32({:?}) = {}, expected: {}",
2928 i, x, r, e
2929 );
2930 }
2931 }
2932
2933 #[simd_test(enable = "sse")]
2934 unsafe fn test_mm_cvttss_si32() {
2935 let inputs = &[
2936 (42.0f32, 42i32),
2937 (-31.4, -31),
2938 (-33.5, -33),
2939 (-34.5, -34),
2940 (10.999, 10),
2941 (-5.99, -5),
2942 (4.0e10, i32::MIN),
2943 (4.0e-10, 0),
2944 (NAN, i32::MIN),
2945 (2147483500.1, 2147483520),
2946 ];
2947 for (i, &(xi, e)) in inputs.iter().enumerate() {
2948 let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0);
2949 let r = _mm_cvttss_si32(x);
2950 assert_eq!(
2951 e, r,
2952 "TestCase #{} _mm_cvttss_si32({:?}) = {}, expected: {}",
2953 i, x, r, e
2954 );
2955 }
2956 }
2957
2958 #[simd_test(enable = "sse")]
2959 unsafe fn test_mm_cvtsi32_ss() {
2960 let inputs = &[
2961 (4555i32, 4555.0f32),
2962 (322223333, 322223330.0),
2963 (-432, -432.0),
2964 (-322223333, -322223330.0),
2965 ];
2966
2967 for &(x, f) in inputs.iter() {
2968 let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
2969 let r = _mm_cvtsi32_ss(a, x);
2970 let e = _mm_setr_ps(f, 6.0, 7.0, 8.0);
2971 assert_eq_m128(e, r);
2972 }
2973 }
2974
2975 #[simd_test(enable = "sse")]
2976 unsafe fn test_mm_cvtss_f32() {
2977 let a = _mm_setr_ps(312.0134, 5.0, 6.0, 7.0);
2978 assert_eq!(_mm_cvtss_f32(a), 312.0134);
2979 }
2980
2981 #[simd_test(enable = "sse")]
2982 unsafe fn test_mm_set_ss() {
2983 let r = _mm_set_ss(black_box(4.25));
2984 assert_eq_m128(r, _mm_setr_ps(4.25, 0.0, 0.0, 0.0));
2985 }
2986
2987 #[simd_test(enable = "sse")]
2988 unsafe fn test_mm_set1_ps() {
2989 let r1 = _mm_set1_ps(black_box(4.25));
2990 let r2 = _mm_set_ps1(black_box(4.25));
2991 assert_eq!(get_m128(r1, 0), 4.25);
2992 assert_eq!(get_m128(r1, 1), 4.25);
2993 assert_eq!(get_m128(r1, 2), 4.25);
2994 assert_eq!(get_m128(r1, 3), 4.25);
2995 assert_eq!(get_m128(r2, 0), 4.25);
2996 assert_eq!(get_m128(r2, 1), 4.25);
2997 assert_eq!(get_m128(r2, 2), 4.25);
2998 assert_eq!(get_m128(r2, 3), 4.25);
2999 }
3000
3001 #[simd_test(enable = "sse")]
3002 unsafe fn test_mm_set_ps() {
3003 let r = _mm_set_ps(
3004 black_box(1.0),
3005 black_box(2.0),
3006 black_box(3.0),
3007 black_box(4.0),
3008 );
3009 assert_eq!(get_m128(r, 0), 4.0);
3010 assert_eq!(get_m128(r, 1), 3.0);
3011 assert_eq!(get_m128(r, 2), 2.0);
3012 assert_eq!(get_m128(r, 3), 1.0);
3013 }
3014
3015 #[simd_test(enable = "sse")]
3016 unsafe fn test_mm_setr_ps() {
3017 let r = _mm_setr_ps(
3018 black_box(1.0),
3019 black_box(2.0),
3020 black_box(3.0),
3021 black_box(4.0),
3022 );
3023 assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
3024 }
3025
3026 #[simd_test(enable = "sse")]
3027 unsafe fn test_mm_setzero_ps() {
3028 let r = *black_box(&_mm_setzero_ps());
3029 assert_eq_m128(r, _mm_set1_ps(0.0));
3030 }
3031
3032 #[simd_test(enable = "sse")]
3033 unsafe fn test_mm_shuffle() {
3034 assert_eq!(_MM_SHUFFLE(0, 1, 1, 3), 0b00_01_01_11);
3035 assert_eq!(_MM_SHUFFLE(3, 1, 1, 0), 0b11_01_01_00);
3036 assert_eq!(_MM_SHUFFLE(1, 2, 2, 1), 0b01_10_10_01);
3037 }
3038
3039 #[simd_test(enable = "sse")]
3040 unsafe fn test_mm_shuffle_ps() {
3041 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3042 let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3043 let r = _mm_shuffle_ps::<0b00_01_01_11>(a, b);
3044 assert_eq_m128(r, _mm_setr_ps(4.0, 2.0, 6.0, 5.0));
3045 }
3046
3047 #[simd_test(enable = "sse")]
3048 unsafe fn test_mm_unpackhi_ps() {
3049 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3050 let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3051 let r = _mm_unpackhi_ps(a, b);
3052 assert_eq_m128(r, _mm_setr_ps(3.0, 7.0, 4.0, 8.0));
3053 }
3054
3055 #[simd_test(enable = "sse")]
3056 unsafe fn test_mm_unpacklo_ps() {
3057 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3058 let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3059 let r = _mm_unpacklo_ps(a, b);
3060 assert_eq_m128(r, _mm_setr_ps(1.0, 5.0, 2.0, 6.0));
3061 }
3062
3063 #[simd_test(enable = "sse")]
3064 unsafe fn test_mm_movehl_ps() {
3065 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3066 let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3067 let r = _mm_movehl_ps(a, b);
3068 assert_eq_m128(r, _mm_setr_ps(7.0, 8.0, 3.0, 4.0));
3069 }
3070
3071 #[simd_test(enable = "sse")]
3072 unsafe fn test_mm_movelh_ps() {
3073 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3074 let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3075 let r = _mm_movelh_ps(a, b);
3076 assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 5.0, 6.0));
3077 }
3078
3079 #[simd_test(enable = "sse")]
3080 unsafe fn test_mm_load_ss() {
3081 let a = 42.0f32;
3082 let r = _mm_load_ss(ptr::addr_of!(a));
3083 assert_eq_m128(r, _mm_setr_ps(42.0, 0.0, 0.0, 0.0));
3084 }
3085
3086 #[simd_test(enable = "sse")]
3087 unsafe fn test_mm_load1_ps() {
3088 let a = 42.0f32;
3089 let r = _mm_load1_ps(ptr::addr_of!(a));
3090 assert_eq_m128(r, _mm_setr_ps(42.0, 42.0, 42.0, 42.0));
3091 }
3092
3093 #[simd_test(enable = "sse")]
3094 unsafe fn test_mm_load_ps() {
3095 let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
3096
3097 let mut p = vals.as_ptr();
3098 let mut fixup = 0.0f32;
3099
3100 let unalignment = (p as usize) & 0xf;
3104 if unalignment != 0 {
3105 let delta = (16 - unalignment) >> 2;
3106 fixup = delta as f32;
3107 p = p.add(delta);
3108 }
3109
3110 let r = _mm_load_ps(p);
3111 let e = _mm_add_ps(_mm_setr_ps(1.0, 2.0, 3.0, 4.0), _mm_set1_ps(fixup));
3112 assert_eq_m128(r, e);
3113 }
3114
3115 #[simd_test(enable = "sse")]
3116 unsafe fn test_mm_loadu_ps() {
3117 let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
3118 let p = vals.as_ptr().add(3);
3119 let r = _mm_loadu_ps(black_box(p));
3120 assert_eq_m128(r, _mm_setr_ps(4.0, 5.0, 6.0, 7.0));
3121 }
3122
3123 #[simd_test(enable = "sse")]
3124 unsafe fn test_mm_loadr_ps() {
3125 let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
3126
3127 let mut p = vals.as_ptr();
3128 let mut fixup = 0.0f32;
3129
3130 let unalignment = (p as usize) & 0xf;
3134 if unalignment != 0 {
3135 let delta = (16 - unalignment) >> 2;
3136 fixup = delta as f32;
3137 p = p.add(delta);
3138 }
3139
3140 let r = _mm_loadr_ps(p);
3141 let e = _mm_add_ps(_mm_setr_ps(4.0, 3.0, 2.0, 1.0), _mm_set1_ps(fixup));
3142 assert_eq_m128(r, e);
3143 }
3144
3145 #[simd_test(enable = "sse")]
3146 unsafe fn test_mm_store_ss() {
3147 let mut vals = [0.0f32; 8];
3148 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3149 _mm_store_ss(vals.as_mut_ptr().add(1), a);
3150
3151 assert_eq!(vals[0], 0.0);
3152 assert_eq!(vals[1], 1.0);
3153 assert_eq!(vals[2], 0.0);
3154 }
3155
3156 #[simd_test(enable = "sse")]
3157 unsafe fn test_mm_store1_ps() {
3158 let mut vals = [0.0f32; 8];
3159 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3160
3161 let mut ofs = 0;
3162 let mut p = vals.as_mut_ptr();
3163
3164 if (p as usize) & 0xf != 0 {
3165 ofs = (16 - ((p as usize) & 0xf)) >> 2;
3166 p = p.add(ofs);
3167 }
3168
3169 _mm_store1_ps(p, *black_box(&a));
3170
3171 if ofs > 0 {
3172 assert_eq!(vals[ofs - 1], 0.0);
3173 }
3174 assert_eq!(vals[ofs + 0], 1.0);
3175 assert_eq!(vals[ofs + 1], 1.0);
3176 assert_eq!(vals[ofs + 2], 1.0);
3177 assert_eq!(vals[ofs + 3], 1.0);
3178 assert_eq!(vals[ofs + 4], 0.0);
3179 }
3180
3181 #[simd_test(enable = "sse")]
3182 unsafe fn test_mm_store_ps() {
3183 let mut vals = [0.0f32; 8];
3184 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3185
3186 let mut ofs = 0;
3187 let mut p = vals.as_mut_ptr();
3188
3189 if (p as usize) & 0xf != 0 {
3191 ofs = (16 - ((p as usize) & 0xf)) >> 2;
3192 p = p.add(ofs);
3193 }
3194
3195 _mm_store_ps(p, *black_box(&a));
3196
3197 if ofs > 0 {
3198 assert_eq!(vals[ofs - 1], 0.0);
3199 }
3200 assert_eq!(vals[ofs + 0], 1.0);
3201 assert_eq!(vals[ofs + 1], 2.0);
3202 assert_eq!(vals[ofs + 2], 3.0);
3203 assert_eq!(vals[ofs + 3], 4.0);
3204 assert_eq!(vals[ofs + 4], 0.0);
3205 }
3206
3207 #[simd_test(enable = "sse")]
3208 unsafe fn test_mm_storer_ps() {
3209 let mut vals = [0.0f32; 8];
3210 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3211
3212 let mut ofs = 0;
3213 let mut p = vals.as_mut_ptr();
3214
3215 if (p as usize) & 0xf != 0 {
3217 ofs = (16 - ((p as usize) & 0xf)) >> 2;
3218 p = p.add(ofs);
3219 }
3220
3221 _mm_storer_ps(p, *black_box(&a));
3222
3223 if ofs > 0 {
3224 assert_eq!(vals[ofs - 1], 0.0);
3225 }
3226 assert_eq!(vals[ofs + 0], 4.0);
3227 assert_eq!(vals[ofs + 1], 3.0);
3228 assert_eq!(vals[ofs + 2], 2.0);
3229 assert_eq!(vals[ofs + 3], 1.0);
3230 assert_eq!(vals[ofs + 4], 0.0);
3231 }
3232
3233 #[simd_test(enable = "sse")]
3234 unsafe fn test_mm_storeu_ps() {
3235 let mut vals = [0.0f32; 8];
3236 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3237
3238 let mut ofs = 0;
3239 let mut p = vals.as_mut_ptr();
3240
3241 if (p as usize) & 0xf == 0 {
3243 ofs = 1;
3244 p = p.add(1);
3245 }
3246
3247 _mm_storeu_ps(p, *black_box(&a));
3248
3249 if ofs > 0 {
3250 assert_eq!(vals[ofs - 1], 0.0);
3251 }
3252 assert_eq!(vals[ofs + 0], 1.0);
3253 assert_eq!(vals[ofs + 1], 2.0);
3254 assert_eq!(vals[ofs + 2], 3.0);
3255 assert_eq!(vals[ofs + 3], 4.0);
3256 assert_eq!(vals[ofs + 4], 0.0);
3257 }
3258
3259 #[simd_test(enable = "sse")]
3260 unsafe fn test_mm_move_ss() {
3261 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3262 let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3263
3264 let r = _mm_move_ss(a, b);
3265 let e = _mm_setr_ps(5.0, 2.0, 3.0, 4.0);
3266 assert_eq_m128(e, r);
3267 }
3268
3269 #[simd_test(enable = "sse")]
3270 unsafe fn test_mm_movemask_ps() {
3271 let r = _mm_movemask_ps(_mm_setr_ps(-1.0, 5.0, -5.0, 0.0));
3272 assert_eq!(r, 0b0101);
3273
3274 let r = _mm_movemask_ps(_mm_setr_ps(-1.0, -5.0, -5.0, 0.0));
3275 assert_eq!(r, 0b0111);
3276 }
3277
// Smoke test: only checks that `_mm_sfence` can be called; nothing is asserted
// about its effect.
#[simd_test(enable = "sse")]
// Skipped under Miri (presumably because Miri cannot execute the fence — confirm).
#[cfg_attr(miri, ignore)]
unsafe fn test_mm_sfence() {
    _mm_sfence();
}
3284
3285 #[simd_test(enable = "sse")]
3286 unsafe fn test_MM_TRANSPOSE4_PS() {
3287 let mut a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3288 let mut b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3289 let mut c = _mm_setr_ps(9.0, 10.0, 11.0, 12.0);
3290 let mut d = _mm_setr_ps(13.0, 14.0, 15.0, 16.0);
3291
3292 _MM_TRANSPOSE4_PS(&mut a, &mut b, &mut c, &mut d);
3293
3294 assert_eq_m128(a, _mm_setr_ps(1.0, 5.0, 9.0, 13.0));
3295 assert_eq_m128(b, _mm_setr_ps(2.0, 6.0, 10.0, 14.0));
3296 assert_eq_m128(c, _mm_setr_ps(3.0, 7.0, 11.0, 15.0));
3297 assert_eq_m128(d, _mm_setr_ps(4.0, 8.0, 12.0, 16.0));
3298 }
3299
// Four `f32`s in a 16-byte aligned wrapper, so that `data` starts on a 16-byte
// boundary. Used as the destination buffer in `test_mm_stream_ps`.
#[repr(align(16))]
struct Memory {
    // The aligned payload.
    pub data: [f32; 4],
}
3304
3305 #[simd_test(enable = "sse")]
3306 #[cfg_attr(miri, ignore)]
3309 unsafe fn test_mm_stream_ps() {
3310 let a = _mm_set1_ps(7.0);
3311 let mut mem = Memory { data: [-1.0; 4] };
3312
3313 _mm_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
3314 for i in 0..4 {
3315 assert_eq!(mem.data[i], get_m128(a, i));
3316 }
3317 }
3318}