miri/shims/x86/
sse2.rs

1use rustc_abi::CanonAbi;
2use rustc_apfloat::ieee::Double;
3use rustc_middle::ty::Ty;
4use rustc_span::Symbol;
5use rustc_target::callconv::FnAbi;
6
7use super::{
8    FloatBinOp, ShiftOp, bin_op_simd_float_all, bin_op_simd_float_first, convert_float_to_int,
9    packssdw, packsswb, packuswb, pmaddwd, psadbw, shift_simd_by_scalar,
10};
11use crate::*;
12
// Attach the SSE2 shim to the Miri interpreter context. The impl body is empty because
// the trait's method is provided with a default body.
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
14pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
15    fn emulate_x86_sse2_intrinsic(
16        &mut self,
17        link_name: Symbol,
18        abi: &FnAbi<'tcx, Ty<'tcx>>,
19        args: &[OpTy<'tcx>],
20        dest: &MPlaceTy<'tcx>,
21    ) -> InterpResult<'tcx, EmulateItemResult> {
22        let this = self.eval_context_mut();
23        this.expect_target_feature_for_intrinsic(link_name, "sse2")?;
24        // Prefix should have already been checked.
25        let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sse2.").unwrap();
26
27        // These intrinsics operate on 128-bit (f32x4, f64x2, i8x16, i16x8, i32x4, i64x2) SIMD
28        // vectors unless stated otherwise.
29        // Many intrinsic names are suffixed with "ps" (packed single), "ss" (scalar single),
30        // "pd" (packed double) or "sd" (scalar double), where single means single precision
31        // floating point (f32) and double means double precision floating point (f64). "ps"
32        // and "pd" means that the operation is performed on each element of the vector, while
33        // "ss" and "sd" means that the operation is performed only on the first element, copying
34        // the remaining elements from the input vector (for binary operations, from the left-hand
35        // side).
36        // Intrinsics suffixed with "epiX" or "epuX" operate with X-bit signed or unsigned
37        // vectors.
38        match unprefixed_name {
39            // Used to implement the _mm_sad_epu8 function.
40            "psad.bw" => {
41                let [left, right] =
42                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
43
44                psadbw(this, left, right, dest)?
45            }
46            // Used to implement the _mm_{sll,srl,sra}_epi{16,32,64} functions
47            // (except _mm_sra_epi64, which is not available in SSE2).
48            // Shifts N-bit packed integers in left by the amount in right.
49            // Both operands are 128-bit vectors. However, right is interpreted as
50            // a single 64-bit integer (remaining bits are ignored).
51            // For logic shifts, when right is larger than N - 1, zero is produced.
52            // For arithmetic shifts, when right is larger than N - 1, the sign bit
53            // is copied to remaining bits.
54            "psll.w" | "psrl.w" | "psra.w" | "psll.d" | "psrl.d" | "psra.d" | "psll.q"
55            | "psrl.q" => {
56                let [left, right] =
57                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
58
59                let which = match unprefixed_name {
60                    "psll.w" | "psll.d" | "psll.q" => ShiftOp::Left,
61                    "psrl.w" | "psrl.d" | "psrl.q" => ShiftOp::RightLogic,
62                    "psra.w" | "psra.d" => ShiftOp::RightArith,
63                    _ => unreachable!(),
64                };
65
66                shift_simd_by_scalar(this, left, right, which, dest)?;
67            }
68            // Used to implement the _mm_cvtps_epi32, _mm_cvttps_epi32, _mm_cvtpd_epi32
69            // and _mm_cvttpd_epi32 functions.
70            // Converts packed f32/f64 to packed i32.
71            "cvtps2dq" | "cvttps2dq" | "cvtpd2dq" | "cvttpd2dq" => {
72                let [op] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
73
74                let (op_len, _) = op.layout.ty.simd_size_and_type(*this.tcx);
75                let (dest_len, _) = dest.layout.ty.simd_size_and_type(*this.tcx);
76                match unprefixed_name {
77                    "cvtps2dq" | "cvttps2dq" => {
78                        // f32x4 to i32x4 conversion
79                        assert_eq!(op_len, 4);
80                        assert_eq!(dest_len, op_len);
81                    }
82                    "cvtpd2dq" | "cvttpd2dq" => {
83                        // f64x2 to i32x4 conversion
84                        // the last two values are filled with zeros
85                        assert_eq!(op_len, 2);
86                        assert_eq!(dest_len, 4);
87                    }
88                    _ => unreachable!(),
89                }
90
91                let rnd = match unprefixed_name {
92                    // "current SSE rounding mode", assume nearest
93                    // https://www.felixcloutier.com/x86/cvtps2dq
94                    // https://www.felixcloutier.com/x86/cvtpd2dq
95                    "cvtps2dq" | "cvtpd2dq" => rustc_apfloat::Round::NearestTiesToEven,
96                    // always truncate
97                    // https://www.felixcloutier.com/x86/cvttps2dq
98                    // https://www.felixcloutier.com/x86/cvttpd2dq
99                    "cvttps2dq" | "cvttpd2dq" => rustc_apfloat::Round::TowardZero,
100                    _ => unreachable!(),
101                };
102
103                convert_float_to_int(this, op, rnd, dest)?;
104            }
105            // Used to implement the _mm_packs_epi16 function.
106            // Converts two 16-bit integer vectors to a single 8-bit integer
107            // vector with signed saturation.
108            "packsswb.128" => {
109                let [left, right] =
110                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
111
112                packsswb(this, left, right, dest)?;
113            }
114            // Used to implement the _mm_packus_epi16 function.
115            // Converts two 16-bit signed integer vectors to a single 8-bit
116            // unsigned integer vector with saturation.
117            "packuswb.128" => {
118                let [left, right] =
119                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
120
121                packuswb(this, left, right, dest)?;
122            }
123            // Used to implement the _mm_packs_epi32 function.
124            // Converts two 32-bit integer vectors to a single 16-bit integer
125            // vector with signed saturation.
126            "packssdw.128" => {
127                let [left, right] =
128                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
129
130                packssdw(this, left, right, dest)?;
131            }
132            // Used to implement _mm_min_sd and _mm_max_sd functions.
133            // Note that the semantics are a bit different from Rust simd_min
134            // and simd_max intrinsics regarding handling of NaN and -0.0: Rust
135            // matches the IEEE min/max operations, while x86 has different
136            // semantics.
137            "min.sd" | "max.sd" => {
138                let [left, right] =
139                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
140
141                let which = match unprefixed_name {
142                    "min.sd" => FloatBinOp::Min,
143                    "max.sd" => FloatBinOp::Max,
144                    _ => unreachable!(),
145                };
146
147                bin_op_simd_float_first::<Double>(this, which, left, right, dest)?;
148            }
149            // Used to implement _mm_min_pd and _mm_max_pd functions.
150            // Note that the semantics are a bit different from Rust simd_min
151            // and simd_max intrinsics regarding handling of NaN and -0.0: Rust
152            // matches the IEEE min/max operations, while x86 has different
153            // semantics.
154            "min.pd" | "max.pd" => {
155                let [left, right] =
156                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
157
158                let which = match unprefixed_name {
159                    "min.pd" => FloatBinOp::Min,
160                    "max.pd" => FloatBinOp::Max,
161                    _ => unreachable!(),
162                };
163
164                bin_op_simd_float_all::<Double>(this, which, left, right, dest)?;
165            }
166            // Used to implement the _mm_cmp*_sd functions.
167            // Performs a comparison operation on the first component of `left`
168            // and `right`, returning 0 if false or `u64::MAX` if true. The remaining
169            // components are copied from `left`.
170            // _mm_cmp_sd is actually an AVX function where the operation is specified
171            // by a const parameter.
172            // _mm_cmp{eq,lt,le,gt,ge,neq,nlt,nle,ngt,nge,ord,unord}_sd are SSE2 functions
173            // with hard-coded operations.
174            "cmp.sd" => {
175                let [left, right, imm] =
176                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
177
178                let which =
179                    FloatBinOp::cmp_from_imm(this, this.read_scalar(imm)?.to_i8()?, link_name)?;
180
181                bin_op_simd_float_first::<Double>(this, which, left, right, dest)?;
182            }
183            // Used to implement the _mm_cmp*_pd functions.
184            // Performs a comparison operation on each component of `left`
185            // and `right`. For each component, returns 0 if false or `u64::MAX`
186            // if true.
187            // _mm_cmp_pd is actually an AVX function where the operation is specified
188            // by a const parameter.
189            // _mm_cmp{eq,lt,le,gt,ge,neq,nlt,nle,ngt,nge,ord,unord}_pd are SSE2 functions
190            // with hard-coded operations.
191            "cmp.pd" => {
192                let [left, right, imm] =
193                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
194
195                let which =
196                    FloatBinOp::cmp_from_imm(this, this.read_scalar(imm)?.to_i8()?, link_name)?;
197
198                bin_op_simd_float_all::<Double>(this, which, left, right, dest)?;
199            }
200            // Used to implement _mm_{,u}comi{eq,lt,le,gt,ge,neq}_sd functions.
201            // Compares the first component of `left` and `right` and returns
202            // a scalar value (0 or 1).
203            "comieq.sd" | "comilt.sd" | "comile.sd" | "comigt.sd" | "comige.sd" | "comineq.sd"
204            | "ucomieq.sd" | "ucomilt.sd" | "ucomile.sd" | "ucomigt.sd" | "ucomige.sd"
205            | "ucomineq.sd" => {
206                let [left, right] =
207                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
208
209                let (left, left_len) = this.project_to_simd(left)?;
210                let (right, right_len) = this.project_to_simd(right)?;
211
212                assert_eq!(left_len, right_len);
213
214                let left = this.read_scalar(&this.project_index(&left, 0)?)?.to_f64()?;
215                let right = this.read_scalar(&this.project_index(&right, 0)?)?.to_f64()?;
216                // The difference between the com* and ucom* variants is signaling
217                // of exceptions when either argument is a quiet NaN. We do not
218                // support accessing the SSE status register from miri (or from Rust,
219                // for that matter), so we treat both variants equally.
220                let res = match unprefixed_name {
221                    "comieq.sd" | "ucomieq.sd" => left == right,
222                    "comilt.sd" | "ucomilt.sd" => left < right,
223                    "comile.sd" | "ucomile.sd" => left <= right,
224                    "comigt.sd" | "ucomigt.sd" => left > right,
225                    "comige.sd" | "ucomige.sd" => left >= right,
226                    "comineq.sd" | "ucomineq.sd" => left != right,
227                    _ => unreachable!(),
228                };
229                this.write_scalar(Scalar::from_i32(i32::from(res)), dest)?;
230            }
231            // Use to implement the _mm_cvtsd_si32, _mm_cvttsd_si32,
232            // _mm_cvtsd_si64 and _mm_cvttsd_si64 functions.
233            // Converts the first component of `op` from f64 to i32/i64.
234            "cvtsd2si" | "cvttsd2si" | "cvtsd2si64" | "cvttsd2si64" => {
235                let [op] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
236                let (op, _) = this.project_to_simd(op)?;
237
238                let op = this.read_immediate(&this.project_index(&op, 0)?)?;
239
240                let rnd = match unprefixed_name {
241                    // "current SSE rounding mode", assume nearest
242                    // https://www.felixcloutier.com/x86/cvtsd2si
243                    "cvtsd2si" | "cvtsd2si64" => rustc_apfloat::Round::NearestTiesToEven,
244                    // always truncate
245                    // https://www.felixcloutier.com/x86/cvttsd2si
246                    "cvttsd2si" | "cvttsd2si64" => rustc_apfloat::Round::TowardZero,
247                    _ => unreachable!(),
248                };
249
250                let res = this.float_to_int_checked(&op, dest.layout, rnd)?.unwrap_or_else(|| {
251                    // Fallback to minimum according to SSE semantics.
252                    ImmTy::from_int(dest.layout.size.signed_int_min(), dest.layout)
253                });
254
255                this.write_immediate(*res, dest)?;
256            }
257            // Used to implement the _mm_cvtsd_ss function.
258            // Converts the first f64 from `right` to f32 and copies the remaining
259            // elements from `left`
260            "cvtsd2ss" => {
261                let [left, right] =
262                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
263
264                let (left, left_len) = this.project_to_simd(left)?;
265                let (right, _) = this.project_to_simd(right)?;
266                let (dest, dest_len) = this.project_to_simd(dest)?;
267
268                assert_eq!(dest_len, left_len);
269
270                // Convert first element of `right`
271                let right0 = this.read_immediate(&this.project_index(&right, 0)?)?;
272                let dest0 = this.project_index(&dest, 0)?;
273                let res0 = this.float_to_float_or_int(&right0, dest0.layout)?;
274                this.write_immediate(*res0, &dest0)?;
275
276                // Copy remaining from `left`
277                for i in 1..dest_len {
278                    this.copy_op(&this.project_index(&left, i)?, &this.project_index(&dest, i)?)?;
279                }
280            }
281            // Used to implement the _mm_madd_epi16 function.
282            // Multiplies packed signed 16-bit integers in `left` and `right`, producing
283            // intermediate signed 32-bit integers. Horizontally add adjacent pairs of
284            // intermediate 32-bit integers, and pack the results in `dest`.
285            "pmadd.wd" => {
286                let [left, right] =
287                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
288
289                pmaddwd(this, left, right, dest)?;
290            }
291            _ => return interp_ok(EmulateItemResult::NotSupported),
292        }
293        interp_ok(EmulateItemResult::NeedsReturn)
294    }
295}
miri/shims/x86/sse2.rs

miri/shims/x86/
sse2.rs