1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
use rustc_span::Symbol;
use rustc_target::spec::abi::Abi;

use super::{conditional_dot_product, mpsadbw, packusdw, round_all, round_first, test_bits_masked};
use crate::*;

// `EvalContextExt` supplies its method as a default implementation, so attaching it to the
// interpreter context requires no items in the impl body.
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
    fn emulate_x86_sse41_intrinsic(
        &mut self,
        link_name: Symbol,
        abi: Abi,
        args: &[OpTy<'tcx>],
        dest: &MPlaceTy<'tcx>,
    ) -> InterpResult<'tcx, EmulateItemResult> {
        let this = self.eval_context_mut();
        this.expect_target_feature_for_intrinsic(link_name, "sse4.1")?;
        // Prefix should have already been checked.
        let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sse41.").unwrap();

        match unprefixed_name {
            // Used to implement the _mm_insert_ps function.
            // Takes one element of `right` and inserts it into `left` and
            // optionally zero some elements. Source index is specified
            // in bits `6..=7` of `imm`, destination index is specified in
            // bits `4..=5` if `imm`, and `i`th bit specifies whether element
            // `i` is zeroed.
            "insertps" => {
                let [left, right, imm] =
                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

                let (left, left_len) = this.operand_to_simd(left)?;
                let (right, right_len) = this.operand_to_simd(right)?;
                let (dest, dest_len) = this.mplace_to_simd(dest)?;

                assert_eq!(dest_len, left_len);
                assert_eq!(dest_len, right_len);
                assert!(dest_len <= 4);

                let imm = this.read_scalar(imm)?.to_u8()?;
                let src_index = u64::from((imm >> 6) & 0b11);
                let dst_index = u64::from((imm >> 4) & 0b11);

                let src_value = this.read_immediate(&this.project_index(&right, src_index)?)?;

                for i in 0..dest_len {
                    let dest = this.project_index(&dest, i)?;

                    if imm & (1 << i) != 0 {
                        // zeroed
                        this.write_scalar(Scalar::from_u32(0), &dest)?;
                    } else if i == dst_index {
                        // copy from `right` at specified index
                        this.write_immediate(*src_value, &dest)?;
                    } else {
                        // copy from `left`
                        this.copy_op(&this.project_index(&left, i)?, &dest)?;
                    }
                }
            }
            // Used to implement the _mm_packus_epi32 function.
            // Concatenates two 32-bit signed integer vectors and converts
            // the result to a 16-bit unsigned integer vector with saturation.
            "packusdw" => {
                let [left, right] =
                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

                packusdw(this, left, right, dest)?;
            }
            // Used to implement the _mm_dp_ps and _mm_dp_pd functions.
            // Conditionally multiplies the packed floating-point elements in
            // `left` and `right` using the high 4 bits in `imm`, sums the four
            // products, and conditionally stores the sum in `dest` using the low
            // 4 bits of `imm`.
            "dpps" | "dppd" => {
                let [left, right, imm] =
                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

                conditional_dot_product(this, left, right, imm, dest)?;
            }
            // Used to implement the _mm_floor_ss, _mm_ceil_ss and _mm_round_ss
            // functions. Rounds the first element of `right` according to `rounding`
            // and copies the remaining elements from `left`.
            "round.ss" => {
                let [left, right, rounding] =
                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

                round_first::<rustc_apfloat::ieee::Single>(this, left, right, rounding, dest)?;
            }
            // Used to implement the _mm_floor_ps, _mm_ceil_ps and _mm_round_ps
            // functions. Rounds the elements of `op` according to `rounding`.
            "round.ps" => {
                let [op, rounding] =
                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

                round_all::<rustc_apfloat::ieee::Single>(this, op, rounding, dest)?;
            }
            // Used to implement the _mm_floor_sd, _mm_ceil_sd and _mm_round_sd
            // functions. Rounds the first element of `right` according to `rounding`
            // and copies the remaining elements from `left`.
            "round.sd" => {
                let [left, right, rounding] =
                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

                round_first::<rustc_apfloat::ieee::Double>(this, left, right, rounding, dest)?;
            }
            // Used to implement the _mm_floor_pd, _mm_ceil_pd and _mm_round_pd
            // functions. Rounds the elements of `op` according to `rounding`.
            "round.pd" => {
                let [op, rounding] =
                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

                round_all::<rustc_apfloat::ieee::Double>(this, op, rounding, dest)?;
            }
            // Used to implement the _mm_minpos_epu16 function.
            // Find the minimum unsinged 16-bit integer in `op` and
            // returns its value and position.
            "phminposuw" => {
                let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

                let (op, op_len) = this.operand_to_simd(op)?;
                let (dest, dest_len) = this.mplace_to_simd(dest)?;

                // Find minimum
                let mut min_value = u16::MAX;
                let mut min_index = 0;
                for i in 0..op_len {
                    let op = this.read_scalar(&this.project_index(&op, i)?)?.to_u16()?;
                    if op < min_value {
                        min_value = op;
                        min_index = i;
                    }
                }

                // Write value and index
                this.write_scalar(Scalar::from_u16(min_value), &this.project_index(&dest, 0)?)?;
                this.write_scalar(
                    Scalar::from_u16(min_index.try_into().unwrap()),
                    &this.project_index(&dest, 1)?,
                )?;
                // Fill remainder with zeros
                for i in 2..dest_len {
                    this.write_scalar(Scalar::from_u16(0), &this.project_index(&dest, i)?)?;
                }
            }
            // Used to implement the _mm_mpsadbw_epu8 function.
            // Compute the sum of absolute differences of quadruplets of unsigned
            // 8-bit integers in `left` and `right`, and store the 16-bit results
            // in `right`. Quadruplets are selected from `left` and `right` with
            // offsets specified in `imm`.
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mpsadbw_epu8
            "mpsadbw" => {
                let [left, right, imm] =
                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

                mpsadbw(this, left, right, imm, dest)?;
            }
            // Used to implement the _mm_testz_si128, _mm_testc_si128
            // and _mm_testnzc_si128 functions.
            // Tests `(op & mask) == 0`, `(op & mask) == mask` or
            // `(op & mask) != 0 && (op & mask) != mask`
            "ptestz" | "ptestc" | "ptestnzc" => {
                let [op, mask] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

                let (all_zero, masked_set) = test_bits_masked(this, op, mask)?;
                let res = match unprefixed_name {
                    "ptestz" => all_zero,
                    "ptestc" => masked_set,
                    "ptestnzc" => !all_zero && !masked_set,
                    _ => unreachable!(),
                };

                this.write_scalar(Scalar::from_i32(res.into()), dest)?;
            }
            _ => return Ok(EmulateItemResult::NotSupported),
        }
        Ok(EmulateItemResult::NeedsReturn)
    }
}