Skip to main content

miri/shims/x86/
sse42.rs

1use rustc_abi::{CanonAbi, Size};
2use rustc_middle::mir;
3use rustc_middle::ty::Ty;
4use rustc_span::Symbol;
5use rustc_target::callconv::FnAbi;
6use rustc_target::spec::Arch;
7
8use crate::shims::math::compute_crc32;
9use crate::*;
10
/// A bitmask constant for scrutinizing the immediate byte provided
/// to the string comparison intrinsics. It distinguishes between
/// 16-bit integers and 8-bit integers: when the bit is set, the operands
/// are treated as arrays of eight 16-bit words, otherwise as arrays of
/// sixteen bytes. See [`compare_strings`] for more details about the
/// immediate byte.
const USE_WORDS: u8 = 1;
16
/// A bitmask constant for scrutinizing the immediate byte provided
/// to the string comparison intrinsics. It distinguishes between
/// signed integers and unsigned integers: when the bit is set, elements
/// are interpreted as signed values. In this file the flag is only
/// consulted by the "string ranges" comparison. See [`compare_strings`]
/// for more details about the immediate byte.
const USE_SIGNED: u8 = 2;
22
23/// The main worker for the string comparison intrinsics, where the given
24/// strings are analyzed according to the given immediate byte.
25///
26/// # Arguments
27///
28/// * `str1` - The first string argument. It is always a length 16 array of bytes
29///   or a length 8 array of two-byte words.
30/// * `str2` - The second string argument. It is always a length 16 array of bytes
31///   or a length 8 array of two-byte words.
32/// * `len` is the length values of the supplied strings. It is distinct from the operand length
33///   in that it describes how much of `str1` and `str2` will be used for the calculation and may
34///   be smaller than the array length of `str1` and `str2`. The string length is counted in bytes
35///   if using byte operands and in two-byte words when using two-byte word operands.
36///   If the value is `None`, the length of a string is determined by the first
37///   null value inside the string.
38/// * `imm` is the immediate byte argument supplied to the intrinsic. The byte influences
39///   the operation as follows:
40///
41///   ```text
42///   0babccddef
43///     || | |||- Use of bytes vs use of two-byte words inside the operation.
44///     || | ||
45///     || | ||- Use of signed values versus use of unsigned values.
46///     || | |
47///     || | |- The comparison operation performed. A total of four operations are available.
48///     || |    * Equal any: Checks which characters of `str2` are inside `str1`.
49///     || |    * String ranges: Check if characters in `str2` are inside the provided character ranges.
50///     || |      Adjacent characters in `str1` constitute one range.
51///     || |    * String comparison: Mark positions where `str1` and `str2` have the same character.
52///     || |    * Substring search: Mark positions where `str1` is a substring in `str2`.
53///     || |
54///     || |- Result Polarity. The result bits may be subjected to a bitwise complement
55///     ||    if these bits are set.
56///     ||
57///     ||- Output selection. This bit has two meanings depending on the instruction.
58///     |   If the instruction is generating a mask, it distinguishes between a bit mask
59///     |   and a byte mask. Otherwise it distinguishes between the most significand bit
60///     |   and the least significand bit when generating an index.
61///     |
62///     |- This bit is ignored. It is expected that this bit is set to zero, but it is
63///        not a requirement.
64///   ```
65///
66/// # Returns
67///
68/// A result mask. The bit at index `i` inside the mask is set if 'str2' starting at `i`
69/// fulfills the test as defined inside the immediate byte.
70/// The mask may be negated if negation flags inside the immediate byte are set.
71///
72/// For more information, see the Intel Software Developer's Manual, Vol. 2b, Chapter 4.1.
73#[expect(clippy::arithmetic_side_effects)]
74fn compare_strings<'tcx>(
75    ecx: &mut MiriInterpCx<'tcx>,
76    str1: &OpTy<'tcx>,
77    str2: &OpTy<'tcx>,
78    len: Option<(u64, u64)>,
79    imm: u8,
80) -> InterpResult<'tcx, i32> {
81    let default_len = default_len::<u64>(imm);
82    let (len1, len2) = if let Some(t) = len {
83        t
84    } else {
85        let len1 = implicit_len(ecx, str1, imm)?.unwrap_or(default_len);
86        let len2 = implicit_len(ecx, str2, imm)?.unwrap_or(default_len);
87        (len1, len2)
88    };
89
90    let mut result = 0;
91    match (imm >> 2) & 3 {
92        0 => {
93            // Equal any: Checks which characters of `str2` are inside `str1`.
94            for i in 0..len2 {
95                let ch2 = ecx.read_immediate(&ecx.project_index(str2, i)?)?;
96
97                for j in 0..len1 {
98                    let ch1 = ecx.read_immediate(&ecx.project_index(str1, j)?)?;
99
100                    let eq = ecx.binary_op(mir::BinOp::Eq, &ch1, &ch2)?;
101                    if eq.to_scalar().to_bool()? {
102                        result |= 1 << i;
103                        break;
104                    }
105                }
106            }
107        }
108        1 => {
109            // String ranges: Check if characters in `str2` are inside the provided character ranges.
110            // Adjacent characters in `str1` constitute one range.
111            let len1 = len1 - (len1 & 1);
112            let get_ch = |ch: Scalar| -> InterpResult<'tcx, i32> {
113                let result = match (imm & USE_WORDS != 0, imm & USE_SIGNED != 0) {
114                    (true, true) => i32::from(ch.to_i16()?),
115                    (true, false) => i32::from(ch.to_u16()?),
116                    (false, true) => i32::from(ch.to_i8()?),
117                    (false, false) => i32::from(ch.to_u8()?),
118                };
119                interp_ok(result)
120            };
121
122            for i in 0..len2 {
123                for j in (0..len1).step_by(2) {
124                    let ch2 = get_ch(ecx.read_scalar(&ecx.project_index(str2, i)?)?)?;
125                    let ch1_1 = get_ch(ecx.read_scalar(&ecx.project_index(str1, j)?)?)?;
126                    let ch1_2 = get_ch(ecx.read_scalar(&ecx.project_index(str1, j + 1)?)?)?;
127
128                    if ch1_1 <= ch2 && ch2 <= ch1_2 {
129                        result |= 1 << i;
130                    }
131                }
132            }
133        }
134        2 => {
135            // String comparison: Mark positions where `str1` and `str2` have the same character.
136            result = (1 << default_len) - 1;
137            result ^= (1 << len1.max(len2)) - 1;
138
139            for i in 0..len1.min(len2) {
140                let ch1 = ecx.read_immediate(&ecx.project_index(str1, i)?)?;
141                let ch2 = ecx.read_immediate(&ecx.project_index(str2, i)?)?;
142                let eq = ecx.binary_op(mir::BinOp::Eq, &ch1, &ch2)?;
143                result |= i32::from(eq.to_scalar().to_bool()?) << i;
144            }
145        }
146        3 => {
147            // Substring search: Mark positions where `str1` is a substring in `str2`.
148            if len1 == 0 {
149                result = (1 << default_len) - 1;
150            } else if len1 <= len2 {
151                for i in 0..len2 {
152                    if len1 > len2 - i {
153                        break;
154                    }
155
156                    result |= 1 << i;
157
158                    for j in 0..len1 {
159                        let k = i + j;
160
161                        if k >= default_len {
162                            break;
163                        } else {
164                            let ch1 = ecx.read_immediate(&ecx.project_index(str1, j)?)?;
165                            let ch2 = ecx.read_immediate(&ecx.project_index(str2, k)?)?;
166                            let ne = ecx.binary_op(mir::BinOp::Ne, &ch1, &ch2)?;
167
168                            if ne.to_scalar().to_bool()? {
169                                result &= !(1 << i);
170                                break;
171                            }
172                        }
173                    }
174                }
175            }
176        }
177        _ => unreachable!(),
178    }
179
180    // Polarity: Possibly perform a bitwise complement on the result.
181    match (imm >> 4) & 3 {
182        3 => result ^= (1 << len1) - 1,
183        1 => result ^= (1 << default_len) - 1,
184        _ => (),
185    }
186
187    interp_ok(result)
188}
189
190/// Obtain the arguments of the intrinsic based on its name.
191/// The result is a tuple with the following values:
192/// * The first string argument.
193/// * The second string argument.
194/// * The string length values, if the intrinsic requires them.
195/// * The immediate instruction byte.
196///
197/// The string arguments will be transmuted into arrays of bytes
198/// or two-byte words, depending on the value of the immediate byte.
199/// Originally, they are [__m128i](https://doc.rust-lang.org/stable/core/arch/x86_64/struct.__m128i.html) values
200/// corresponding to the x86 128-bit integer SIMD type.
201fn deconstruct_args<'tcx>(
202    unprefixed_name: &str,
203    ecx: &mut MiriInterpCx<'tcx>,
204    link_name: Symbol,
205    abi: &FnAbi<'tcx, Ty<'tcx>>,
206    args: &[OpTy<'tcx>],
207) -> InterpResult<'tcx, (OpTy<'tcx>, OpTy<'tcx>, Option<(u64, u64)>, u8)> {
208    let array_layout_fn = |ecx: &mut MiriInterpCx<'tcx>, imm: u8| {
209        if imm & USE_WORDS != 0 {
210            ecx.layout_of(Ty::new_array(ecx.tcx.tcx, ecx.tcx.types.u16, 8))
211        } else {
212            ecx.layout_of(Ty::new_array(ecx.tcx.tcx, ecx.tcx.types.u8, 16))
213        }
214    };
215
216    // The fourth letter of each string comparison intrinsic is either 'e' for "explicit" or 'i' for "implicit".
217    // The distinction will correspond to the intrinsics type signature. In this context, "explicit" and "implicit"
218    // refer to the way the string length is determined. The length is either passed explicitly in the "explicit"
219    // case or determined by a null terminator in the "implicit" case.
220    let is_explicit = match unprefixed_name.as_bytes().get(4) {
221        Some(&b'e') => true,
222        Some(&b'i') => false,
223        _ => unreachable!(),
224    };
225
226    if is_explicit {
227        let [str1, len1, str2, len2, imm] =
228            ecx.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
229        let imm = ecx.read_scalar(imm)?.to_u8()?;
230
231        let default_len = default_len::<u32>(imm);
232        let len1 = u64::from(ecx.read_scalar(len1)?.to_u32()?.min(default_len));
233        let len2 = u64::from(ecx.read_scalar(len2)?.to_u32()?.min(default_len));
234
235        let array_layout = array_layout_fn(ecx, imm)?;
236        let str1 = str1.transmute(array_layout, ecx)?;
237        let str2 = str2.transmute(array_layout, ecx)?;
238
239        interp_ok((str1, str2, Some((len1, len2)), imm))
240    } else {
241        let [str1, str2, imm] = ecx.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
242        let imm = ecx.read_scalar(imm)?.to_u8()?;
243
244        let array_layout = array_layout_fn(ecx, imm)?;
245        let str1 = str1.transmute(array_layout, ecx)?;
246        let str2 = str2.transmute(array_layout, ecx)?;
247
248        interp_ok((str1, str2, None, imm))
249    }
250}
251
252/// Calculate the c-style string length for a given string `str`.
253/// The string is either a length 16 array of bytes a length 8 array of two-byte words.
254fn implicit_len<'tcx>(
255    ecx: &mut MiriInterpCx<'tcx>,
256    str: &OpTy<'tcx>,
257    imm: u8,
258) -> InterpResult<'tcx, Option<u64>> {
259    let mut result = None;
260    let zero = ImmTy::from_int(0, str.layout.field(ecx, 0));
261
262    for i in 0..default_len::<u64>(imm) {
263        let ch = ecx.read_immediate(&ecx.project_index(str, i)?)?;
264        let is_zero = ecx.binary_op(mir::BinOp::Eq, &ch, &zero)?;
265        if is_zero.to_scalar().to_bool()? {
266            result = Some(i);
267            break;
268        }
269    }
270    interp_ok(result)
271}
272
273#[inline]
274fn default_len<T: From<u8>>(imm: u8) -> T {
275    if imm & USE_WORDS != 0 { T::from(8u8) } else { T::from(16u8) }
276}
277
278impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
279pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
280    fn emulate_x86_sse42_intrinsic(
281        &mut self,
282        link_name: Symbol,
283        abi: &FnAbi<'tcx, Ty<'tcx>>,
284        args: &[OpTy<'tcx>],
285        dest: &MPlaceTy<'tcx>,
286    ) -> InterpResult<'tcx, EmulateItemResult> {
287        let this = self.eval_context_mut();
288        this.expect_target_feature_for_intrinsic(link_name, "sse4.2")?;
289        // Prefix should have already been checked.
290        let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sse42.").unwrap();
291
292        match unprefixed_name {
293            // Used to implement the `_mm_cmpestrm` and the `_mm_cmpistrm` functions.
294            // These functions compare the input strings and return the resulting mask.
295            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=1044,922
296            "pcmpistrm128" | "pcmpestrm128" => {
297                let (str1, str2, len, imm) =
298                    deconstruct_args(unprefixed_name, this, link_name, abi, args)?;
299                let mask = compare_strings(this, &str1, &str2, len, imm)?;
300
301                // The sixth bit inside the immediate byte distinguishes
302                // between a bit mask or a byte mask when generating a mask.
303                if imm & 0b100_0000 != 0 {
304                    let (array_layout, size) = if imm & USE_WORDS != 0 {
305                        (this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u16, 8))?, 2)
306                    } else {
307                        (this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u8, 16))?, 1)
308                    };
309                    let size = Size::from_bytes(size);
310                    let dest = dest.transmute(array_layout, this)?;
311
312                    for i in 0..default_len::<u64>(imm) {
313                        let result = helpers::bool_to_simd_element(mask & (1 << i) != 0, size);
314                        this.write_scalar(result, &this.project_index(&dest, i)?)?;
315                    }
316                } else {
317                    let layout = this.layout_of(this.tcx.types.i128)?;
318                    let dest = dest.transmute(layout, this)?;
319                    this.write_scalar(Scalar::from_i128(i128::from(mask)), &dest)?;
320                }
321            }
322
323            // Used to implement the `_mm_cmpestra` and the `_mm_cmpistra` functions.
324            // These functions compare the input strings and return `1` if the end of the second
325            // input string is not reached and the resulting mask is zero, and `0` otherwise.
326            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=919,1041
327            "pcmpistria128" | "pcmpestria128" => {
328                let (str1, str2, len, imm) =
329                    deconstruct_args(unprefixed_name, this, link_name, abi, args)?;
330                let result = if compare_strings(this, &str1, &str2, len, imm)? != 0 {
331                    false
332                } else if let Some((_, len)) = len {
333                    len >= default_len::<u64>(imm)
334                } else {
335                    implicit_len(this, &str1, imm)?.is_some()
336                };
337
338                this.write_scalar(Scalar::from_i32(i32::from(result)), dest)?;
339            }
340
341            // Used to implement the `_mm_cmpestri` and the `_mm_cmpistri` functions.
342            // These functions compare the input strings and return the bit index
343            // for most significant or least significant bit of the resulting mask.
344            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=921,1043
345            "pcmpistri128" | "pcmpestri128" => {
346                let (str1, str2, len, imm) =
347                    deconstruct_args(unprefixed_name, this, link_name, abi, args)?;
348                let mask = compare_strings(this, &str1, &str2, len, imm)?;
349
350                let len = default_len::<u32>(imm);
351                // The sixth bit inside the immediate byte distinguishes between the least
352                // significant bit and the most significant bit when generating an index.
353                let result = if imm & 0b100_0000 != 0 {
354                    // most significant bit
355                    31u32.wrapping_sub(mask.leading_zeros()).min(len)
356                } else {
357                    // least significant bit
358                    mask.trailing_zeros().min(len)
359                };
360                this.write_scalar(Scalar::from_i32(i32::try_from(result).unwrap()), dest)?;
361            }
362
363            // Used to implement the `_mm_cmpestro` and the `_mm_cmpistro` functions.
364            // These functions compare the input strings and return the lowest bit of the
365            // resulting mask.
366            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=923,1045
367            "pcmpistrio128" | "pcmpestrio128" => {
368                let (str1, str2, len, imm) =
369                    deconstruct_args(unprefixed_name, this, link_name, abi, args)?;
370                let mask = compare_strings(this, &str1, &str2, len, imm)?;
371                this.write_scalar(Scalar::from_i32(mask & 1), dest)?;
372            }
373
374            // Used to implement the `_mm_cmpestrc` and the `_mm_cmpistrc` functions.
375            // These functions compare the input strings and return `1` if the resulting
376            // mask was non-zero, and `0` otherwise.
377            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=920,1042
378            "pcmpistric128" | "pcmpestric128" => {
379                let (str1, str2, len, imm) =
380                    deconstruct_args(unprefixed_name, this, link_name, abi, args)?;
381                let mask = compare_strings(this, &str1, &str2, len, imm)?;
382                this.write_scalar(Scalar::from_i32(i32::from(mask != 0)), dest)?;
383            }
384
385            // Used to implement the `_mm_cmpistrz` and the `_mm_cmpistrs` functions.
386            // These functions return `1` if the string end has been reached and `0` otherwise.
387            // Since these functions define the string length implicitly, it is equal to a
388            // search for a null terminator (see `deconstruct_args` for more details).
389            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=924,925
390            "pcmpistriz128" | "pcmpistris128" => {
391                let [str1, str2, imm] =
392                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
393                let imm = this.read_scalar(imm)?.to_u8()?;
394
395                let str = if unprefixed_name == "pcmpistris128" { str1 } else { str2 };
396                let array_layout = if imm & USE_WORDS != 0 {
397                    this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u16, 8))?
398                } else {
399                    this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u8, 16))?
400                };
401                let str = str.transmute(array_layout, this)?;
402                let result = implicit_len(this, &str, imm)?.is_some();
403
404                this.write_scalar(Scalar::from_i32(i32::from(result)), dest)?;
405            }
406
407            // Used to implement the `_mm_cmpestrz` and the `_mm_cmpestrs` functions.
408            // These functions return 1 if the explicitly passed string length is smaller
409            // than 16 for byte-sized operands or 8 for word-sized operands.
410            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=1046,1047
411            "pcmpestriz128" | "pcmpestris128" => {
412                let [_, len1, _, len2, imm] =
413                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
414                let len = if unprefixed_name == "pcmpestris128" { len1 } else { len2 };
415                let len = this.read_scalar(len)?.to_i32()?;
416                let imm = this.read_scalar(imm)?.to_u8()?;
417                this.write_scalar(
418                    Scalar::from_i32(i32::from(len < default_len::<i32>(imm))),
419                    dest,
420                )?;
421            }
422
423            // Used to implement the `_mm_crc32_u{8, 16, 32, 64}` functions.
424            // These functions calculate a 32-bit CRC using `0x11EDC6F41`
425            // as the polynomial, also known as CRC32C.
426            // https://datatracker.ietf.org/doc/html/rfc3720#section-12.1
427            "crc32.32.8" | "crc32.32.16" | "crc32.32.32" | "crc32.64.64" => {
428                let bit_size = match unprefixed_name {
429                    "crc32.32.8" => 8,
430                    "crc32.32.16" => 16,
431                    "crc32.32.32" => 32,
432                    "crc32.64.64" => 64,
433                    _ => unreachable!(),
434                };
435
436                if bit_size == 64 && this.tcx.sess.target.arch != Arch::X86_64 {
437                    return interp_ok(EmulateItemResult::NotSupported);
438                }
439
440                let [left, right] =
441                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
442                let left = this.read_scalar(left)?;
443                let right = this.read_scalar(right)?;
444
445                let crc = if bit_size == 64 {
446                    // The 64-bit version will only consider the lower 32 bits,
447                    // while the upper 32 bits get discarded.
448                    #[expect(clippy::as_conversions)]
449                    (left.to_u64()? as u32)
450                } else {
451                    left.to_u32()?
452                };
453                let data = match bit_size {
454                    8 => u64::from(right.to_u8()?),
455                    16 => u64::from(right.to_u16()?),
456                    32 => u64::from(right.to_u32()?),
457                    64 => right.to_u64()?,
458                    _ => unreachable!(),
459                };
460
461                let result = compute_crc32(crc, data, bit_size, 0x11EDC6F41);
462                let result = if bit_size == 64 {
463                    Scalar::from_u64(u64::from(result))
464                } else {
465                    Scalar::from_u32(result)
466                };
467
468                this.write_scalar(result, dest)?;
469            }
470            _ => return interp_ok(EmulateItemResult::NotSupported),
471        }
472        interp_ok(EmulateItemResult::NeedsReturn)
473    }
474}