miri/shims/x86/
sse42.rs

1use rustc_abi::{CanonAbi, Size};
2use rustc_middle::mir;
3use rustc_middle::ty::Ty;
4use rustc_span::Symbol;
5use rustc_target::callconv::FnAbi;
6use rustc_target::spec::Arch;
7
8use crate::*;
9
10/// A bitmask constant for scrutinizing the immediate byte provided
11/// to the string comparison intrinsics. It distinuishes between
12/// 16-bit integers and 8-bit integers. See [`compare_strings`]
13/// for more details about the immediate byte.
14const USE_WORDS: u8 = 1;
15
16/// A bitmask constant for scrutinizing the immediate byte provided
17/// to the string comparison intrinsics. It distinuishes between
18/// signed integers and unsigned integers. See [`compare_strings`]
19/// for more details about the immediate byte.
20const USE_SIGNED: u8 = 2;
21
22/// The main worker for the string comparison intrinsics, where the given
23/// strings are analyzed according to the given immediate byte.
24///
25/// # Arguments
26///
27/// * `str1` - The first string argument. It is always a length 16 array of bytes
28///   or a length 8 array of two-byte words.
29/// * `str2` - The second string argument. It is always a length 16 array of bytes
30///   or a length 8 array of two-byte words.
31/// * `len` is the length values of the supplied strings. It is distinct from the operand length
32///   in that it describes how much of `str1` and `str2` will be used for the calculation and may
33///   be smaller than the array length of `str1` and `str2`. The string length is counted in bytes
34///   if using byte operands and in two-byte words when using two-byte word operands.
35///   If the value is `None`, the length of a string is determined by the first
36///   null value inside the string.
37/// * `imm` is the immediate byte argument supplied to the intrinsic. The byte influences
38///   the operation as follows:
39///
40///   ```text
41///   0babccddef
42///     || | |||- Use of bytes vs use of two-byte words inside the operation.
43///     || | ||
44///     || | ||- Use of signed values versus use of unsigned values.
45///     || | |
46///     || | |- The comparison operation performed. A total of four operations are available.
47///     || |    * Equal any: Checks which characters of `str2` are inside `str1`.
48///     || |    * String ranges: Check if characters in `str2` are inside the provided character ranges.
49///     || |      Adjacent characters in `str1` constitute one range.
50///     || |    * String comparison: Mark positions where `str1` and `str2` have the same character.
51///     || |    * Substring search: Mark positions where `str1` is a substring in `str2`.
52///     || |
53///     || |- Result Polarity. The result bits may be subjected to a bitwise complement
54///     ||    if these bits are set.
55///     ||
56///     ||- Output selection. This bit has two meanings depending on the instruction.
57///     |   If the instruction is generating a mask, it distinguishes between a bit mask
58///     |   and a byte mask. Otherwise it distinguishes between the most significand bit
59///     |   and the least significand bit when generating an index.
60///     |
61///     |- This bit is ignored. It is expected that this bit is set to zero, but it is
62///        not a requirement.
63///   ```
64///
65/// # Returns
66///
67/// A result mask. The bit at index `i` inside the mask is set if 'str2' starting at `i`
68/// fulfills the test as defined inside the immediate byte.
69/// The mask may be negated if negation flags inside the immediate byte are set.
70///
71/// For more information, see the Intel Software Developer's Manual, Vol. 2b, Chapter 4.1.
72#[expect(clippy::arithmetic_side_effects)]
73fn compare_strings<'tcx>(
74    ecx: &mut MiriInterpCx<'tcx>,
75    str1: &OpTy<'tcx>,
76    str2: &OpTy<'tcx>,
77    len: Option<(u64, u64)>,
78    imm: u8,
79) -> InterpResult<'tcx, i32> {
80    let default_len = default_len::<u64>(imm);
81    let (len1, len2) = if let Some(t) = len {
82        t
83    } else {
84        let len1 = implicit_len(ecx, str1, imm)?.unwrap_or(default_len);
85        let len2 = implicit_len(ecx, str2, imm)?.unwrap_or(default_len);
86        (len1, len2)
87    };
88
89    let mut result = 0;
90    match (imm >> 2) & 3 {
91        0 => {
92            // Equal any: Checks which characters of `str2` are inside `str1`.
93            for i in 0..len2 {
94                let ch2 = ecx.read_immediate(&ecx.project_index(str2, i)?)?;
95
96                for j in 0..len1 {
97                    let ch1 = ecx.read_immediate(&ecx.project_index(str1, j)?)?;
98
99                    let eq = ecx.binary_op(mir::BinOp::Eq, &ch1, &ch2)?;
100                    if eq.to_scalar().to_bool()? {
101                        result |= 1 << i;
102                        break;
103                    }
104                }
105            }
106        }
107        1 => {
108            // String ranges: Check if characters in `str2` are inside the provided character ranges.
109            // Adjacent characters in `str1` constitute one range.
110            let len1 = len1 - (len1 & 1);
111            let get_ch = |ch: Scalar| -> InterpResult<'tcx, i32> {
112                let result = match (imm & USE_WORDS != 0, imm & USE_SIGNED != 0) {
113                    (true, true) => i32::from(ch.to_i16()?),
114                    (true, false) => i32::from(ch.to_u16()?),
115                    (false, true) => i32::from(ch.to_i8()?),
116                    (false, false) => i32::from(ch.to_u8()?),
117                };
118                interp_ok(result)
119            };
120
121            for i in 0..len2 {
122                for j in (0..len1).step_by(2) {
123                    let ch2 = get_ch(ecx.read_scalar(&ecx.project_index(str2, i)?)?)?;
124                    let ch1_1 = get_ch(ecx.read_scalar(&ecx.project_index(str1, j)?)?)?;
125                    let ch1_2 = get_ch(ecx.read_scalar(&ecx.project_index(str1, j + 1)?)?)?;
126
127                    if ch1_1 <= ch2 && ch2 <= ch1_2 {
128                        result |= 1 << i;
129                    }
130                }
131            }
132        }
133        2 => {
134            // String comparison: Mark positions where `str1` and `str2` have the same character.
135            result = (1 << default_len) - 1;
136            result ^= (1 << len1.max(len2)) - 1;
137
138            for i in 0..len1.min(len2) {
139                let ch1 = ecx.read_immediate(&ecx.project_index(str1, i)?)?;
140                let ch2 = ecx.read_immediate(&ecx.project_index(str2, i)?)?;
141                let eq = ecx.binary_op(mir::BinOp::Eq, &ch1, &ch2)?;
142                result |= i32::from(eq.to_scalar().to_bool()?) << i;
143            }
144        }
145        3 => {
146            // Substring search: Mark positions where `str1` is a substring in `str2`.
147            if len1 == 0 {
148                result = (1 << default_len) - 1;
149            } else if len1 <= len2 {
150                for i in 0..len2 {
151                    if len1 > len2 - i {
152                        break;
153                    }
154
155                    result |= 1 << i;
156
157                    for j in 0..len1 {
158                        let k = i + j;
159
160                        if k >= default_len {
161                            break;
162                        } else {
163                            let ch1 = ecx.read_immediate(&ecx.project_index(str1, j)?)?;
164                            let ch2 = ecx.read_immediate(&ecx.project_index(str2, k)?)?;
165                            let ne = ecx.binary_op(mir::BinOp::Ne, &ch1, &ch2)?;
166
167                            if ne.to_scalar().to_bool()? {
168                                result &= !(1 << i);
169                                break;
170                            }
171                        }
172                    }
173                }
174            }
175        }
176        _ => unreachable!(),
177    }
178
179    // Polarity: Possibly perform a bitwise complement on the result.
180    match (imm >> 4) & 3 {
181        3 => result ^= (1 << len1) - 1,
182        1 => result ^= (1 << default_len) - 1,
183        _ => (),
184    }
185
186    interp_ok(result)
187}
188
189/// Obtain the arguments of the intrinsic based on its name.
190/// The result is a tuple with the following values:
191/// * The first string argument.
192/// * The second string argument.
193/// * The string length values, if the intrinsic requires them.
194/// * The immediate instruction byte.
195///
196/// The string arguments will be transmuted into arrays of bytes
197/// or two-byte words, depending on the value of the immediate byte.
198/// Originally, they are [__m128i](https://doc.rust-lang.org/stable/core/arch/x86_64/struct.__m128i.html) values
199/// corresponding to the x86 128-bit integer SIMD type.
200fn deconstruct_args<'tcx>(
201    unprefixed_name: &str,
202    ecx: &mut MiriInterpCx<'tcx>,
203    link_name: Symbol,
204    abi: &FnAbi<'tcx, Ty<'tcx>>,
205    args: &[OpTy<'tcx>],
206) -> InterpResult<'tcx, (OpTy<'tcx>, OpTy<'tcx>, Option<(u64, u64)>, u8)> {
207    let array_layout_fn = |ecx: &mut MiriInterpCx<'tcx>, imm: u8| {
208        if imm & USE_WORDS != 0 {
209            ecx.layout_of(Ty::new_array(ecx.tcx.tcx, ecx.tcx.types.u16, 8))
210        } else {
211            ecx.layout_of(Ty::new_array(ecx.tcx.tcx, ecx.tcx.types.u8, 16))
212        }
213    };
214
215    // The fourth letter of each string comparison intrinsic is either 'e' for "explicit" or 'i' for "implicit".
216    // The distinction will correspond to the intrinsics type signature. In this constext, "explicit" and "implicit"
217    // refer to the way the string length is determined. The length is either passed explicitly in the "explicit"
218    // case or determined by a null terminator in the "implicit" case.
219    let is_explicit = match unprefixed_name.as_bytes().get(4) {
220        Some(&b'e') => true,
221        Some(&b'i') => false,
222        _ => unreachable!(),
223    };
224
225    if is_explicit {
226        let [str1, len1, str2, len2, imm] =
227            ecx.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
228        let imm = ecx.read_scalar(imm)?.to_u8()?;
229
230        let default_len = default_len::<u32>(imm);
231        let len1 = u64::from(ecx.read_scalar(len1)?.to_u32()?.min(default_len));
232        let len2 = u64::from(ecx.read_scalar(len2)?.to_u32()?.min(default_len));
233
234        let array_layout = array_layout_fn(ecx, imm)?;
235        let str1 = str1.transmute(array_layout, ecx)?;
236        let str2 = str2.transmute(array_layout, ecx)?;
237
238        interp_ok((str1, str2, Some((len1, len2)), imm))
239    } else {
240        let [str1, str2, imm] = ecx.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
241        let imm = ecx.read_scalar(imm)?.to_u8()?;
242
243        let array_layout = array_layout_fn(ecx, imm)?;
244        let str1 = str1.transmute(array_layout, ecx)?;
245        let str2 = str2.transmute(array_layout, ecx)?;
246
247        interp_ok((str1, str2, None, imm))
248    }
249}
250
251/// Calculate the c-style string length for a given string `str`.
252/// The string is either a length 16 array of bytes a length 8 array of two-byte words.
253fn implicit_len<'tcx>(
254    ecx: &mut MiriInterpCx<'tcx>,
255    str: &OpTy<'tcx>,
256    imm: u8,
257) -> InterpResult<'tcx, Option<u64>> {
258    let mut result = None;
259    let zero = ImmTy::from_int(0, str.layout.field(ecx, 0));
260
261    for i in 0..default_len::<u64>(imm) {
262        let ch = ecx.read_immediate(&ecx.project_index(str, i)?)?;
263        let is_zero = ecx.binary_op(mir::BinOp::Eq, &ch, &zero)?;
264        if is_zero.to_scalar().to_bool()? {
265            result = Some(i);
266            break;
267        }
268    }
269    interp_ok(result)
270}
271
272#[inline]
273fn default_len<T: From<u8>>(imm: u8) -> T {
274    if imm & USE_WORDS != 0 { T::from(8u8) } else { T::from(16u8) }
275}
276
277impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
278pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
279    fn emulate_x86_sse42_intrinsic(
280        &mut self,
281        link_name: Symbol,
282        abi: &FnAbi<'tcx, Ty<'tcx>>,
283        args: &[OpTy<'tcx>],
284        dest: &MPlaceTy<'tcx>,
285    ) -> InterpResult<'tcx, EmulateItemResult> {
286        let this = self.eval_context_mut();
287        this.expect_target_feature_for_intrinsic(link_name, "sse4.2")?;
288        // Prefix should have already been checked.
289        let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sse42.").unwrap();
290
291        match unprefixed_name {
292            // Used to implement the `_mm_cmpestrm` and the `_mm_cmpistrm` functions.
293            // These functions compare the input strings and return the resulting mask.
294            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=1044,922
295            "pcmpistrm128" | "pcmpestrm128" => {
296                let (str1, str2, len, imm) =
297                    deconstruct_args(unprefixed_name, this, link_name, abi, args)?;
298                let mask = compare_strings(this, &str1, &str2, len, imm)?;
299
300                // The sixth bit inside the immediate byte distiguishes
301                // between a bit mask or a byte mask when generating a mask.
302                if imm & 0b100_0000 != 0 {
303                    let (array_layout, size) = if imm & USE_WORDS != 0 {
304                        (this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u16, 8))?, 2)
305                    } else {
306                        (this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u8, 16))?, 1)
307                    };
308                    let size = Size::from_bytes(size);
309                    let dest = dest.transmute(array_layout, this)?;
310
311                    for i in 0..default_len::<u64>(imm) {
312                        let result = helpers::bool_to_simd_element(mask & (1 << i) != 0, size);
313                        this.write_scalar(result, &this.project_index(&dest, i)?)?;
314                    }
315                } else {
316                    let layout = this.layout_of(this.tcx.types.i128)?;
317                    let dest = dest.transmute(layout, this)?;
318                    this.write_scalar(Scalar::from_i128(i128::from(mask)), &dest)?;
319                }
320            }
321
322            // Used to implement the `_mm_cmpestra` and the `_mm_cmpistra` functions.
323            // These functions compare the input strings and return `1` if the end of the second
324            // input string is not reached and the resulting mask is zero, and `0` otherwise.
325            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=919,1041
326            "pcmpistria128" | "pcmpestria128" => {
327                let (str1, str2, len, imm) =
328                    deconstruct_args(unprefixed_name, this, link_name, abi, args)?;
329                let result = if compare_strings(this, &str1, &str2, len, imm)? != 0 {
330                    false
331                } else if let Some((_, len)) = len {
332                    len >= default_len::<u64>(imm)
333                } else {
334                    implicit_len(this, &str1, imm)?.is_some()
335                };
336
337                this.write_scalar(Scalar::from_i32(i32::from(result)), dest)?;
338            }
339
340            // Used to implement the `_mm_cmpestri` and the `_mm_cmpistri` functions.
341            // These functions compare the input strings and return the bit index
342            // for most significant or least significant bit of the resulting mask.
343            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=921,1043
344            "pcmpistri128" | "pcmpestri128" => {
345                let (str1, str2, len, imm) =
346                    deconstruct_args(unprefixed_name, this, link_name, abi, args)?;
347                let mask = compare_strings(this, &str1, &str2, len, imm)?;
348
349                let len = default_len::<u32>(imm);
350                // The sixth bit inside the immediate byte distiguishes between the least
351                // significant bit and the most significant bit when generating an index.
352                let result = if imm & 0b100_0000 != 0 {
353                    // most significant bit
354                    31u32.wrapping_sub(mask.leading_zeros()).min(len)
355                } else {
356                    // least significant bit
357                    mask.trailing_zeros().min(len)
358                };
359                this.write_scalar(Scalar::from_i32(i32::try_from(result).unwrap()), dest)?;
360            }
361
362            // Used to implement the `_mm_cmpestro` and the `_mm_cmpistro` functions.
363            // These functions compare the input strings and return the lowest bit of the
364            // resulting mask.
365            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=923,1045
366            "pcmpistrio128" | "pcmpestrio128" => {
367                let (str1, str2, len, imm) =
368                    deconstruct_args(unprefixed_name, this, link_name, abi, args)?;
369                let mask = compare_strings(this, &str1, &str2, len, imm)?;
370                this.write_scalar(Scalar::from_i32(mask & 1), dest)?;
371            }
372
373            // Used to implement the `_mm_cmpestrc` and the `_mm_cmpistrc` functions.
374            // These functions compare the input strings and return `1` if the resulting
375            // mask was non-zero, and `0` otherwise.
376            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=920,1042
377            "pcmpistric128" | "pcmpestric128" => {
378                let (str1, str2, len, imm) =
379                    deconstruct_args(unprefixed_name, this, link_name, abi, args)?;
380                let mask = compare_strings(this, &str1, &str2, len, imm)?;
381                this.write_scalar(Scalar::from_i32(i32::from(mask != 0)), dest)?;
382            }
383
384            // Used to implement the `_mm_cmpistrz` and the `_mm_cmpistrs` functions.
385            // These functions return `1` if the string end has been reached and `0` otherwise.
386            // Since these functions define the string length implicitly, it is equal to a
387            // search for a null terminator (see `deconstruct_args` for more details).
388            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=924,925
389            "pcmpistriz128" | "pcmpistris128" => {
390                let [str1, str2, imm] =
391                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
392                let imm = this.read_scalar(imm)?.to_u8()?;
393
394                let str = if unprefixed_name == "pcmpistris128" { str1 } else { str2 };
395                let array_layout = if imm & USE_WORDS != 0 {
396                    this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u16, 8))?
397                } else {
398                    this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u8, 16))?
399                };
400                let str = str.transmute(array_layout, this)?;
401                let result = implicit_len(this, &str, imm)?.is_some();
402
403                this.write_scalar(Scalar::from_i32(i32::from(result)), dest)?;
404            }
405
406            // Used to implement the `_mm_cmpestrz` and the `_mm_cmpestrs` functions.
407            // These functions return 1 if the explicitly passed string length is smaller
408            // than 16 for byte-sized operands or 8 for word-sized operands.
409            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=1046,1047
410            "pcmpestriz128" | "pcmpestris128" => {
411                let [_, len1, _, len2, imm] =
412                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
413                let len = if unprefixed_name == "pcmpestris128" { len1 } else { len2 };
414                let len = this.read_scalar(len)?.to_i32()?;
415                let imm = this.read_scalar(imm)?.to_u8()?;
416                this.write_scalar(
417                    Scalar::from_i32(i32::from(len < default_len::<i32>(imm))),
418                    dest,
419                )?;
420            }
421
422            // Used to implement the `_mm_crc32_u{8, 16, 32, 64}` functions.
423            // These functions calculate a 32-bit CRC using `0x11EDC6F41`
424            // as the polynomial, also known as CRC32C.
425            // https://datatracker.ietf.org/doc/html/rfc3720#section-12.1
426            "crc32.32.8" | "crc32.32.16" | "crc32.32.32" | "crc32.64.64" => {
427                let bit_size = match unprefixed_name {
428                    "crc32.32.8" => 8,
429                    "crc32.32.16" => 16,
430                    "crc32.32.32" => 32,
431                    "crc32.64.64" => 64,
432                    _ => unreachable!(),
433                };
434
435                if bit_size == 64 && this.tcx.sess.target.arch != Arch::X86_64 {
436                    return interp_ok(EmulateItemResult::NotSupported);
437                }
438
439                let [left, right] =
440                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
441                let left = this.read_scalar(left)?;
442                let right = this.read_scalar(right)?;
443
444                let crc = if bit_size == 64 {
445                    // The 64-bit version will only consider the lower 32 bits,
446                    // while the upper 32 bits get discarded.
447                    #[expect(clippy::as_conversions)]
448                    u128::from((left.to_u64()? as u32).reverse_bits())
449                } else {
450                    u128::from(left.to_u32()?.reverse_bits())
451                };
452                let v = match bit_size {
453                    8 => u128::from(right.to_u8()?.reverse_bits()),
454                    16 => u128::from(right.to_u16()?.reverse_bits()),
455                    32 => u128::from(right.to_u32()?.reverse_bits()),
456                    64 => u128::from(right.to_u64()?.reverse_bits()),
457                    _ => unreachable!(),
458                };
459
460                // Perform polynomial division modulo 2.
461                // The algorithm for the division is an adapted version of the
462                // schoolbook division algorithm used for normal integer or polynomial
463                // division. In this context, the quotient is not calculated, since
464                // only the remainder is needed.
465                //
466                // The algorithm works as follows:
467                // 1. Pull down digits until division can be performed. In the context of division
468                //    modulo 2 it means locating the most significant digit of the dividend and shifting
469                //    the divisor such that the position of the divisors most significand digit and the
470                //    dividends most significand digit match.
471                // 2. Perform a division and determine the remainder. Since it is arithmetic modulo 2,
472                //    this operation is a simple bitwise exclusive or.
473                // 3. Repeat steps 1. and 2. until the full remainder is calculated. This is the case
474                //    once the degree of the remainder polynomial is smaller than the degree of the
475                //    divisor polynomial. In other words, the number of leading zeros of the remainder
476                //    is larger than the number of leading zeros of the divisor. It is important to
477                //    note that standard arithmetic comparison is not applicable here:
478                //    0b10011 / 0b11111 = 0b01100 is a valid division, even though the dividend is
479                //    smaller than the divisor.
480                let mut dividend = (crc << bit_size) ^ (v << 32);
481                const POLYNOMIAL: u128 = 0x11EDC6F41;
482                while dividend.leading_zeros() <= POLYNOMIAL.leading_zeros() {
483                    dividend ^=
484                        (POLYNOMIAL << POLYNOMIAL.leading_zeros()) >> dividend.leading_zeros();
485                }
486
487                let result = u32::try_from(dividend).unwrap().reverse_bits();
488                let result = if bit_size == 64 {
489                    Scalar::from_u64(u64::from(result))
490                } else {
491                    Scalar::from_u32(result)
492                };
493
494                this.write_scalar(result, dest)?;
495            }
496            _ => return interp_ok(EmulateItemResult::NotSupported),
497        }
498        interp_ok(EmulateItemResult::NeedsReturn)
499    }
500}