miri/
eval.rs

1//! Main evaluator loop and setting up the initial stack frame.
2
3use std::ffi::{OsStr, OsString};
4use std::panic::{self, AssertUnwindSafe};
5use std::path::PathBuf;
6use std::task::Poll;
7use std::{iter, thread};
8
9use rustc_abi::ExternAbi;
10use rustc_data_structures::fx::{FxHashMap, FxHashSet};
11use rustc_hir::def::Namespace;
12use rustc_hir::def_id::DefId;
13use rustc_middle::ty::layout::{LayoutCx, LayoutOf};
14use rustc_middle::ty::{self, Ty, TyCtxt};
15use rustc_session::config::EntryFnType;
16
17use crate::concurrency::thread::TlsAllocAction;
18use crate::diagnostics::report_leaks;
19use crate::shims::tls;
20use crate::*;
21
/// The kind of entry point the interpreted program is started through.
#[derive(Copy, Clone, Debug)]
pub enum MiriEntryFnType {
    /// The entry function is called directly with `argc` and `argv`
    /// (see the `MiriStart` arm in `create_ecx`).
    MiriStart,
    /// An entry point as classified by rustc. For `EntryFnType::Main`, `create_ecx` calls the
    /// `start` lang item and passes it a pointer to the entry function.
    Rustc(EntryFnType),
}
27
/// Number of times the main thread yields to other ready threads when it would otherwise exit.
/// This must be a finite number, or a background thread running `loop {}` would hang the
/// program.
const MAIN_THREAD_YIELDS_AT_SHUTDOWN: u32 = 256;
32
/// Selects how (and whether) alignment is checked.
// `Eq` is derived alongside `PartialEq`, matching the other fieldless configuration enums in
// this file (`BacktraceStyle`, `ValidationMode`).
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum AlignmentCheck {
    /// Do not check alignment.
    None,
    /// Check alignment "symbolically", i.e., using only the requested alignment for an
    /// allocation and not its real base address.
    Symbolic,
    /// Check alignment on the actual physical integer address.
    Int,
}
42
/// How an isolated op is rejected.
///
/// `Abort` stops the machine. With every other variant Miri returns an error for the isolated
/// op instead; those variants only differ in whether and how the user is warned about it.
// `Eq` is derived alongside `PartialEq`, matching the other fieldless configuration enums in
// this file (`BacktraceStyle`, `ValidationMode`).
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum RejectOpWith {
    /// Isolated op is rejected with an abort of the machine.
    Abort,

    /// Do not print a warning about the rejected isolated op.
    NoWarning,

    /// Print a warning about the rejected isolated op, with backtrace.
    Warning,

    /// Print a warning about the rejected isolated op, without backtrace.
    WarningWithoutBacktrace,
}

/// What to do with an op that requires communication with the host.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum IsolatedOp {
    /// Reject an op requiring communication with the host. By
    /// default, miri rejects the op with an abort. If not, it returns
    /// an error code, and prints a warning about it. Warning levels
    /// are controlled by the `RejectOpWith` enum.
    Reject(RejectOpWith),

    /// Execute op requiring communication with the host, i.e. disable isolation.
    Allow,
}
71
/// Selects how much of a backtrace is printed.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum BacktraceStyle {
    /// Prints a terser backtrace which ideally only contains relevant information.
    Short,
    /// Prints a backtrace with all possible information.
    Full,
    /// Prints only the frame that the error occurs in.
    Off,
}
81
/// Selects how thoroughly values are validated.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ValidationMode {
    /// Do not perform any kind of validation.
    No,
    /// Validate the interior of the value, but not things behind references.
    Shallow,
    /// Fully recursively validate references.
    Deep,
}
91
/// Configuration needed to spawn a Miri instance.
///
/// See the `Default` impl for the baseline value of every field.
#[derive(Clone)]
pub struct MiriConfig {
    /// The host environment snapshot to use as basis for what is provided to the interpreted program.
    /// (This is still subject to isolation as well as `forwarded_env_vars`.)
    pub env: Vec<(OsString, OsString)>,
    /// Determines which level of validity checking is performed.
    pub validation: ValidationMode,
    /// Determines which borrow tracker (Stacked Borrows or Tree Borrows) is enabled, if any.
    pub borrow_tracker: Option<BorrowTrackerMethod>,
    /// Whether `core::ptr::Unique` receives special treatment.
    /// If `true` then `Unique` is reborrowed with its own new tag and permission,
    /// otherwise `Unique` is just another raw pointer.
    pub unique_is_unique: bool,
    /// Controls alignment checking.
    pub check_alignment: AlignmentCheck,
    /// Action for an op requiring communication with the host.
    pub isolated_op: IsolatedOp,
    /// Determines if memory leaks should be ignored.
    pub ignore_leaks: bool,
    /// Environment variables that should always be forwarded from the host.
    pub forwarded_env_vars: Vec<String>,
    /// Additional environment variables that should be set in the interpreted program.
    pub set_env_vars: FxHashMap<String, String>,
    /// Command-line arguments passed to the interpreted program.
    pub args: Vec<String>,
    /// The seed to use when non-determinism or randomness are required (e.g. ptr-to-int cast, `getrandom()`).
    pub seed: Option<u64>,
    /// The Stacked Borrows pointer tags to report about.
    pub tracked_pointer_tags: FxHashSet<BorTag>,
    /// The allocation ids to report about.
    pub tracked_alloc_ids: FxHashSet<AllocId>,
    /// For the tracked alloc ids, also report read/write accesses.
    pub track_alloc_accesses: bool,
    /// Determines if data race detection should be enabled.
    pub data_race_detector: bool,
    /// Determines if weak memory emulation should be enabled. Requires data race detection to be enabled.
    pub weak_memory_emulation: bool,
    /// Track when an outdated (weak memory) load happens.
    pub track_outdated_loads: bool,
    /// Rate of spurious failures for compare_exchange_weak atomic operations,
    /// between 0.0 and 1.0, defaulting to 0.8 (80% chance of failure).
    pub cmpxchg_weak_failure_rate: f64,
    /// If `Some`, enable the `measureme` profiler, writing results to a file
    /// with the specified prefix.
    pub measureme_out: Option<String>,
    /// Which style to use for printing backtraces.
    pub backtrace_style: BacktraceStyle,
    /// Which provenance to use for int2ptr casts.
    pub provenance_mode: ProvenanceMode,
    /// Whether to ignore any output by the program. This is helpful when debugging miri
    /// as its messages don't get intermingled with the program messages.
    pub mute_stdout_stderr: bool,
    /// The probability of the active thread being preempted at the end of each basic block.
    pub preemption_rate: f64,
    /// Report the current instruction being executed every N basic blocks.
    pub report_progress: Option<u32>,
    /// Whether Stacked Borrows and Tree Borrows retagging should recurse into fields of datatypes.
    pub retag_fields: RetagFields,
    /// The location of a shared object file to load when calling external functions.
    /// FIXME! consider allowing users to specify paths to multiple files, or to a directory
    pub native_lib: Option<PathBuf>,
    /// Run a garbage collector for BorTags every N basic blocks.
    pub gc_interval: u32,
    /// The number of CPUs to be reported by miri.
    pub num_cpus: u32,
    /// If set, requires Miri to emulate pages of this size.
    pub page_size: Option<u64>,
    /// Whether to collect a backtrace when each allocation is created, just in case it leaks.
    pub collect_leak_backtraces: bool,
    /// Probability for address reuse.
    pub address_reuse_rate: f64,
    /// Probability for address reuse across threads.
    pub address_reuse_cross_thread_rate: f64,
}
167
168impl Default for MiriConfig {
169    fn default() -> MiriConfig {
170        MiriConfig {
171            env: vec![],
172            validation: ValidationMode::Shallow,
173            borrow_tracker: Some(BorrowTrackerMethod::StackedBorrows),
174            unique_is_unique: false,
175            check_alignment: AlignmentCheck::Int,
176            isolated_op: IsolatedOp::Reject(RejectOpWith::Abort),
177            ignore_leaks: false,
178            forwarded_env_vars: vec![],
179            set_env_vars: FxHashMap::default(),
180            args: vec![],
181            seed: None,
182            tracked_pointer_tags: FxHashSet::default(),
183            tracked_alloc_ids: FxHashSet::default(),
184            track_alloc_accesses: false,
185            data_race_detector: true,
186            weak_memory_emulation: true,
187            track_outdated_loads: false,
188            cmpxchg_weak_failure_rate: 0.8, // 80%
189            measureme_out: None,
190            backtrace_style: BacktraceStyle::Short,
191            provenance_mode: ProvenanceMode::Default,
192            mute_stdout_stderr: false,
193            preemption_rate: 0.01, // 1%
194            report_progress: None,
195            retag_fields: RetagFields::Yes,
196            native_lib: None,
197            gc_interval: 10_000,
198            num_cpus: 1,
199            page_size: None,
200            collect_leak_backtraces: true,
201            address_reuse_rate: 0.5,
202            address_reuse_cross_thread_rate: 0.1,
203        }
204    }
205}
206
/// The state of the main thread. Implementation detail of `on_main_stack_empty`.
///
/// The states are visited in declaration order; `Yield` is skipped when the machine's
/// `preemption_rate` is not greater than zero.
#[derive(Default, Debug)]
enum MainThreadState<'tcx> {
    /// Initial state; the first call to `on_main_stack_empty` moves on to `TlsDtors`.
    #[default]
    Running,
    /// The TLS destructor state is being polled (via its `on_stack_empty`) until it is ready.
    TlsDtors(tls::TlsDtorsState<'tcx>),
    /// The main thread yields to other threads; `remaining` counts the yields still to do.
    Yield {
        remaining: u32,
    },
    /// The exit code is read, the main thread is terminated and the interpreter loop is stopped.
    Done,
}
218
impl<'tcx> MainThreadState<'tcx> {
    /// Advances the main thread's shutdown sequence by one step.
    ///
    /// Every step that does not end the program returns `Poll::Pending`. The `Done` step never
    /// returns normally: it stops the interpreter loop by throwing `TerminationInfo::Exit` with
    /// the exit code read from `main_fn_ret_place`. Interpreter errors raised along the way are
    /// propagated.
    fn on_main_stack_empty(
        &mut self,
        this: &mut MiriInterpCx<'tcx>,
    ) -> InterpResult<'tcx, Poll<()>> {
        use MainThreadState::*;
        match self {
            Running => {
                // The main function is done; start with a fresh TLS destructor state.
                *self = TlsDtors(Default::default());
            }
            TlsDtors(state) =>
                match state.on_stack_empty(this)? {
                    Poll::Pending => {} // just keep going
                    Poll::Ready(()) => {
                        // Give background threads a chance to finish by yielding the main thread a
                        // couple of times -- but only if we would also preempt threads randomly.
                        if this.machine.preemption_rate > 0.0 {
                            // There is a non-zero chance they will yield back to us often enough to
                            // make Miri terminate eventually.
                            *self = Yield { remaining: MAIN_THREAD_YIELDS_AT_SHUTDOWN };
                        } else {
                            // The other threads did not get preempted, so no need to yield back to
                            // them.
                            *self = Done;
                        }
                    }
                },
            Yield { remaining } =>
                // `checked_sub` returns `None` once the counter has reached zero, i.e. after all
                // the yields have been performed.
                match remaining.checked_sub(1) {
                    None => *self = Done,
                    Some(new_remaining) => {
                        *remaining = new_remaining;
                        this.yield_active_thread();
                    }
                },
            Done => {
                // Figure out exit code.
                let ret_place = this.machine.main_fn_ret_place.clone().unwrap();
                let exit_code = this.read_target_isize(&ret_place)?;
                // Rust uses `isize` but the underlying type of an exit code is `i32`.
                // Do a saturating cast.
                let exit_code = i32::try_from(exit_code).unwrap_or(if exit_code >= 0 {
                    i32::MAX
                } else {
                    i32::MIN
                });
                // Deal with our thread-local memory. We do *not* want to actually free it, instead we consider TLS
                // to be like a global `static`, so that all memory reached by it is considered to "not leak".
                this.terminate_active_thread(TlsAllocAction::Leak)?;
                // Stop interpreter loop.
                throw_machine_stop!(TerminationInfo::Exit { code: exit_code, leak_check: true });
            }
        }
        // All arms that fall through to here still have work left to do.
        interp_ok(Poll::Pending)
    }
}
275
/// Returns a freshly created `InterpCx` whose first stack frame is a call to the program's
/// entry point.
/// Public because this is also used by `priroda`.
///
/// The setup happens in this order:
/// 1. Create the machine and run its late initialization, handing it the callback that drives
///    `MainThreadState::on_main_stack_empty`.
/// 2. Check that MIR is available for a sentinel function in libcore.
/// 3. Write the program arguments from `config.args` into interpreter memory (`argv`, the
///    stored `argc`/`argv` for macOS, and the UTF-16 command line for Windows).
/// 4. Allocate the return place and push the call to the entry point selected by `entry_type`.
///
/// A fatal diagnostic is emitted if the sentinel has no MIR or the `start` lang item cannot be
/// found. Interpreter errors during setup are returned to the caller.
pub fn create_ecx<'tcx>(
    tcx: TyCtxt<'tcx>,
    entry_id: DefId,
    entry_type: MiriEntryFnType,
    config: &MiriConfig,
) -> InterpResult<'tcx, InterpCx<'tcx, MiriMachine<'tcx>>> {
    let typing_env = ty::TypingEnv::fully_monomorphized();
    let layout_cx = LayoutCx::new(tcx, typing_env);
    let mut ecx =
        InterpCx::new(tcx, rustc_span::DUMMY_SP, typing_env, MiriMachine::new(config, layout_cx));

    // Some parts of initialization require a full `InterpCx`.
    MiriMachine::late_init(&mut ecx, config, {
        // The closure owns the main thread's shutdown state and advances it on each call.
        let mut state = MainThreadState::default();
        // Cannot capture anything GC-relevant here.
        Box::new(move |m| state.on_main_stack_empty(m))
    })?;

    // Make sure we have MIR. We check MIR for some stable monomorphic function in libcore.
    let sentinel =
        helpers::try_resolve_path(tcx, &["core", "ascii", "escape_default"], Namespace::ValueNS);
    if !matches!(sentinel, Some(s) if tcx.is_mir_available(s.def.def_id())) {
        tcx.dcx().fatal(
            "the current sysroot was built without `-Zalways-encode-mir`, or libcore seems missing. \
            Use `cargo miri setup` to prepare a sysroot that is suitable for Miri."
        );
    }

    // Setup first stack frame.
    let entry_instance = ty::Instance::mono(tcx, entry_id);

    // First argument is constructed later, because it's skipped for `miri_start`.

    // Second argument (argc): length of `config.args`.
    let argc =
        ImmTy::from_int(i64::try_from(config.args.len()).unwrap(), ecx.machine.layouts.isize);
    // Third argument (`argv`): created from `config.args`.
    let argv = {
        // Put each argument in memory, collect pointers.
        let mut argvs = Vec::<Immediate<Provenance>>::with_capacity(config.args.len());
        for arg in config.args.iter() {
            // Make space for `0` terminator.
            let size = u64::try_from(arg.len()).unwrap().strict_add(1);
            let arg_type = Ty::new_array(tcx, tcx.types.u8, size);
            let arg_place =
                ecx.allocate(ecx.layout_of(arg_type)?, MiriMemoryKind::Machine.into())?;
            ecx.write_os_str_to_c_str(OsStr::new(arg), arg_place.ptr(), size)?;
            ecx.mark_immutable(&arg_place);
            argvs.push(arg_place.to_ref(&ecx));
        }
        // Make an array with all these pointers, in the Miri memory.
        let argvs_layout = ecx.layout_of(Ty::new_array(
            tcx,
            Ty::new_imm_ptr(tcx, tcx.types.u8),
            u64::try_from(argvs.len()).unwrap(),
        ))?;
        let argvs_place = ecx.allocate(argvs_layout, MiriMemoryKind::Machine.into())?;
        for (idx, arg) in argvs.into_iter().enumerate() {
            let place = ecx.project_field(&argvs_place, idx)?;
            ecx.write_immediate(arg, &place)?;
        }
        ecx.mark_immutable(&argvs_place);
        // Store `argc` and `argv` for macOS `_NSGetArg{c,v}`.
        {
            let argc_place =
                ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?;
            ecx.write_immediate(*argc, &argc_place)?;
            ecx.mark_immutable(&argc_place);
            ecx.machine.argc = Some(argc_place.ptr());

            // A separate allocation that holds the pointer to the `argvs` array.
            let argv_place = ecx.allocate(
                ecx.layout_of(Ty::new_imm_ptr(tcx, tcx.types.unit))?,
                MiriMemoryKind::Machine.into(),
            )?;
            ecx.write_pointer(argvs_place.ptr(), &argv_place)?;
            ecx.mark_immutable(&argv_place);
            ecx.machine.argv = Some(argv_place.ptr());
        }
        // Store command line as UTF-16 for Windows `GetCommandLineW`.
        {
            // Construct a command string with all the arguments.
            let cmd_utf16: Vec<u16> = args_to_utf16_command_string(config.args.iter());

            let cmd_type =
                Ty::new_array(tcx, tcx.types.u16, u64::try_from(cmd_utf16.len()).unwrap());
            let cmd_place =
                ecx.allocate(ecx.layout_of(cmd_type)?, MiriMemoryKind::Machine.into())?;
            ecx.machine.cmd_line = Some(cmd_place.ptr());
            // Store the UTF-16 string. We just allocated so we know the bounds are fine.
            for (idx, &c) in cmd_utf16.iter().enumerate() {
                let place = ecx.project_field(&cmd_place, idx)?;
                ecx.write_scalar(Scalar::from_u16(c), &place)?;
            }
            ecx.mark_immutable(&cmd_place);
        }
        // The value of the `argv` argument: a reference to the array of argument pointers.
        ecx.mplace_to_ref(&argvs_place)?
    };

    // Return place (in static memory so that it does not count as leak).
    let ret_place = ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?;
    // Remembered on the machine so that the exit code can be read back at shutdown.
    ecx.machine.main_fn_ret_place = Some(ret_place.clone());
    // Call start function.

    match entry_type {
        MiriEntryFnType::Rustc(EntryFnType::Main { .. }) => {
            let start_id = tcx.lang_items().start_fn().unwrap_or_else(|| {
                tcx.dcx().fatal("could not find start lang item");
            });
            // The `start` lang item is instantiated with the return type of `main`.
            let main_ret_ty = tcx.fn_sig(entry_id).no_bound_vars().unwrap().output();
            let main_ret_ty = main_ret_ty.no_bound_vars().unwrap();
            let start_instance = ty::Instance::try_resolve(
                tcx,
                typing_env,
                start_id,
                tcx.mk_args(&[ty::GenericArg::from(main_ret_ty)]),
            )
            .unwrap()
            .unwrap();

            let main_ptr = ecx.fn_ptr(FnVal::Instance(entry_instance));

            // Always using DEFAULT is okay since we don't support signals in Miri anyway.
            // (This means we are effectively ignoring `-Zon-broken-pipe`.)
            let sigpipe = rustc_session::config::sigpipe::DEFAULT;

            // Arguments: pointer to `main`, `argc`, `argv`, and the `sigpipe` setting.
            ecx.call_function(
                start_instance,
                ExternAbi::Rust,
                &[
                    ImmTy::from_scalar(
                        Scalar::from_pointer(main_ptr, &ecx),
                        // FIXME use a proper fn ptr type
                        ecx.machine.layouts.const_raw_ptr,
                    ),
                    argc,
                    argv,
                    ImmTy::from_uint(sigpipe, ecx.machine.layouts.u8),
                ],
                Some(&ret_place),
                StackPopCleanup::Root { cleanup: true },
            )?;
        }
        MiriEntryFnType::MiriStart => {
            // The entry function itself is called, with just `argc` and `argv`.
            ecx.call_function(
                entry_instance,
                ExternAbi::Rust,
                &[argc, argv],
                Some(&ret_place),
                StackPopCleanup::Root { cleanup: true },
            )?;
        }
    }

    interp_ok(ecx)
}
433
/// Evaluates the entry function specified by `entry_id`.
/// Returns `Some(return_code)` if program execution completed.
/// Returns `None` if an evaluation error occurred, or if the leak check (which only runs when
/// requested by the termination info and not disabled via `config.ignore_leaks`) found threads
/// that are still running or leaked memory.
///
/// # Panics
///
/// Panics if creating the interpreter context fails. A panic raised while running the threads
/// is resumed after `handle_ice` has been called.
#[expect(clippy::needless_lifetimes)]
pub fn eval_entry<'tcx>(
    tcx: TyCtxt<'tcx>,
    entry_id: DefId,
    entry_type: MiriEntryFnType,
    config: MiriConfig,
) -> Option<i32> {
    // Remember the leak-check setting; it is needed after the program has run.
    let ignore_leaks = config.ignore_leaks;

    let mut ecx = match create_ecx(tcx, entry_id, entry_type, &config).report_err() {
        Ok(v) => v,
        Err(err) => {
            let (kind, backtrace) = err.into_parts();
            backtrace.print_backtrace();
            panic!("Miri initialization error: {kind:?}")
        }
    };

    // Perform the main execution.
    let res: thread::Result<InterpResult<'_, !>> =
        panic::catch_unwind(AssertUnwindSafe(|| ecx.run_threads()));
    let res = res.unwrap_or_else(|panic_payload| {
        // Let the interpreter context react to the panic before it is propagated further.
        ecx.handle_ice();
        panic::resume_unwind(panic_payload)
    });
    // `Ok` can never happen.
    let Err(err) = res.report_err();

    // Machine cleanup. Only do this if all threads have terminated; threads that are still running
    // might cause Stacked Borrows errors (https://github.com/rust-lang/miri/issues/2396).
    if ecx.have_all_terminated() {
        // Even if all threads have terminated, we have to beware of data races since some threads
        // might not have joined the main thread (https://github.com/rust-lang/miri/issues/2020,
        // https://github.com/rust-lang/miri/issues/2508).
        ecx.allow_data_races_all_threads_done();
        EnvVars::cleanup(&mut ecx).expect("error during env var cleanup");
    }

    // Process the result. `?` returns `None` from this function if `report_error` yields `None`.
    let (return_code, leak_check) = report_error(&ecx, err)?;
    if leak_check && !ignore_leaks {
        // Check for thread leaks.
        if !ecx.have_all_terminated() {
            tcx.dcx().err("the main thread terminated without waiting for all remaining threads");
            tcx.dcx().note("set `MIRIFLAGS=-Zmiri-ignore-leaks` to disable this check");
            return None;
        }
        // Check for memory leaks.
        info!("Additional static roots: {:?}", ecx.machine.static_roots);
        let leaks = ecx.take_leaked_allocations(|ecx| &ecx.machine.static_roots);
        if !leaks.is_empty() {
            report_leaks(&ecx, leaks);
            tcx.dcx().note("set `MIRIFLAGS=-Zmiri-ignore-leaks` to disable this check");
            // Ignore the provided return code - let the reported error
            // determine the return code.
            return None;
        }
    }
    Some(return_code)
}
498
/// Builds the Windows command line string for a list of arguments.
///
/// The result is UTF-16 encoded and terminated by a single NUL. An empty argument list yields
/// just that terminator.
///
/// # Panics
///
/// Panics if the zeroth argument contains the `"` character, because doublequotes in `argv[0]`
/// cannot be encoded using the standard command line parsing rules. Also panics if the
/// assembled command line contains a NUL character.
///
/// Further reading:
/// * [Parsing C++ command-line arguments](https://docs.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-160#parsing-c-command-line-arguments)
/// * [The C/C++ Parameter Parsing Rules](https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES)
fn args_to_utf16_command_string<I, T>(mut args: I) -> Vec<u16>
where
    I: Iterator<Item = T>,
    T: AsRef<str>,
{
    // Without a program name there is nothing to encode except the terminator.
    let Some(program) = args.next() else {
        return vec![0];
    };
    let program = program.as_ref();

    // argv[0] follows its own rules: backslashes are never escaped and a literal doublequote
    // cannot be represented at all.
    assert!(!program.contains('"'), "argv[0] cannot contain a doublequote (\") character");
    // argv[0] is always wrapped in quotes.
    let mut cmd = format!("\"{program}\"");

    // All remaining arguments are separated by a single space.
    for arg in args {
        let arg = arg.as_ref();
        cmd.push(' ');

        if arg.is_empty() {
            // An empty argument is only visible as an empty pair of quotes.
            cmd.push_str("\"\"");
            continue;
        }
        if !arg.contains(['"', '\t', ' ']) {
            // Neither quote, tab, nor space: the argument can be copied verbatim.
            cmd.push_str(arg);
            continue;
        }

        // Spaces and tabs are protected by quoting the whole argument. Inside the quoted block
        // a doublequote is escaped with a backslash. Backslashes are literal unless a run of
        // them is directly followed by a doublequote (including the closing one), in which case
        // each backslash of the run has to be doubled.
        cmd.push('"');
        let mut pending_backslashes = 0usize;
        for ch in arg.chars() {
            match ch {
                // Hold backslashes back until we know what follows the run.
                '\\' => pending_backslashes += 1,
                '"' => {
                    // Double the run, plus one more backslash to escape the quote itself.
                    cmd.extend(iter::repeat_n('\\', pending_backslashes * 2 + 1));
                    cmd.push('"');
                    pending_backslashes = 0;
                }
                other => {
                    // The run is followed by an ordinary character and stays as it is.
                    cmd.extend(iter::repeat_n('\\', pending_backslashes));
                    cmd.push(other);
                    pending_backslashes = 0;
                }
            }
        }
        // A trailing run precedes the closing quote and therefore has to be doubled.
        cmd.extend(iter::repeat_n('\\', pending_backslashes * 2));
        cmd.push('"');
    }

    assert!(!cmd.contains('\0'), "interior null in command line arguments");
    cmd.encode_utf16().chain(iter::once(0)).collect()
}
583
#[cfg(test)]
mod tests {
    use super::*;

    /// A literal doublequote in `argv[0]` cannot be encoded and has to be rejected.
    #[test]
    #[should_panic(expected = "argv[0] cannot contain a doublequote (\") character")]
    fn windows_argv0_panic_on_quote() {
        let argv = ["\""];
        args_to_utf16_command_string(argv.iter());
    }

    /// A trailing backslash in `argv[0]` is not escaped, while the other arguments are quoted
    /// and escaped as needed.
    #[test]
    fn windows_argv0_no_escape() {
        let argv = [r"C:\Program Files\", "arg1", "arg 2", "arg \" 3"];
        let encoded = args_to_utf16_command_string(argv.iter());
        let cmd = String::from_utf16_lossy(&encoded);
        let expected = r#""C:\Program Files\" arg1 "arg 2" "arg \" 3""#;
        assert_eq!(cmd.trim_end_matches('\0'), expected);
    }
}