miri/shims/
files.rs

1use std::any::Any;
2use std::collections::BTreeMap;
3use std::fs::{File, Metadata};
4use std::io::{ErrorKind, IsTerminal, Seek, SeekFrom, Write};
5use std::marker::CoercePointee;
6use std::ops::Deref;
7use std::rc::{Rc, Weak};
8use std::{fs, io};
9
10use rustc_abi::Size;
11
12use crate::shims::unix::UnixFileDescription;
13use crate::*;
14
/// Identifier of an open file description.
///
/// The address of the description would also be unique, but using it (e.g. as a sort key)
/// would expose interpreter-internal state. We therefore hand out a dedicated id per file
/// description: all `dup`licates of a description share its id, and ids are never reused.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub struct FdId(usize);
21
22#[derive(Debug, Clone)]
23struct FdIdWith<T: ?Sized> {
24    id: FdId,
25    inner: T,
26}
27
28/// A refcounted pointer to a file description, also tracking the
29/// globally unique ID of this file description.
30#[repr(transparent)]
31#[derive(CoercePointee, Debug)]
32pub struct FileDescriptionRef<T: ?Sized>(Rc<FdIdWith<T>>);
33
34impl<T: ?Sized> Clone for FileDescriptionRef<T> {
35    fn clone(&self) -> Self {
36        FileDescriptionRef(self.0.clone())
37    }
38}
39
40impl<T: ?Sized> Deref for FileDescriptionRef<T> {
41    type Target = T;
42    fn deref(&self) -> &T {
43        &self.0.inner
44    }
45}
46
47impl<T: ?Sized> FileDescriptionRef<T> {
48    pub fn id(&self) -> FdId {
49        self.0.id
50    }
51}
52
53/// Holds a weak reference to the actual file description.
54#[derive(Debug)]
55pub struct WeakFileDescriptionRef<T: ?Sized>(Weak<FdIdWith<T>>);
56
57impl<T: ?Sized> Clone for WeakFileDescriptionRef<T> {
58    fn clone(&self) -> Self {
59        WeakFileDescriptionRef(self.0.clone())
60    }
61}
62
63impl<T: ?Sized> FileDescriptionRef<T> {
64    pub fn downgrade(this: &Self) -> WeakFileDescriptionRef<T> {
65        WeakFileDescriptionRef(Rc::downgrade(&this.0))
66    }
67}
68
69impl<T: ?Sized> WeakFileDescriptionRef<T> {
70    pub fn upgrade(&self) -> Option<FileDescriptionRef<T>> {
71        self.0.upgrade().map(FileDescriptionRef)
72    }
73}
74
75impl<T> VisitProvenance for WeakFileDescriptionRef<T> {
76    fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
77        // A weak reference can never be the only reference to some pointer or place.
78        // Since the actual file description is tracked by strong ref somewhere,
79        // it is ok to make this a NOP operation.
80    }
81}
82
83/// A helper trait to indirectly allow downcasting on `Rc<FdIdWith<dyn _>>`.
84/// Ideally we'd just add a `FdIdWith<Self>: Any` bound to the `FileDescription` trait,
85/// but that does not allow upcasting.
86pub trait FileDescriptionExt: 'static {
87    fn into_rc_any(self: FileDescriptionRef<Self>) -> Rc<dyn Any>;
88
89    /// We wrap the regular `close` function generically, so both handle `Rc::into_inner`
90    /// and epoll interest management.
91    fn close_ref<'tcx>(
92        self: FileDescriptionRef<Self>,
93        communicate_allowed: bool,
94        ecx: &mut MiriInterpCx<'tcx>,
95    ) -> InterpResult<'tcx, io::Result<()>>;
96}
97
98impl<T: FileDescription + 'static> FileDescriptionExt for T {
99    fn into_rc_any(self: FileDescriptionRef<Self>) -> Rc<dyn Any> {
100        self.0
101    }
102
103    fn close_ref<'tcx>(
104        self: FileDescriptionRef<Self>,
105        communicate_allowed: bool,
106        ecx: &mut MiriInterpCx<'tcx>,
107    ) -> InterpResult<'tcx, io::Result<()>> {
108        match Rc::into_inner(self.0) {
109            Some(fd) => {
110                // There might have been epolls interested in this FD. Remove that.
111                ecx.machine.epoll_interests.remove_epolls(fd.id);
112
113                fd.inner.destroy(fd.id, communicate_allowed, ecx)
114            }
115            None => {
116                // Not the last reference.
117                interp_ok(Ok(()))
118            }
119        }
120    }
121}
122
123pub type DynFileDescriptionRef = FileDescriptionRef<dyn FileDescription>;
124
125impl FileDescriptionRef<dyn FileDescription> {
126    pub fn downcast<T: FileDescription + 'static>(self) -> Option<FileDescriptionRef<T>> {
127        let inner = self.into_rc_any().downcast::<FdIdWith<T>>().ok()?;
128        Some(FileDescriptionRef(inner))
129    }
130}
131
132/// Represents an open file description.
133pub trait FileDescription: std::fmt::Debug + FileDescriptionExt {
134    fn name(&self) -> &'static str;
135
136    /// Reads as much as possible into the given buffer `ptr`.
137    /// `len` indicates how many bytes we should try to read.
138    ///
139    /// When the read is done, `finish` will be called. Note that `read` itself may return before
140    /// that happens! Everything that should happen "after" the `read` needs to happen inside
141    /// `finish`.
142    fn read<'tcx>(
143        self: FileDescriptionRef<Self>,
144        _communicate_allowed: bool,
145        _ptr: Pointer,
146        _len: usize,
147        _ecx: &mut MiriInterpCx<'tcx>,
148        _finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
149    ) -> InterpResult<'tcx> {
150        throw_unsup_format!("cannot read from {}", self.name());
151    }
152
153    /// Writes as much as possible from the given buffer `ptr`.
154    /// `len` indicates how many bytes we should try to write.
155    ///
156    /// When the write is done, `finish` will be called. Note that `write` itself may return before
157    /// that happens! Everything that should happen "after" the `write` needs to happen inside
158    /// `finish`.
159    fn write<'tcx>(
160        self: FileDescriptionRef<Self>,
161        _communicate_allowed: bool,
162        _ptr: Pointer,
163        _len: usize,
164        _ecx: &mut MiriInterpCx<'tcx>,
165        _finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
166    ) -> InterpResult<'tcx> {
167        throw_unsup_format!("cannot write to {}", self.name());
168    }
169
170    /// Determines whether this FD non-deterministically has its reads and writes shortened.
171    fn short_fd_operations(&self) -> bool {
172        // We only enable this for FD kinds where we think short accesses gain useful test coverage.
173        false
174    }
175
176    /// Seeks to the given offset (which can be relative to the beginning, end, or current position).
177    /// Returns the new position from the start of the stream.
178    fn seek<'tcx>(
179        &self,
180        _communicate_allowed: bool,
181        _offset: SeekFrom,
182    ) -> InterpResult<'tcx, io::Result<u64>> {
183        throw_unsup_format!("cannot seek on {}", self.name());
184    }
185
186    /// Destroys the file description. Only called when the last duplicate file descriptor is closed.
187    ///
188    /// `self_addr` is the address that this file description used to be stored at.
189    fn destroy<'tcx>(
190        self,
191        _self_id: FdId,
192        _communicate_allowed: bool,
193        _ecx: &mut MiriInterpCx<'tcx>,
194    ) -> InterpResult<'tcx, io::Result<()>>
195    where
196        Self: Sized,
197    {
198        throw_unsup_format!("cannot close {}", self.name());
199    }
200
201    fn metadata<'tcx>(&self) -> InterpResult<'tcx, io::Result<fs::Metadata>> {
202        throw_unsup_format!("obtaining metadata is only supported on file-backed file descriptors");
203    }
204
205    fn is_tty(&self, _communicate_allowed: bool) -> bool {
206        // Most FDs are not tty's and the consequence of a wrong `false` are minor,
207        // so we use a default impl here.
208        false
209    }
210
211    fn as_unix<'tcx>(&self, _ecx: &MiriInterpCx<'tcx>) -> &dyn UnixFileDescription {
212        panic!("Not a unix file descriptor: {}", self.name());
213    }
214
215    /// Implementation of fcntl(F_GETFL) for this FD.
216    fn get_flags<'tcx>(&self, _ecx: &mut MiriInterpCx<'tcx>) -> InterpResult<'tcx, Scalar> {
217        throw_unsup_format!("fcntl: {} is not supported for F_GETFL", self.name());
218    }
219
220    /// Implementation of fcntl(F_SETFL) for this FD.
221    fn set_flags<'tcx>(
222        &self,
223        _flag: i32,
224        _ecx: &mut MiriInterpCx<'tcx>,
225    ) -> InterpResult<'tcx, Scalar> {
226        throw_unsup_format!("fcntl: {} is not supported for F_SETFL", self.name());
227    }
228}
229
230impl FileDescription for io::Stdin {
231    fn name(&self) -> &'static str {
232        "stdin"
233    }
234
235    fn read<'tcx>(
236        self: FileDescriptionRef<Self>,
237        communicate_allowed: bool,
238        ptr: Pointer,
239        len: usize,
240        ecx: &mut MiriInterpCx<'tcx>,
241        finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
242    ) -> InterpResult<'tcx> {
243        if !communicate_allowed {
244            // We want isolation mode to be deterministic, so we have to disallow all reads, even stdin.
245            helpers::isolation_abort_error("`read` from stdin")?;
246        }
247
248        let result = ecx.read_from_host(&*self, len, ptr)?;
249        finish.call(ecx, result)
250    }
251
252    fn destroy<'tcx>(
253        self,
254        _self_id: FdId,
255        _communicate_allowed: bool,
256        _ecx: &mut MiriInterpCx<'tcx>,
257    ) -> InterpResult<'tcx, io::Result<()>> {
258        interp_ok(Ok(()))
259    }
260
261    fn is_tty(&self, communicate_allowed: bool) -> bool {
262        communicate_allowed && self.is_terminal()
263    }
264}
265
266impl FileDescription for io::Stdout {
267    fn name(&self) -> &'static str {
268        "stdout"
269    }
270
271    fn write<'tcx>(
272        self: FileDescriptionRef<Self>,
273        _communicate_allowed: bool,
274        ptr: Pointer,
275        len: usize,
276        ecx: &mut MiriInterpCx<'tcx>,
277        finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
278    ) -> InterpResult<'tcx> {
279        // We allow writing to stdout even with isolation enabled.
280        let result = ecx.write_to_host(&*self, len, ptr)?;
281        // Stdout is buffered, flush to make sure it appears on the
282        // screen.  This is the write() syscall of the interpreted
283        // program, we want it to correspond to a write() syscall on
284        // the host -- there is no good in adding extra buffering
285        // here.
286        io::stdout().flush().unwrap();
287
288        finish.call(ecx, result)
289    }
290
291    fn destroy<'tcx>(
292        self,
293        _self_id: FdId,
294        _communicate_allowed: bool,
295        _ecx: &mut MiriInterpCx<'tcx>,
296    ) -> InterpResult<'tcx, io::Result<()>> {
297        interp_ok(Ok(()))
298    }
299
300    fn is_tty(&self, communicate_allowed: bool) -> bool {
301        communicate_allowed && self.is_terminal()
302    }
303}
304
305impl FileDescription for io::Stderr {
306    fn name(&self) -> &'static str {
307        "stderr"
308    }
309
310    fn destroy<'tcx>(
311        self,
312        _self_id: FdId,
313        _communicate_allowed: bool,
314        _ecx: &mut MiriInterpCx<'tcx>,
315    ) -> InterpResult<'tcx, io::Result<()>> {
316        interp_ok(Ok(()))
317    }
318
319    fn write<'tcx>(
320        self: FileDescriptionRef<Self>,
321        _communicate_allowed: bool,
322        ptr: Pointer,
323        len: usize,
324        ecx: &mut MiriInterpCx<'tcx>,
325        finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
326    ) -> InterpResult<'tcx> {
327        // We allow writing to stderr even with isolation enabled.
328        let result = ecx.write_to_host(&*self, len, ptr)?;
329        // No need to flush, stderr is not buffered.
330        finish.call(ecx, result)
331    }
332
333    fn is_tty(&self, communicate_allowed: bool) -> bool {
334        communicate_allowed && self.is_terminal()
335    }
336}
337
/// A file description backed by a host file.
#[derive(Debug)]
pub struct FileHandle {
    /// The underlying host file.
    pub(crate) file: File,
    /// Whether writes are permitted; the file is also synced on close when this is set.
    pub(crate) writable: bool,
}
343
344impl FileDescription for FileHandle {
345    fn name(&self) -> &'static str {
346        "file"
347    }
348
349    fn read<'tcx>(
350        self: FileDescriptionRef<Self>,
351        communicate_allowed: bool,
352        ptr: Pointer,
353        len: usize,
354        ecx: &mut MiriInterpCx<'tcx>,
355        finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
356    ) -> InterpResult<'tcx> {
357        assert!(communicate_allowed, "isolation should have prevented even opening a file");
358
359        let result = ecx.read_from_host(&self.file, len, ptr)?;
360        finish.call(ecx, result)
361    }
362
363    fn write<'tcx>(
364        self: FileDescriptionRef<Self>,
365        communicate_allowed: bool,
366        ptr: Pointer,
367        len: usize,
368        ecx: &mut MiriInterpCx<'tcx>,
369        finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
370    ) -> InterpResult<'tcx> {
371        assert!(communicate_allowed, "isolation should have prevented even opening a file");
372
373        if !self.writable {
374            // Linux hosts return EBADF here which we can't translate via the platform-independent
375            // code since it does not map to any `io::ErrorKind` -- so if we don't do anything
376            // special, we'd throw an "unsupported error code" here. Windows returns something that
377            // gets translated to `PermissionDenied`. That seems like a good value so let's just use
378            // this everywhere, even if it means behavior on Unix targets does not match the real
379            // thing.
380            return finish.call(ecx, Err(ErrorKind::PermissionDenied.into()));
381        }
382        let result = ecx.write_to_host(&self.file, len, ptr)?;
383        finish.call(ecx, result)
384    }
385
386    fn seek<'tcx>(
387        &self,
388        communicate_allowed: bool,
389        offset: SeekFrom,
390    ) -> InterpResult<'tcx, io::Result<u64>> {
391        assert!(communicate_allowed, "isolation should have prevented even opening a file");
392        interp_ok((&mut &self.file).seek(offset))
393    }
394
395    fn destroy<'tcx>(
396        self,
397        _self_id: FdId,
398        communicate_allowed: bool,
399        _ecx: &mut MiriInterpCx<'tcx>,
400    ) -> InterpResult<'tcx, io::Result<()>> {
401        assert!(communicate_allowed, "isolation should have prevented even opening a file");
402        // We sync the file if it was opened in a mode different than read-only.
403        if self.writable {
404            // `File::sync_all` does the checks that are done when closing a file. We do this to
405            // to handle possible errors correctly.
406            let result = self.file.sync_all();
407            // Now we actually close the file and return the result.
408            drop(self.file);
409            interp_ok(result)
410        } else {
411            // We drop the file, this closes it but ignores any errors
412            // produced when closing it. This is done because
413            // `File::sync_all` cannot be done over files like
414            // `/dev/urandom` which are read-only. Check
415            // https://github.com/rust-lang/miri/issues/999#issuecomment-568920439
416            // for a deeper discussion.
417            drop(self.file);
418            interp_ok(Ok(()))
419        }
420    }
421
422    fn metadata<'tcx>(&self) -> InterpResult<'tcx, io::Result<Metadata>> {
423        interp_ok(self.file.metadata())
424    }
425
426    fn is_tty(&self, communicate_allowed: bool) -> bool {
427        communicate_allowed && self.file.is_terminal()
428    }
429
430    fn short_fd_operations(&self) -> bool {
431        // While short accesses on file-backed FDs are very rare (at least for sufficiently small
432        // accesses), they can realistically happen when a signal interrupts the syscall.
433        // FIXME: we should return `false` if this is a named pipe...
434        true
435    }
436
437    fn as_unix<'tcx>(&self, ecx: &MiriInterpCx<'tcx>) -> &dyn UnixFileDescription {
438        assert!(
439            ecx.target_os_is_unix(),
440            "unix file operations are only available for unix targets"
441        );
442        self
443    }
444}
445
/// An output sink that behaves like `/dev/null`.
#[derive(Debug)]
pub struct NullOutput;
449
450impl FileDescription for NullOutput {
451    fn name(&self) -> &'static str {
452        "stderr and stdout"
453    }
454
455    fn write<'tcx>(
456        self: FileDescriptionRef<Self>,
457        _communicate_allowed: bool,
458        _ptr: Pointer,
459        len: usize,
460        ecx: &mut MiriInterpCx<'tcx>,
461        finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
462    ) -> InterpResult<'tcx> {
463        // We just don't write anything, but report to the user that we did.
464        finish.call(ecx, Ok(len))
465    }
466
467    fn destroy<'tcx>(
468        self,
469        _self_id: FdId,
470        _communicate_allowed: bool,
471        _ecx: &mut MiriInterpCx<'tcx>,
472    ) -> InterpResult<'tcx, io::Result<()>> {
473        interp_ok(Ok(()))
474    }
475}
476
/// The integer type of a file descriptor number, as used for the keys of [`FdTable`].
pub type FdNum = i32;
479
480/// The file descriptor table
481#[derive(Debug)]
482pub struct FdTable {
483    pub fds: BTreeMap<FdNum, DynFileDescriptionRef>,
484    /// Unique identifier for file description, used to differentiate between various file description.
485    next_file_description_id: FdId,
486}
487
488impl VisitProvenance for FdTable {
489    fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
490        // All our FileDescription instances do not have any tags.
491    }
492}
493
494impl FdTable {
495    fn new() -> Self {
496        FdTable { fds: BTreeMap::new(), next_file_description_id: FdId(0) }
497    }
498    pub(crate) fn init(mute_stdout_stderr: bool) -> FdTable {
499        let mut fds = FdTable::new();
500        fds.insert_new(io::stdin());
501        if mute_stdout_stderr {
502            assert_eq!(fds.insert_new(NullOutput), 1);
503            assert_eq!(fds.insert_new(NullOutput), 2);
504        } else {
505            assert_eq!(fds.insert_new(io::stdout()), 1);
506            assert_eq!(fds.insert_new(io::stderr()), 2);
507        }
508        fds
509    }
510
511    pub fn new_ref<T: FileDescription>(&mut self, fd: T) -> FileDescriptionRef<T> {
512        let file_handle =
513            FileDescriptionRef(Rc::new(FdIdWith { id: self.next_file_description_id, inner: fd }));
514        self.next_file_description_id = FdId(self.next_file_description_id.0.strict_add(1));
515        file_handle
516    }
517
518    /// Insert a new file description to the FdTable.
519    pub fn insert_new(&mut self, fd: impl FileDescription) -> FdNum {
520        let fd_ref = self.new_ref(fd);
521        self.insert(fd_ref)
522    }
523
524    pub fn insert(&mut self, fd_ref: DynFileDescriptionRef) -> FdNum {
525        self.insert_with_min_num(fd_ref, 0)
526    }
527
528    /// Insert a file description, giving it a file descriptor that is at least `min_fd_num`.
529    pub fn insert_with_min_num(
530        &mut self,
531        file_handle: DynFileDescriptionRef,
532        min_fd_num: FdNum,
533    ) -> FdNum {
534        // Find the lowest unused FD, starting from min_fd. If the first such unused FD is in
535        // between used FDs, the find_map combinator will return it. If the first such unused FD
536        // is after all other used FDs, the find_map combinator will return None, and we will use
537        // the FD following the greatest FD thus far.
538        let candidate_new_fd =
539            self.fds.range(min_fd_num..).zip(min_fd_num..).find_map(|((fd_num, _fd), counter)| {
540                if *fd_num != counter {
541                    // There was a gap in the fds stored, return the first unused one
542                    // (note that this relies on BTreeMap iterating in key order)
543                    Some(counter)
544                } else {
545                    // This fd is used, keep going
546                    None
547                }
548            });
549        let new_fd_num = candidate_new_fd.unwrap_or_else(|| {
550            // find_map ran out of BTreeMap entries before finding a free fd, use one plus the
551            // maximum fd in the map
552            self.fds.last_key_value().map(|(fd_num, _)| fd_num.strict_add(1)).unwrap_or(min_fd_num)
553        });
554
555        self.fds.try_insert(new_fd_num, file_handle).unwrap();
556        new_fd_num
557    }
558
559    pub fn get(&self, fd_num: FdNum) -> Option<DynFileDescriptionRef> {
560        let fd = self.fds.get(&fd_num)?;
561        Some(fd.clone())
562    }
563
564    pub fn remove(&mut self, fd_num: FdNum) -> Option<DynFileDescriptionRef> {
565        self.fds.remove(&fd_num)
566    }
567
568    pub fn is_fd_num(&self, fd_num: FdNum) -> bool {
569        self.fds.contains_key(&fd_num)
570    }
571}
572
573impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
574pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
575    /// Read data from a host `Read` type, store the result into machine memory,
576    /// and return whether that worked.
577    fn read_from_host(
578        &mut self,
579        mut file: impl io::Read,
580        len: usize,
581        ptr: Pointer,
582    ) -> InterpResult<'tcx, Result<usize, IoError>> {
583        let this = self.eval_context_mut();
584
585        let mut bytes = vec![0; len];
586        let result = file.read(&mut bytes);
587        match result {
588            Ok(read_size) => {
589                // If reading to `bytes` did not fail, we write those bytes to the buffer.
590                // Crucially, if fewer than `bytes.len()` bytes were read, only write
591                // that much into the output buffer!
592                this.write_bytes_ptr(ptr, bytes[..read_size].iter().copied())?;
593                interp_ok(Ok(read_size))
594            }
595            Err(e) => interp_ok(Err(IoError::HostError(e))),
596        }
597    }
598
599    /// Write data to a host `Write` type, withthe bytes taken from machine memory.
600    fn write_to_host(
601        &mut self,
602        mut file: impl io::Write,
603        len: usize,
604        ptr: Pointer,
605    ) -> InterpResult<'tcx, Result<usize, IoError>> {
606        let this = self.eval_context_mut();
607
608        let bytes = this.read_bytes_ptr_strip_provenance(ptr, Size::from_bytes(len))?;
609        let result = file.write(bytes);
610        interp_ok(result.map_err(IoError::HostError))
611    }
612}