Skip to main content

miri/shims/
files.rs

1use std::any::Any;
2use std::collections::BTreeMap;
3use std::fs::File;
4use std::io::{ErrorKind, IsTerminal, Read, Seek, SeekFrom, Write};
5use std::marker::CoercePointee;
6use std::ops::Deref;
7use std::rc::{Rc, Weak};
8use std::{fs, io};
9
10use rustc_abi::Size;
11
12use crate::shims::unix::UnixFileDescription;
13use crate::*;
14
/// Identifier attached to every file description.
///
/// The allocation address would be unique as well, but sorting by it would leak
/// interpreter-internal state into observable behavior. Instead each file description
/// gets a dedicated id: all `dup`licates of a description share it, and an id is
/// never reused.
#[derive(Debug, Copy, Clone, Default, Eq, PartialEq, Ord, PartialOrd)]
pub struct FdId(usize);

impl FdId {
    /// Returns the raw integer behind this id.
    pub fn to_usize(self) -> usize {
        let FdId(raw) = self;
        raw
    }

    /// Wraps a raw `usize` as an [`FdId`]. No check is made that a file description
    /// with this id actually exists.
    pub fn new_unchecked(id: usize) -> Self {
        FdId(id)
    }
}
32
33#[derive(Debug, Clone)]
34struct FdIdWith<T: ?Sized> {
35    id: FdId,
36    inner: T,
37}
38
39/// A refcounted pointer to a file description, also tracking the
40/// globally unique ID of this file description.
41#[repr(transparent)]
42#[derive(CoercePointee, Debug)]
43pub struct FileDescriptionRef<T: ?Sized>(Rc<FdIdWith<T>>);
44
45impl<T: ?Sized> Clone for FileDescriptionRef<T> {
46    fn clone(&self) -> Self {
47        FileDescriptionRef(self.0.clone())
48    }
49}
50
51impl<T: ?Sized> Deref for FileDescriptionRef<T> {
52    type Target = T;
53    fn deref(&self) -> &T {
54        &self.0.inner
55    }
56}
57
58impl<T: ?Sized> FileDescriptionRef<T> {
59    pub fn id(&self) -> FdId {
60        self.0.id
61    }
62}
63
64impl<T: ?Sized> PartialEq for FileDescriptionRef<T> {
65    fn eq(&self, other: &Self) -> bool {
66        self.0.id == other.0.id
67    }
68}
69
70impl<T: ?Sized> Eq for FileDescriptionRef<T> {}
71
72/// Holds a weak reference to the actual file description.
73#[derive(Debug)]
74pub struct WeakFileDescriptionRef<T: ?Sized>(Weak<FdIdWith<T>>);
75
76impl<T: ?Sized> Clone for WeakFileDescriptionRef<T> {
77    fn clone(&self) -> Self {
78        WeakFileDescriptionRef(self.0.clone())
79    }
80}
81
82impl<T: ?Sized> FileDescriptionRef<T> {
83    pub fn downgrade(this: &Self) -> WeakFileDescriptionRef<T> {
84        WeakFileDescriptionRef(Rc::downgrade(&this.0))
85    }
86}
87
88impl<T: ?Sized> WeakFileDescriptionRef<T> {
89    pub fn upgrade(&self) -> Option<FileDescriptionRef<T>> {
90        self.0.upgrade().map(FileDescriptionRef)
91    }
92}
93
94impl<T> VisitProvenance for WeakFileDescriptionRef<T> {
95    fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
96        // A weak reference can never be the only reference to some pointer or place.
97        // Since the actual file description is tracked by strong ref somewhere,
98        // it is ok to make this a NOP operation.
99    }
100}
101
102/// A helper trait to indirectly allow downcasting on `Rc<FdIdWith<dyn _>>`.
103/// Ideally we'd just add a `FdIdWith<Self>: Any` bound to the `FileDescription` trait,
104/// but that does not allow upcasting.
105pub trait FileDescriptionExt: 'static {
106    fn into_rc_any(self: FileDescriptionRef<Self>) -> Rc<dyn Any>;
107
108    /// We wrap the regular `close` function generically, so both handle `Rc::into_inner`
109    /// and epoll interest management.
110    fn close_ref<'tcx>(
111        self: FileDescriptionRef<Self>,
112        communicate_allowed: bool,
113        ecx: &mut MiriInterpCx<'tcx>,
114    ) -> InterpResult<'tcx, io::Result<()>>;
115}
116
117impl<T: FileDescription + 'static> FileDescriptionExt for T {
118    fn into_rc_any(self: FileDescriptionRef<Self>) -> Rc<dyn Any> {
119        self.0
120    }
121
122    fn close_ref<'tcx>(
123        self: FileDescriptionRef<Self>,
124        communicate_allowed: bool,
125        ecx: &mut MiriInterpCx<'tcx>,
126    ) -> InterpResult<'tcx, io::Result<()>> {
127        match Rc::into_inner(self.0) {
128            Some(fd) => {
129                // There might have been epolls interested in this FD. Remove that.
130                ecx.machine.epoll_interests.remove_epolls(fd.id);
131
132                fd.inner.destroy(fd.id, communicate_allowed, ecx)
133            }
134            None => {
135                // Not the last reference.
136                interp_ok(Ok(()))
137            }
138        }
139    }
140}
141
142pub type DynFileDescriptionRef = FileDescriptionRef<dyn FileDescription>;
143
144impl FileDescriptionRef<dyn FileDescription> {
145    pub fn downcast<T: FileDescription + 'static>(self) -> Option<FileDescriptionRef<T>> {
146        let inner = self.into_rc_any().downcast::<FdIdWith<T>>().ok()?;
147        Some(FileDescriptionRef(inner))
148    }
149}
150
151/// Represents an open file description.
152pub trait FileDescription: std::fmt::Debug + FileDescriptionExt {
153    fn name(&self) -> &'static str;
154
155    /// Reads as much as possible into the given buffer `ptr`.
156    /// `len` indicates how many bytes we should try to read.
157    ///
158    /// When the read is done, `finish` will be called. Note that `read` itself may return before
159    /// that happens! Everything that should happen "after" the `read` needs to happen inside
160    /// `finish`.
161    fn read<'tcx>(
162        self: FileDescriptionRef<Self>,
163        _communicate_allowed: bool,
164        _ptr: Pointer,
165        _len: usize,
166        _ecx: &mut MiriInterpCx<'tcx>,
167        _finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
168    ) -> InterpResult<'tcx> {
169        throw_unsup_format!("cannot read from {}", self.name());
170    }
171
172    /// Writes as much as possible from the given buffer `ptr`.
173    /// `len` indicates how many bytes we should try to write.
174    ///
175    /// When the write is done, `finish` will be called. Note that `write` itself may return before
176    /// that happens! Everything that should happen "after" the `write` needs to happen inside
177    /// `finish`.
178    fn write<'tcx>(
179        self: FileDescriptionRef<Self>,
180        _communicate_allowed: bool,
181        _ptr: Pointer,
182        _len: usize,
183        _ecx: &mut MiriInterpCx<'tcx>,
184        _finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
185    ) -> InterpResult<'tcx> {
186        throw_unsup_format!("cannot write to {}", self.name());
187    }
188
189    /// Determines whether this FD non-deterministically has its reads and writes shortened.
190    fn short_fd_operations(&self) -> bool {
191        // We only enable this for FD kinds where we think short accesses gain useful test coverage.
192        false
193    }
194
195    /// Seeks to the given offset (which can be relative to the beginning, end, or current position).
196    /// Returns the new position from the start of the stream.
197    fn seek<'tcx>(
198        &self,
199        _communicate_allowed: bool,
200        _offset: SeekFrom,
201    ) -> InterpResult<'tcx, io::Result<u64>> {
202        throw_unsup_format!("cannot seek on {}", self.name());
203    }
204
205    /// Destroys the file description. Only called when the last duplicate file descriptor is closed.
206    ///
207    /// `self_addr` is the address that this file description used to be stored at.
208    fn destroy<'tcx>(
209        self,
210        _self_id: FdId,
211        _communicate_allowed: bool,
212        _ecx: &mut MiriInterpCx<'tcx>,
213    ) -> InterpResult<'tcx, io::Result<()>>
214    where
215        Self: Sized,
216    {
217        throw_unsup_format!("cannot close {}", self.name());
218    }
219
220    /// Returns the metadata for this FD, if available.
221    /// This is either host metadata, or a non-file-backed-FD type.
222    /// The latter is for new represented as a string storing a `libc` name so we only
223    /// support that kind of metadata on Unix targets.
224    fn metadata<'tcx>(&self) -> InterpResult<'tcx, Either<io::Result<fs::Metadata>, &'static str>> {
225        throw_unsup_format!("obtaining metadata is only supported on file-backed file descriptors");
226    }
227
228    fn is_tty(&self, _communicate_allowed: bool) -> bool {
229        // Most FDs are not tty's and the consequence of a wrong `false` are minor,
230        // so we use a default impl here.
231        false
232    }
233
234    fn as_unix<'tcx>(&self, _ecx: &MiriInterpCx<'tcx>) -> &dyn UnixFileDescription {
235        panic!("Not a unix file descriptor: {}", self.name());
236    }
237
238    /// Implementation of fcntl(F_GETFL) for this FD.
239    fn get_flags<'tcx>(&self, _ecx: &mut MiriInterpCx<'tcx>) -> InterpResult<'tcx, Scalar> {
240        throw_unsup_format!("fcntl: {} is not supported for F_GETFL", self.name());
241    }
242
243    /// Implementation of fcntl(F_SETFL) for this FD.
244    fn set_flags<'tcx>(
245        &self,
246        _flag: i32,
247        _ecx: &mut MiriInterpCx<'tcx>,
248    ) -> InterpResult<'tcx, Scalar> {
249        throw_unsup_format!("fcntl: {} is not supported for F_SETFL", self.name());
250    }
251}
252
253impl FileDescription for io::Stdin {
254    fn name(&self) -> &'static str {
255        "stdin"
256    }
257
258    fn read<'tcx>(
259        self: FileDescriptionRef<Self>,
260        communicate_allowed: bool,
261        ptr: Pointer,
262        len: usize,
263        ecx: &mut MiriInterpCx<'tcx>,
264        finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
265    ) -> InterpResult<'tcx> {
266        if !communicate_allowed {
267            // We want isolation mode to be deterministic, so we have to disallow all reads, even stdin.
268            helpers::isolation_abort_error("`read` from stdin")?;
269        }
270
271        let mut stdin = &*self;
272        let result = ecx.read_from_host(|buf| stdin.read(buf), len, ptr)?;
273        finish.call(ecx, result)
274    }
275
276    fn destroy<'tcx>(
277        self,
278        _self_id: FdId,
279        _communicate_allowed: bool,
280        _ecx: &mut MiriInterpCx<'tcx>,
281    ) -> InterpResult<'tcx, io::Result<()>> {
282        interp_ok(Ok(()))
283    }
284
285    fn is_tty(&self, communicate_allowed: bool) -> bool {
286        communicate_allowed && self.is_terminal()
287    }
288}
289
290impl FileDescription for io::Stdout {
291    fn name(&self) -> &'static str {
292        "stdout"
293    }
294
295    fn write<'tcx>(
296        self: FileDescriptionRef<Self>,
297        _communicate_allowed: bool,
298        ptr: Pointer,
299        len: usize,
300        ecx: &mut MiriInterpCx<'tcx>,
301        finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
302    ) -> InterpResult<'tcx> {
303        // We allow writing to stdout even with isolation enabled.
304        let result = ecx.write_to_host(&*self, len, ptr)?;
305        // Stdout is buffered, flush to make sure it appears on the
306        // screen.  This is the write() syscall of the interpreted
307        // program, we want it to correspond to a write() syscall on
308        // the host -- there is no good in adding extra buffering
309        // here.
310        io::stdout().flush().unwrap();
311
312        finish.call(ecx, result)
313    }
314
315    fn destroy<'tcx>(
316        self,
317        _self_id: FdId,
318        _communicate_allowed: bool,
319        _ecx: &mut MiriInterpCx<'tcx>,
320    ) -> InterpResult<'tcx, io::Result<()>> {
321        interp_ok(Ok(()))
322    }
323
324    fn is_tty(&self, communicate_allowed: bool) -> bool {
325        communicate_allowed && self.is_terminal()
326    }
327}
328
329impl FileDescription for io::Stderr {
330    fn name(&self) -> &'static str {
331        "stderr"
332    }
333
334    fn destroy<'tcx>(
335        self,
336        _self_id: FdId,
337        _communicate_allowed: bool,
338        _ecx: &mut MiriInterpCx<'tcx>,
339    ) -> InterpResult<'tcx, io::Result<()>> {
340        interp_ok(Ok(()))
341    }
342
343    fn write<'tcx>(
344        self: FileDescriptionRef<Self>,
345        _communicate_allowed: bool,
346        ptr: Pointer,
347        len: usize,
348        ecx: &mut MiriInterpCx<'tcx>,
349        finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
350    ) -> InterpResult<'tcx> {
351        // We allow writing to stderr even with isolation enabled.
352        let result = ecx.write_to_host(&*self, len, ptr)?;
353        // No need to flush, stderr is not buffered.
354        finish.call(ecx, result)
355    }
356
357    fn is_tty(&self, communicate_allowed: bool) -> bool {
358        communicate_allowed && self.is_terminal()
359    }
360}
361
/// File description backed by a host [`File`].
#[derive(Debug)]
pub struct FileHandle {
    /// The underlying host file.
    pub(crate) file: File,
    /// Whether writes through this handle are permitted. Also decides whether the
    /// file gets synced when the description is destroyed.
    pub(crate) writable: bool,
}
367
368impl FileDescription for FileHandle {
369    fn name(&self) -> &'static str {
370        "file"
371    }
372
373    fn read<'tcx>(
374        self: FileDescriptionRef<Self>,
375        communicate_allowed: bool,
376        ptr: Pointer,
377        len: usize,
378        ecx: &mut MiriInterpCx<'tcx>,
379        finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
380    ) -> InterpResult<'tcx> {
381        assert!(communicate_allowed, "isolation should have prevented even opening a file");
382
383        let mut file = &self.file;
384        let result = ecx.read_from_host(|buf| file.read(buf), len, ptr)?;
385        finish.call(ecx, result)
386    }
387
388    fn write<'tcx>(
389        self: FileDescriptionRef<Self>,
390        communicate_allowed: bool,
391        ptr: Pointer,
392        len: usize,
393        ecx: &mut MiriInterpCx<'tcx>,
394        finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
395    ) -> InterpResult<'tcx> {
396        assert!(communicate_allowed, "isolation should have prevented even opening a file");
397
398        if !self.writable {
399            // Linux hosts return EBADF here which we can't translate via the platform-independent
400            // code since it does not map to any `io::ErrorKind` -- so if we don't do anything
401            // special, we'd throw an "unsupported error code" here. Windows returns something that
402            // gets translated to `PermissionDenied`. That seems like a good value so let's just use
403            // this everywhere, even if it means behavior on Unix targets does not match the real
404            // thing.
405            return finish.call(ecx, Err(ErrorKind::PermissionDenied.into()));
406        }
407        let result = ecx.write_to_host(&self.file, len, ptr)?;
408        finish.call(ecx, result)
409    }
410
411    fn seek<'tcx>(
412        &self,
413        communicate_allowed: bool,
414        offset: SeekFrom,
415    ) -> InterpResult<'tcx, io::Result<u64>> {
416        assert!(communicate_allowed, "isolation should have prevented even opening a file");
417        interp_ok((&mut &self.file).seek(offset))
418    }
419
420    fn destroy<'tcx>(
421        self,
422        _self_id: FdId,
423        communicate_allowed: bool,
424        _ecx: &mut MiriInterpCx<'tcx>,
425    ) -> InterpResult<'tcx, io::Result<()>> {
426        assert!(communicate_allowed, "isolation should have prevented even opening a file");
427        // We sync the file if it was opened in a mode different than read-only.
428        if self.writable {
429            // `File::sync_all` does the checks that are done when closing a file. We do this to
430            // to handle possible errors correctly.
431            let result = self.file.sync_all();
432            // Now we actually close the file and return the result.
433            drop(self.file);
434            interp_ok(result)
435        } else {
436            // We drop the file, this closes it but ignores any errors
437            // produced when closing it. This is done because
438            // `File::sync_all` cannot be done over files like
439            // `/dev/urandom` which are read-only. Check
440            // https://github.com/rust-lang/miri/issues/999#issuecomment-568920439
441            // for a deeper discussion.
442            drop(self.file);
443            interp_ok(Ok(()))
444        }
445    }
446
447    fn metadata<'tcx>(&self) -> InterpResult<'tcx, Either<io::Result<fs::Metadata>, &'static str>> {
448        interp_ok(Either::Left(self.file.metadata()))
449    }
450
451    fn is_tty(&self, communicate_allowed: bool) -> bool {
452        communicate_allowed && self.file.is_terminal()
453    }
454
455    fn short_fd_operations(&self) -> bool {
456        // While short accesses on file-backed FDs are very rare (at least for sufficiently small
457        // accesses), they can realistically happen when a signal interrupts the syscall.
458        // FIXME: we should return `false` if this is a named pipe...
459        true
460    }
461
462    fn as_unix<'tcx>(&self, ecx: &MiriInterpCx<'tcx>) -> &dyn UnixFileDescription {
463        assert!(
464            ecx.target_os_is_unix(),
465            "unix file operations are only available for unix targets"
466        );
467        self
468    }
469}
470
471/// Like /dev/null
472#[derive(Debug)]
473pub struct NullOutput;
474
475impl FileDescription for NullOutput {
476    fn name(&self) -> &'static str {
477        "stderr and stdout"
478    }
479
480    fn write<'tcx>(
481        self: FileDescriptionRef<Self>,
482        _communicate_allowed: bool,
483        _ptr: Pointer,
484        len: usize,
485        ecx: &mut MiriInterpCx<'tcx>,
486        finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
487    ) -> InterpResult<'tcx> {
488        // We just don't write anything, but report to the user that we did.
489        finish.call(ecx, Ok(len))
490    }
491
492    fn destroy<'tcx>(
493        self,
494        _self_id: FdId,
495        _communicate_allowed: bool,
496        _ecx: &mut MiriInterpCx<'tcx>,
497    ) -> InterpResult<'tcx, io::Result<()>> {
498        interp_ok(Ok(()))
499    }
500}
501
/// Integer type used for file descriptor numbers; this is the key type of [`FdTable`].
pub type FdNum = i32;
504
505/// The file descriptor table
506#[derive(Debug)]
507pub struct FdTable {
508    pub fds: BTreeMap<FdNum, DynFileDescriptionRef>,
509    /// Unique identifier for file description, used to differentiate between various file description.
510    next_file_description_id: FdId,
511}
512
513impl VisitProvenance for FdTable {
514    fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
515        // All our FileDescription instances do not have any tags.
516    }
517}
518
519impl FdTable {
520    fn new() -> Self {
521        FdTable { fds: BTreeMap::new(), next_file_description_id: FdId(0) }
522    }
523    pub(crate) fn init(mute_stdout_stderr: bool) -> FdTable {
524        let mut fds = FdTable::new();
525        fds.insert_new(io::stdin());
526        if mute_stdout_stderr {
527            assert_eq!(fds.insert_new(NullOutput), 1);
528            assert_eq!(fds.insert_new(NullOutput), 2);
529        } else {
530            assert_eq!(fds.insert_new(io::stdout()), 1);
531            assert_eq!(fds.insert_new(io::stderr()), 2);
532        }
533        fds
534    }
535
536    pub fn new_ref<T: FileDescription>(&mut self, fd: T) -> FileDescriptionRef<T> {
537        let file_handle =
538            FileDescriptionRef(Rc::new(FdIdWith { id: self.next_file_description_id, inner: fd }));
539        self.next_file_description_id = FdId(self.next_file_description_id.0.strict_add(1));
540        file_handle
541    }
542
543    /// Insert a new file description to the FdTable.
544    pub fn insert_new(&mut self, fd: impl FileDescription) -> FdNum {
545        let fd_ref = self.new_ref(fd);
546        self.insert(fd_ref)
547    }
548
549    pub fn insert(&mut self, fd_ref: DynFileDescriptionRef) -> FdNum {
550        self.insert_with_min_num(fd_ref, 0)
551    }
552
553    /// Insert a file description, giving it a file descriptor that is at least `min_fd_num`.
554    pub fn insert_with_min_num(
555        &mut self,
556        file_handle: DynFileDescriptionRef,
557        min_fd_num: FdNum,
558    ) -> FdNum {
559        // Find the lowest unused FD, starting from min_fd. If the first such unused FD is in
560        // between used FDs, the find_map combinator will return it. If the first such unused FD
561        // is after all other used FDs, the find_map combinator will return None, and we will use
562        // the FD following the greatest FD thus far.
563        let candidate_new_fd =
564            self.fds.range(min_fd_num..).zip(min_fd_num..).find_map(|((fd_num, _fd), counter)| {
565                if *fd_num != counter {
566                    // There was a gap in the fds stored, return the first unused one
567                    // (note that this relies on BTreeMap iterating in key order)
568                    Some(counter)
569                } else {
570                    // This fd is used, keep going
571                    None
572                }
573            });
574        let new_fd_num = candidate_new_fd.unwrap_or_else(|| {
575            // find_map ran out of BTreeMap entries before finding a free fd, use one plus the
576            // maximum fd in the map
577            self.fds.last_key_value().map(|(fd_num, _)| fd_num.strict_add(1)).unwrap_or(min_fd_num)
578        });
579
580        self.fds.try_insert(new_fd_num, file_handle).unwrap();
581        new_fd_num
582    }
583
584    pub fn get(&self, fd_num: FdNum) -> Option<DynFileDescriptionRef> {
585        let fd = self.fds.get(&fd_num)?;
586        Some(fd.clone())
587    }
588
589    pub fn remove(&mut self, fd_num: FdNum) -> Option<DynFileDescriptionRef> {
590        self.fds.remove(&fd_num)
591    }
592
593    pub fn is_fd_num(&self, fd_num: FdNum) -> bool {
594        self.fds.contains_key(&fd_num)
595    }
596}
597
598impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
599pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
600    /// Read data from a host `Read` type, store the result into machine memory,
601    /// and return whether that worked.
602    fn read_from_host(
603        &mut self,
604        mut read_cb: impl FnMut(&mut [u8]) -> io::Result<usize>,
605        len: usize,
606        ptr: Pointer,
607    ) -> InterpResult<'tcx, Result<usize, IoError>> {
608        let this = self.eval_context_mut();
609
610        let mut bytes = vec![0; len];
611        let result = read_cb(&mut bytes);
612        match result {
613            Ok(read_size) => {
614                // If reading to `bytes` did not fail, we write those bytes to the buffer.
615                // Crucially, if fewer than `bytes.len()` bytes were read, only write
616                // that much into the output buffer!
617                this.write_bytes_ptr(ptr, bytes[..read_size].iter().copied())?;
618                interp_ok(Ok(read_size))
619            }
620            Err(e) => interp_ok(Err(IoError::HostError(e))),
621        }
622    }
623
624    /// Write data to a host `Write` type, with the bytes taken from machine memory.
625    fn write_to_host(
626        &mut self,
627        mut file: impl io::Write,
628        len: usize,
629        ptr: Pointer,
630    ) -> InterpResult<'tcx, Result<usize, IoError>> {
631        let this = self.eval_context_mut();
632
633        let bytes = this.read_bytes_ptr_strip_provenance(ptr, Size::from_bytes(len))?;
634        let result = file.write(bytes);
635        interp_ok(result.map_err(IoError::HostError))
636    }
637}