Skip to main content

miri/shims/unix/linux_like/
eventfd.rs

1//! Linux `eventfd` implementation.
2use std::cell::{Cell, RefCell};
3use std::io;
4use std::io::ErrorKind;
5
6use crate::concurrency::VClock;
7use crate::shims::files::{FdId, FileDescription, FileDescriptionRef, WeakFileDescriptionRef};
8use crate::shims::unix::UnixFileDescription;
9use crate::shims::unix::linux_like::epoll::{EpollReadiness, EvalContextExt as _};
10use crate::*;
11
/// Largest value the eventfd counter may hold: `u64::MAX - 1`, i.e. every bit set except the
/// lowest one.
const MAX_COUNTER: u64 = 0xffff_ffff_ffff_fffe;
14
/// The file description backing a file descriptor created by `eventfd`.
///
/// It holds the 64-bit event counter together with the bookkeeping this shim needs:
/// the non-blocking flag, a vector clock used to synchronize readers with writers, and
/// the lists of threads currently blocked in `read` or `write` on this description.
/// For more information on the `eventfd` interface see the man page below:
///
/// <https://man.netbsd.org/eventfd.2>
#[derive(Debug)]
struct EventFd {
    /// The object contains an unsigned 64-bit integer (uint64_t) counter that is maintained by the
    /// kernel. This counter is initialized with the value specified in the argument initval.
    counter: Cell<u64>,
    /// Set when the description was created with `EFD_NONBLOCK`. If set, a `read` or `write`
    /// that cannot make progress fails with `WouldBlock` instead of blocking the thread.
    is_nonblock: bool,
    /// Clock that every successful `write` joins its release clock into and that every
    /// successful `read` acquires, so reads synchronize with prior writes.
    clock: RefCell<VClock>,
    /// A list of thread ids blocked on eventfd::read.
    blocked_read_tid: RefCell<Vec<ThreadId>>,
    /// A list of thread ids blocked on eventfd::write.
    blocked_write_tid: RefCell<Vec<ThreadId>>,
}
34
35impl FileDescription for EventFd {
36    fn name(&self) -> &'static str {
37        "event"
38    }
39
40    fn metadata<'tcx>(
41        &self,
42    ) -> InterpResult<'tcx, Either<io::Result<std::fs::Metadata>, &'static str>> {
43        // On Linux, eventfd is an "anonymous inode" reported as S_IFREG.
44        interp_ok(Either::Right("S_IFREG"))
45    }
46
47    fn destroy<'tcx>(
48        self,
49        _self_id: FdId,
50        _communicate_allowed: bool,
51        _ecx: &mut MiriInterpCx<'tcx>,
52    ) -> InterpResult<'tcx, io::Result<()>> {
53        interp_ok(Ok(()))
54    }
55
56    /// Read the counter in the buffer and return the counter if succeeded.
57    fn read<'tcx>(
58        self: FileDescriptionRef<Self>,
59        _communicate_allowed: bool,
60        ptr: Pointer,
61        len: usize,
62        ecx: &mut MiriInterpCx<'tcx>,
63        finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
64    ) -> InterpResult<'tcx> {
65        // We're treating the buffer as a `u64`.
66        let ty = ecx.machine.layouts.u64;
67        // Check the size of slice, and return error only if the size of the slice < 8.
68        if len < ty.size.bytes_usize() {
69            return finish.call(ecx, Err(ErrorKind::InvalidInput.into()));
70        }
71
72        // Turn the pointer into a place at the right type.
73        let buf_place = ecx.ptr_to_mplace_unaligned(ptr, ty);
74
75        eventfd_read(buf_place, self, ecx, finish)
76    }
77
78    /// A write call adds the 8-byte integer value supplied in
79    /// its buffer (in native endianness) to the counter.  The maximum value that may be
80    /// stored in the counter is the largest unsigned 64-bit value
81    /// minus 1 (i.e., 0xfffffffffffffffe).  If the addition would
82    /// cause the counter's value to exceed the maximum, then the
83    /// write either blocks until a read is performed on the
84    /// file descriptor, or fails with the error EAGAIN if the
85    /// file descriptor has been made nonblocking.
86    ///
87    /// A write fails with the error EINVAL if the size of the
88    /// supplied buffer is less than 8 bytes, or if an attempt is
89    /// made to write the value 0xffffffffffffffff.
90    fn write<'tcx>(
91        self: FileDescriptionRef<Self>,
92        _communicate_allowed: bool,
93        ptr: Pointer,
94        len: usize,
95        ecx: &mut MiriInterpCx<'tcx>,
96        finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
97    ) -> InterpResult<'tcx> {
98        // We're treating the buffer as a `u64`.
99        let ty = ecx.machine.layouts.u64;
100        // Check the size of slice, and return error if the size is wrong. The docs say we only
101        // error when the size is too small, but Linux seems to also error when the size is too big.
102        if len != ty.layout.size.bytes_usize() {
103            return finish.call(ecx, Err(ErrorKind::InvalidInput.into()));
104        }
105
106        // Turn the pointer into a place at the right type.
107        let buf_place = ecx.ptr_to_mplace_unaligned(ptr, ty);
108
109        eventfd_write(buf_place, self, ecx, finish)
110    }
111
112    fn as_unix<'tcx>(&self, _ecx: &MiriInterpCx<'tcx>) -> &dyn UnixFileDescription {
113        self
114    }
115}
116
117impl UnixFileDescription for EventFd {
118    fn epoll_active_events<'tcx>(&self) -> InterpResult<'tcx, EpollReadiness> {
119        // We only check the status of EPOLLIN and EPOLLOUT flags for eventfd. If other event flags
120        // need to be supported in the future, the check should be added here.
121
122        interp_ok(EpollReadiness {
123            epollin: self.counter.get() != 0,
124            epollout: self.counter.get() != MAX_COUNTER,
125            ..EpollReadiness::empty()
126        })
127    }
128}
129
130impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
131pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
132    /// This function creates an `Event` that is used as an event wait/notify mechanism by
133    /// user-space applications, and by the kernel to notify user-space applications of events.
134    /// The `Event` contains an `u64` counter maintained by the kernel. The counter is initialized
135    /// with the value specified in the `initval` argument.
136    ///
137    /// A new file descriptor referring to the `Event` is returned. The `read`, `write`, `poll`,
138    /// `select`, and `close` operations can be performed on the file descriptor. For more
139    /// information on these operations, see the man page linked below.
140    ///
141    /// The `flags` are not currently implemented for eventfd.
142    /// The `flags` may be bitwise ORed to change the behavior of `eventfd`:
143    /// `EFD_CLOEXEC` - Set the close-on-exec (`FD_CLOEXEC`) flag on the new file descriptor.
144    /// `EFD_NONBLOCK` - Set the `O_NONBLOCK` file status flag on the new open file description.
145    /// `EFD_SEMAPHORE` - miri does not support semaphore-like semantics.
146    ///
147    /// <https://linux.die.net/man/2/eventfd>
148    fn eventfd(&mut self, val: &OpTy<'tcx>, flags: &OpTy<'tcx>) -> InterpResult<'tcx, Scalar> {
149        let this = self.eval_context_mut();
150
151        let val = this.read_scalar(val)?.to_u32()?;
152        let mut flags = this.read_scalar(flags)?.to_i32()?;
153
154        let efd_cloexec = this.eval_libc_i32("EFD_CLOEXEC");
155        let efd_nonblock = this.eval_libc_i32("EFD_NONBLOCK");
156        let efd_semaphore = this.eval_libc_i32("EFD_SEMAPHORE");
157
158        if flags & efd_semaphore == efd_semaphore {
159            throw_unsup_format!("eventfd: EFD_SEMAPHORE is unsupported");
160        }
161
162        let mut is_nonblock = false;
163        // Unset the flag that we support.
164        // After unloading, flags != 0 means other flags are used.
165        if flags & efd_cloexec == efd_cloexec {
166            // cloexec is ignored because Miri does not support exec.
167            flags &= !efd_cloexec;
168        }
169        if flags & efd_nonblock == efd_nonblock {
170            flags &= !efd_nonblock;
171            is_nonblock = true;
172        }
173        if flags != 0 {
174            throw_unsup_format!("eventfd: encountered unknown unsupported flags {:#x}", flags);
175        }
176
177        let fds = &mut this.machine.fds;
178
179        let fd_value = fds.insert_new(EventFd {
180            counter: Cell::new(val.into()),
181            is_nonblock,
182            clock: RefCell::new(VClock::default()),
183            blocked_read_tid: RefCell::new(Vec::new()),
184            blocked_write_tid: RefCell::new(Vec::new()),
185        });
186
187        interp_ok(Scalar::from_i32(fd_value))
188    }
189}
190
191/// Block thread if the value addition will exceed u64::MAX -1,
192/// else just add the user-supplied value to current counter.
193fn eventfd_write<'tcx>(
194    buf_place: MPlaceTy<'tcx>,
195    eventfd: FileDescriptionRef<EventFd>,
196    ecx: &mut MiriInterpCx<'tcx>,
197    finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
198) -> InterpResult<'tcx> {
199    // Figure out which value we should add.
200    let num = ecx.read_scalar(&buf_place)?.to_u64()?;
201    // u64::MAX as input is invalid because the maximum value of counter is u64::MAX - 1.
202    if num == u64::MAX {
203        return finish.call(ecx, Err(ErrorKind::InvalidInput.into()));
204    }
205
206    match eventfd.counter.get().checked_add(num) {
207        Some(new_count @ 0..=MAX_COUNTER) => {
208            // Future `read` calls will synchronize with this write, so update the FD clock.
209            ecx.release_clock(|clock| {
210                eventfd.clock.borrow_mut().join(clock);
211            })?;
212
213            // Store new counter value.
214            eventfd.counter.set(new_count);
215
216            // Unblock *all* threads previously blocked on `read`.
217            // We need to take out the blocked thread ids and unblock them together,
218            // because `unblock_threads` may block them again and end up re-adding the
219            // thread to the blocked list.
220            let waiting_threads = std::mem::take(&mut *eventfd.blocked_read_tid.borrow_mut());
221            // FIXME: We can randomize the order of unblocking.
222            for thread_id in waiting_threads {
223                ecx.unblock_thread(thread_id, BlockReason::Eventfd)?;
224            }
225
226            // The state changed; we check and update the status of all supported event
227            // types for current file description.
228            // Linux seems to cause spurious wakeups here, and Tokio seems to rely on that
229            // (see <https://github.com/rust-lang/miri/pull/4676#discussion_r2510528994>
230            // and also <https://www.illumos.org/issues/16700>).
231            ecx.update_epoll_active_events(eventfd, /* force_edge */ true)?;
232
233            // Return how many bytes we consumed from the user-provided buffer.
234            return finish.call(ecx, Ok(buf_place.layout.size.bytes_usize()));
235        }
236        None | Some(u64::MAX) => {
237            // We can't update the state, so we have to block.
238            if eventfd.is_nonblock {
239                return finish.call(ecx, Err(ErrorKind::WouldBlock.into()));
240            }
241
242            eventfd.blocked_write_tid.borrow_mut().push(ecx.active_thread());
243
244            let weak_eventfd = FileDescriptionRef::downgrade(&eventfd);
245            ecx.block_thread(
246                BlockReason::Eventfd,
247                None,
248                callback!(
249                    @capture<'tcx> {
250                        num: u64,
251                        buf_place: MPlaceTy<'tcx>,
252                        finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
253                        weak_eventfd: WeakFileDescriptionRef<EventFd>,
254                    }
255                    |this, unblock: UnblockKind| {
256                        assert_eq!(unblock, UnblockKind::Ready);
257                        // When we get unblocked, try again. We know the ref is still valid,
258                        // otherwise there couldn't be a `write` that unblocks us.
259                        let eventfd_ref = weak_eventfd.upgrade().unwrap();
260                        eventfd_write(buf_place, eventfd_ref, this, finish)
261                    }
262                ),
263            );
264        }
265    };
266    interp_ok(())
267}
268
269/// Block thread if the current counter is 0,
270/// else just return the current counter value to the caller and set the counter to 0.
271fn eventfd_read<'tcx>(
272    buf_place: MPlaceTy<'tcx>,
273    eventfd: FileDescriptionRef<EventFd>,
274    ecx: &mut MiriInterpCx<'tcx>,
275    finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
276) -> InterpResult<'tcx> {
277    // Set counter to 0, get old value.
278    let counter = eventfd.counter.replace(0);
279
280    // Block when counter == 0.
281    if counter == 0 {
282        if eventfd.is_nonblock {
283            return finish.call(ecx, Err(ErrorKind::WouldBlock.into()));
284        }
285
286        eventfd.blocked_read_tid.borrow_mut().push(ecx.active_thread());
287
288        let weak_eventfd = FileDescriptionRef::downgrade(&eventfd);
289        ecx.block_thread(
290            BlockReason::Eventfd,
291            None,
292            callback!(
293                @capture<'tcx> {
294                    buf_place: MPlaceTy<'tcx>,
295                    finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
296                    weak_eventfd: WeakFileDescriptionRef<EventFd>,
297                }
298                |this, unblock: UnblockKind| {
299                    assert_eq!(unblock, UnblockKind::Ready);
300                    // When we get unblocked, try again. We know the ref is still valid,
301                    // otherwise there couldn't be a `write` that unblocks us.
302                    let eventfd_ref = weak_eventfd.upgrade().unwrap();
303                    eventfd_read(buf_place, eventfd_ref, this, finish)
304                }
305            ),
306        );
307    } else {
308        // Synchronize with all prior `write` calls to this FD.
309        ecx.acquire_clock(&eventfd.clock.borrow())?;
310
311        // Return old counter value into user-space buffer.
312        ecx.write_int(counter, &buf_place)?;
313
314        // Unblock *all* threads previously blocked on `write`.
315        // We need to take out the blocked thread ids and unblock them together,
316        // because `unblock_threads` may block them again and end up re-adding the
317        // thread to the blocked list.
318        let waiting_threads = std::mem::take(&mut *eventfd.blocked_write_tid.borrow_mut());
319        // FIXME: We can randomize the order of unblocking.
320        for thread_id in waiting_threads {
321            ecx.unblock_thread(thread_id, BlockReason::Eventfd)?;
322        }
323
324        // The state changed; we check and update the status of all supported event
325        // types for current file description.
326        // Linux seems to always emit do notifications here, even if we were already writable.
327        ecx.update_epoll_active_events(eventfd, /* force_edge */ true)?;
328
329        // Tell userspace how many bytes we put into the buffer.
330        return finish.call(ecx, Ok(buf_place.layout.size.bytes_usize()));
331    }
332    interp_ok(())
333}