miri/shims/unix/linux_like/
eventfd.rs

//! Linux `eventfd` implementation.
2use std::cell::{Cell, RefCell};
3use std::io;
4use std::io::ErrorKind;
5
6use crate::concurrency::VClock;
7use crate::shims::files::{FileDescription, FileDescriptionRef, WeakFileDescriptionRef};
8use crate::shims::unix::UnixFileDescription;
9use crate::shims::unix::linux_like::epoll::{EpollReadyEvents, EvalContextExt as _};
10use crate::*;
11
/// Maximum value that the eventfd counter can hold.
///
/// This is the largest unsigned 64-bit value minus 1 (`0xffff_ffff_ffff_fffe`). A `write` that
/// would push the counter past this bound cannot complete immediately.
const MAX_COUNTER: u64 = u64::MAX - 1;
14
15/// A kind of file descriptor created by `eventfd`.
16/// The `Event` type isn't currently written to by `eventfd`.
17/// The interface is meant to keep track of objects associated
18/// with a file descriptor. For more information see the man
19/// page below:
20///
21/// <https://man.netbsd.org/eventfd.2>
22#[derive(Debug)]
23struct EventFd {
24    /// The object contains an unsigned 64-bit integer (uint64_t) counter that is maintained by the
25    /// kernel. This counter is initialized with the value specified in the argument initval.
26    counter: Cell<u64>,
27    is_nonblock: bool,
28    clock: RefCell<VClock>,
29    /// A list of thread ids blocked on eventfd::read.
30    blocked_read_tid: RefCell<Vec<ThreadId>>,
31    /// A list of thread ids blocked on eventfd::write.
32    blocked_write_tid: RefCell<Vec<ThreadId>>,
33}
34
35impl FileDescription for EventFd {
36    fn name(&self) -> &'static str {
37        "event"
38    }
39
40    fn nondet_short_accesses(&self) -> bool {
41        // We always read and write exactly one `u64`.
42        false
43    }
44
45    fn close<'tcx>(
46        self,
47        _communicate_allowed: bool,
48        _ecx: &mut MiriInterpCx<'tcx>,
49    ) -> InterpResult<'tcx, io::Result<()>> {
50        interp_ok(Ok(()))
51    }
52
53    /// Read the counter in the buffer and return the counter if succeeded.
54    fn read<'tcx>(
55        self: FileDescriptionRef<Self>,
56        _communicate_allowed: bool,
57        ptr: Pointer,
58        len: usize,
59        ecx: &mut MiriInterpCx<'tcx>,
60        finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
61    ) -> InterpResult<'tcx> {
62        // We're treating the buffer as a `u64`.
63        let ty = ecx.machine.layouts.u64;
64        // Check the size of slice, and return error only if the size of the slice < 8.
65        if len < ty.size.bytes_usize() {
66            return finish.call(ecx, Err(ErrorKind::InvalidInput.into()));
67        }
68
69        // Turn the pointer into a place at the right type.
70        let buf_place = ecx.ptr_to_mplace_unaligned(ptr, ty);
71
72        eventfd_read(buf_place, self, ecx, finish)
73    }
74
75    /// A write call adds the 8-byte integer value supplied in
76    /// its buffer (in native endianness) to the counter.  The maximum value that may be
77    /// stored in the counter is the largest unsigned 64-bit value
78    /// minus 1 (i.e., 0xfffffffffffffffe).  If the addition would
79    /// cause the counter's value to exceed the maximum, then the
80    /// write either blocks until a read is performed on the
81    /// file descriptor, or fails with the error EAGAIN if the
82    /// file descriptor has been made nonblocking.
83    ///
84    /// A write fails with the error EINVAL if the size of the
85    /// supplied buffer is less than 8 bytes, or if an attempt is
86    /// made to write the value 0xffffffffffffffff.
87    fn write<'tcx>(
88        self: FileDescriptionRef<Self>,
89        _communicate_allowed: bool,
90        ptr: Pointer,
91        len: usize,
92        ecx: &mut MiriInterpCx<'tcx>,
93        finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
94    ) -> InterpResult<'tcx> {
95        // We're treating the buffer as a `u64`.
96        let ty = ecx.machine.layouts.u64;
97        // Check the size of slice, and return error only if the size of the slice < 8.
98        if len < ty.layout.size.bytes_usize() {
99            return finish.call(ecx, Err(ErrorKind::InvalidInput.into()));
100        }
101
102        // Turn the pointer into a place at the right type.
103        let buf_place = ecx.ptr_to_mplace_unaligned(ptr, ty);
104
105        eventfd_write(buf_place, self, ecx, finish)
106    }
107
108    fn as_unix<'tcx>(&self, _ecx: &MiriInterpCx<'tcx>) -> &dyn UnixFileDescription {
109        self
110    }
111}
112
113impl UnixFileDescription for EventFd {
114    fn get_epoll_ready_events<'tcx>(&self) -> InterpResult<'tcx, EpollReadyEvents> {
115        // We only check the status of EPOLLIN and EPOLLOUT flags for eventfd. If other event flags
116        // need to be supported in the future, the check should be added here.
117
118        interp_ok(EpollReadyEvents {
119            epollin: self.counter.get() != 0,
120            epollout: self.counter.get() != MAX_COUNTER,
121            ..EpollReadyEvents::new()
122        })
123    }
124}
125
126impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
127pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
128    /// This function creates an `Event` that is used as an event wait/notify mechanism by
129    /// user-space applications, and by the kernel to notify user-space applications of events.
130    /// The `Event` contains an `u64` counter maintained by the kernel. The counter is initialized
131    /// with the value specified in the `initval` argument.
132    ///
133    /// A new file descriptor referring to the `Event` is returned. The `read`, `write`, `poll`,
134    /// `select`, and `close` operations can be performed on the file descriptor. For more
135    /// information on these operations, see the man page linked below.
136    ///
137    /// The `flags` are not currently implemented for eventfd.
138    /// The `flags` may be bitwise ORed to change the behavior of `eventfd`:
139    /// `EFD_CLOEXEC` - Set the close-on-exec (`FD_CLOEXEC`) flag on the new file descriptor.
140    /// `EFD_NONBLOCK` - Set the `O_NONBLOCK` file status flag on the new open file description.
141    /// `EFD_SEMAPHORE` - miri does not support semaphore-like semantics.
142    ///
143    /// <https://linux.die.net/man/2/eventfd>
144    fn eventfd(&mut self, val: &OpTy<'tcx>, flags: &OpTy<'tcx>) -> InterpResult<'tcx, Scalar> {
145        let this = self.eval_context_mut();
146
147        let val = this.read_scalar(val)?.to_u32()?;
148        let mut flags = this.read_scalar(flags)?.to_i32()?;
149
150        let efd_cloexec = this.eval_libc_i32("EFD_CLOEXEC");
151        let efd_nonblock = this.eval_libc_i32("EFD_NONBLOCK");
152        let efd_semaphore = this.eval_libc_i32("EFD_SEMAPHORE");
153
154        if flags & efd_semaphore == efd_semaphore {
155            throw_unsup_format!("eventfd: EFD_SEMAPHORE is unsupported");
156        }
157
158        let mut is_nonblock = false;
159        // Unset the flag that we support.
160        // After unloading, flags != 0 means other flags are used.
161        if flags & efd_cloexec == efd_cloexec {
162            // cloexec is ignored because Miri does not support exec.
163            flags &= !efd_cloexec;
164        }
165        if flags & efd_nonblock == efd_nonblock {
166            flags &= !efd_nonblock;
167            is_nonblock = true;
168        }
169        if flags != 0 {
170            throw_unsup_format!("eventfd: encountered unknown unsupported flags {:#x}", flags);
171        }
172
173        let fds = &mut this.machine.fds;
174
175        let fd_value = fds.insert_new(EventFd {
176            counter: Cell::new(val.into()),
177            is_nonblock,
178            clock: RefCell::new(VClock::default()),
179            blocked_read_tid: RefCell::new(Vec::new()),
180            blocked_write_tid: RefCell::new(Vec::new()),
181        });
182
183        interp_ok(Scalar::from_i32(fd_value))
184    }
185}
186
187/// Block thread if the value addition will exceed u64::MAX -1,
188/// else just add the user-supplied value to current counter.
189fn eventfd_write<'tcx>(
190    buf_place: MPlaceTy<'tcx>,
191    eventfd: FileDescriptionRef<EventFd>,
192    ecx: &mut MiriInterpCx<'tcx>,
193    finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
194) -> InterpResult<'tcx> {
195    // Figure out which value we should add.
196    let num = ecx.read_scalar(&buf_place)?.to_u64()?;
197    // u64::MAX as input is invalid because the maximum value of counter is u64::MAX - 1.
198    if num == u64::MAX {
199        return finish.call(ecx, Err(ErrorKind::InvalidInput.into()));
200    }
201
202    match eventfd.counter.get().checked_add(num) {
203        Some(new_count @ 0..=MAX_COUNTER) => {
204            // Future `read` calls will synchronize with this write, so update the FD clock.
205            ecx.release_clock(|clock| {
206                eventfd.clock.borrow_mut().join(clock);
207            });
208
209            // Store new counter value.
210            eventfd.counter.set(new_count);
211
212            // Unblock *all* threads previously blocked on `read`.
213            // We need to take out the blocked thread ids and unblock them together,
214            // because `unblock_threads` may block them again and end up re-adding the
215            // thread to the blocked list.
216            let waiting_threads = std::mem::take(&mut *eventfd.blocked_read_tid.borrow_mut());
217            // FIXME: We can randomize the order of unblocking.
218            for thread_id in waiting_threads {
219                ecx.unblock_thread(thread_id, BlockReason::Eventfd)?;
220            }
221
222            // The state changed; we check and update the status of all supported event
223            // types for current file description.
224            ecx.check_and_update_readiness(eventfd)?;
225
226            // Return how many bytes we consumed from the user-provided buffer.
227            return finish.call(ecx, Ok(buf_place.layout.size.bytes_usize()));
228        }
229        None | Some(u64::MAX) => {
230            // We can't update the state, so we have to block.
231            if eventfd.is_nonblock {
232                return finish.call(ecx, Err(ErrorKind::WouldBlock.into()));
233            }
234
235            eventfd.blocked_write_tid.borrow_mut().push(ecx.active_thread());
236
237            let weak_eventfd = FileDescriptionRef::downgrade(&eventfd);
238            ecx.block_thread(
239                BlockReason::Eventfd,
240                None,
241                callback!(
242                    @capture<'tcx> {
243                        num: u64,
244                        buf_place: MPlaceTy<'tcx>,
245                        finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
246                        weak_eventfd: WeakFileDescriptionRef<EventFd>,
247                    }
248                    |this, unblock: UnblockKind| {
249                        assert_eq!(unblock, UnblockKind::Ready);
250                        // When we get unblocked, try again. We know the ref is still valid,
251                        // otherwise there couldn't be a `write` that unblocks us.
252                        let eventfd_ref = weak_eventfd.upgrade().unwrap();
253                        eventfd_write(buf_place, eventfd_ref, this, finish)
254                    }
255                ),
256            );
257        }
258    };
259    interp_ok(())
260}
261
262/// Block thread if the current counter is 0,
263/// else just return the current counter value to the caller and set the counter to 0.
264fn eventfd_read<'tcx>(
265    buf_place: MPlaceTy<'tcx>,
266    eventfd: FileDescriptionRef<EventFd>,
267    ecx: &mut MiriInterpCx<'tcx>,
268    finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
269) -> InterpResult<'tcx> {
270    // Set counter to 0, get old value.
271    let counter = eventfd.counter.replace(0);
272
273    // Block when counter == 0.
274    if counter == 0 {
275        if eventfd.is_nonblock {
276            return finish.call(ecx, Err(ErrorKind::WouldBlock.into()));
277        }
278
279        eventfd.blocked_read_tid.borrow_mut().push(ecx.active_thread());
280
281        let weak_eventfd = FileDescriptionRef::downgrade(&eventfd);
282        ecx.block_thread(
283            BlockReason::Eventfd,
284            None,
285            callback!(
286                @capture<'tcx> {
287                    buf_place: MPlaceTy<'tcx>,
288                    finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
289                    weak_eventfd: WeakFileDescriptionRef<EventFd>,
290                }
291                |this, unblock: UnblockKind| {
292                    assert_eq!(unblock, UnblockKind::Ready);
293                    // When we get unblocked, try again. We know the ref is still valid,
294                    // otherwise there couldn't be a `write` that unblocks us.
295                    let eventfd_ref = weak_eventfd.upgrade().unwrap();
296                    eventfd_read(buf_place, eventfd_ref, this, finish)
297                }
298            ),
299        );
300    } else {
301        // Synchronize with all prior `write` calls to this FD.
302        ecx.acquire_clock(&eventfd.clock.borrow());
303
304        // Return old counter value into user-space buffer.
305        ecx.write_int(counter, &buf_place)?;
306
307        // Unblock *all* threads previously blocked on `write`.
308        // We need to take out the blocked thread ids and unblock them together,
309        // because `unblock_threads` may block them again and end up re-adding the
310        // thread to the blocked list.
311        let waiting_threads = std::mem::take(&mut *eventfd.blocked_write_tid.borrow_mut());
312        // FIXME: We can randomize the order of unblocking.
313        for thread_id in waiting_threads {
314            ecx.unblock_thread(thread_id, BlockReason::Eventfd)?;
315        }
316
317        // The state changed; we check and update the status of all supported event
318        // types for current file description.
319        ecx.check_and_update_readiness(eventfd)?;
320
321        // Tell userspace how many bytes we put into the buffer.
322        return finish.call(ecx, Ok(buf_place.layout.size.bytes_usize()));
323    }
324    interp_ok(())
325}