miri/shims/unix/linux_like/eventfd.rs
1//! Linux `eventfd` implementation.
2use std::cell::{Cell, RefCell};
3use std::io;
4use std::io::ErrorKind;
5
6use crate::concurrency::VClock;
7use crate::shims::files::{FdId, FileDescription, FileDescriptionRef, WeakFileDescriptionRef};
8use crate::shims::unix::UnixFileDescription;
9use crate::shims::unix::linux_like::epoll::{EpollEvents, EvalContextExt as _};
10use crate::*;
11
/// Largest value the eventfd counter is allowed to reach: `u64::MAX - 1`.
/// `u64::MAX` itself is never a valid counter value.
const MAX_COUNTER: u64 = 0xffff_ffff_ffff_fffe;
14
/// The file description backing a descriptor created by `eventfd`.
///
/// It holds the 64-bit counter together with the bookkeeping needed to block and wake
/// threads that `read` from or `write` to the descriptor. For the semantics being
/// emulated, see the man page below:
///
/// <https://man.netbsd.org/eventfd.2>
#[derive(Debug)]
struct EventFd {
    /// The object contains an unsigned 64-bit integer (uint64_t) counter that is maintained by the
    /// kernel. This counter is initialized with the value specified in the argument initval.
    counter: Cell<u64>,
    /// Whether the descriptor was created with `EFD_NONBLOCK`. When set, a `read` or `write`
    /// that would otherwise block fails with `ErrorKind::WouldBlock` instead.
    is_nonblock: bool,
    /// Vector clock that successful writes release into and successful reads acquire from,
    /// so that a `read` synchronizes with the prior `write` calls on this descriptor.
    clock: RefCell<VClock>,
    /// A list of thread ids blocked on eventfd::read.
    blocked_read_tid: RefCell<Vec<ThreadId>>,
    /// A list of thread ids blocked on eventfd::write.
    blocked_write_tid: RefCell<Vec<ThreadId>>,
}
34
35impl FileDescription for EventFd {
36 fn name(&self) -> &'static str {
37 "event"
38 }
39
40 fn metadata<'tcx>(
41 &self,
42 ) -> InterpResult<'tcx, Either<io::Result<std::fs::Metadata>, &'static str>> {
43 // On Linux, eventfd is an "anonymous inode" reported as S_IFREG.
44 interp_ok(Either::Right("S_IFREG"))
45 }
46
47 fn destroy<'tcx>(
48 self,
49 _self_id: FdId,
50 _communicate_allowed: bool,
51 _ecx: &mut MiriInterpCx<'tcx>,
52 ) -> InterpResult<'tcx, io::Result<()>> {
53 interp_ok(Ok(()))
54 }
55
56 /// Read the counter in the buffer and return the counter if succeeded.
57 fn read<'tcx>(
58 self: FileDescriptionRef<Self>,
59 _communicate_allowed: bool,
60 ptr: Pointer,
61 len: usize,
62 ecx: &mut MiriInterpCx<'tcx>,
63 finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
64 ) -> InterpResult<'tcx> {
65 // We're treating the buffer as a `u64`.
66 let ty = ecx.machine.layouts.u64;
67 // Check the size of slice, and return error only if the size of the slice < 8.
68 if len < ty.size.bytes_usize() {
69 return finish.call(ecx, Err(ErrorKind::InvalidInput.into()));
70 }
71
72 // Turn the pointer into a place at the right type.
73 let buf_place = ecx.ptr_to_mplace_unaligned(ptr, ty);
74
75 eventfd_read(buf_place, self, ecx, finish)
76 }
77
78 /// A write call adds the 8-byte integer value supplied in
79 /// its buffer (in native endianness) to the counter. The maximum value that may be
80 /// stored in the counter is the largest unsigned 64-bit value
81 /// minus 1 (i.e., 0xfffffffffffffffe). If the addition would
82 /// cause the counter's value to exceed the maximum, then the
83 /// write either blocks until a read is performed on the
84 /// file descriptor, or fails with the error EAGAIN if the
85 /// file descriptor has been made nonblocking.
86 ///
87 /// A write fails with the error EINVAL if the size of the
88 /// supplied buffer is less than 8 bytes, or if an attempt is
89 /// made to write the value 0xffffffffffffffff.
90 fn write<'tcx>(
91 self: FileDescriptionRef<Self>,
92 _communicate_allowed: bool,
93 ptr: Pointer,
94 len: usize,
95 ecx: &mut MiriInterpCx<'tcx>,
96 finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
97 ) -> InterpResult<'tcx> {
98 // We're treating the buffer as a `u64`.
99 let ty = ecx.machine.layouts.u64;
100 // Check the size of slice, and return error only if the size of the slice < 8.
101 if len < ty.layout.size.bytes_usize() {
102 return finish.call(ecx, Err(ErrorKind::InvalidInput.into()));
103 }
104
105 // Turn the pointer into a place at the right type.
106 let buf_place = ecx.ptr_to_mplace_unaligned(ptr, ty);
107
108 eventfd_write(buf_place, self, ecx, finish)
109 }
110
111 fn as_unix<'tcx>(&self, _ecx: &MiriInterpCx<'tcx>) -> &dyn UnixFileDescription {
112 self
113 }
114}
115
116impl UnixFileDescription for EventFd {
117 fn epoll_active_events<'tcx>(&self) -> InterpResult<'tcx, EpollEvents> {
118 // We only check the status of EPOLLIN and EPOLLOUT flags for eventfd. If other event flags
119 // need to be supported in the future, the check should be added here.
120
121 interp_ok(EpollEvents {
122 epollin: self.counter.get() != 0,
123 epollout: self.counter.get() != MAX_COUNTER,
124 ..EpollEvents::new()
125 })
126 }
127}
128
129impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
130pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
131 /// This function creates an `Event` that is used as an event wait/notify mechanism by
132 /// user-space applications, and by the kernel to notify user-space applications of events.
133 /// The `Event` contains an `u64` counter maintained by the kernel. The counter is initialized
134 /// with the value specified in the `initval` argument.
135 ///
136 /// A new file descriptor referring to the `Event` is returned. The `read`, `write`, `poll`,
137 /// `select`, and `close` operations can be performed on the file descriptor. For more
138 /// information on these operations, see the man page linked below.
139 ///
140 /// The `flags` are not currently implemented for eventfd.
141 /// The `flags` may be bitwise ORed to change the behavior of `eventfd`:
142 /// `EFD_CLOEXEC` - Set the close-on-exec (`FD_CLOEXEC`) flag on the new file descriptor.
143 /// `EFD_NONBLOCK` - Set the `O_NONBLOCK` file status flag on the new open file description.
144 /// `EFD_SEMAPHORE` - miri does not support semaphore-like semantics.
145 ///
146 /// <https://linux.die.net/man/2/eventfd>
147 fn eventfd(&mut self, val: &OpTy<'tcx>, flags: &OpTy<'tcx>) -> InterpResult<'tcx, Scalar> {
148 let this = self.eval_context_mut();
149
150 let val = this.read_scalar(val)?.to_u32()?;
151 let mut flags = this.read_scalar(flags)?.to_i32()?;
152
153 let efd_cloexec = this.eval_libc_i32("EFD_CLOEXEC");
154 let efd_nonblock = this.eval_libc_i32("EFD_NONBLOCK");
155 let efd_semaphore = this.eval_libc_i32("EFD_SEMAPHORE");
156
157 if flags & efd_semaphore == efd_semaphore {
158 throw_unsup_format!("eventfd: EFD_SEMAPHORE is unsupported");
159 }
160
161 let mut is_nonblock = false;
162 // Unset the flag that we support.
163 // After unloading, flags != 0 means other flags are used.
164 if flags & efd_cloexec == efd_cloexec {
165 // cloexec is ignored because Miri does not support exec.
166 flags &= !efd_cloexec;
167 }
168 if flags & efd_nonblock == efd_nonblock {
169 flags &= !efd_nonblock;
170 is_nonblock = true;
171 }
172 if flags != 0 {
173 throw_unsup_format!("eventfd: encountered unknown unsupported flags {:#x}", flags);
174 }
175
176 let fds = &mut this.machine.fds;
177
178 let fd_value = fds.insert_new(EventFd {
179 counter: Cell::new(val.into()),
180 is_nonblock,
181 clock: RefCell::new(VClock::default()),
182 blocked_read_tid: RefCell::new(Vec::new()),
183 blocked_write_tid: RefCell::new(Vec::new()),
184 });
185
186 interp_ok(Scalar::from_i32(fd_value))
187 }
188}
189
190/// Block thread if the value addition will exceed u64::MAX -1,
191/// else just add the user-supplied value to current counter.
192fn eventfd_write<'tcx>(
193 buf_place: MPlaceTy<'tcx>,
194 eventfd: FileDescriptionRef<EventFd>,
195 ecx: &mut MiriInterpCx<'tcx>,
196 finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
197) -> InterpResult<'tcx> {
198 // Figure out which value we should add.
199 let num = ecx.read_scalar(&buf_place)?.to_u64()?;
200 // u64::MAX as input is invalid because the maximum value of counter is u64::MAX - 1.
201 if num == u64::MAX {
202 return finish.call(ecx, Err(ErrorKind::InvalidInput.into()));
203 }
204
205 match eventfd.counter.get().checked_add(num) {
206 Some(new_count @ 0..=MAX_COUNTER) => {
207 // Future `read` calls will synchronize with this write, so update the FD clock.
208 ecx.release_clock(|clock| {
209 eventfd.clock.borrow_mut().join(clock);
210 })?;
211
212 // Store new counter value.
213 eventfd.counter.set(new_count);
214
215 // Unblock *all* threads previously blocked on `read`.
216 // We need to take out the blocked thread ids and unblock them together,
217 // because `unblock_threads` may block them again and end up re-adding the
218 // thread to the blocked list.
219 let waiting_threads = std::mem::take(&mut *eventfd.blocked_read_tid.borrow_mut());
220 // FIXME: We can randomize the order of unblocking.
221 for thread_id in waiting_threads {
222 ecx.unblock_thread(thread_id, BlockReason::Eventfd)?;
223 }
224
225 // The state changed; we check and update the status of all supported event
226 // types for current file description.
227 // Linux seems to cause spurious wakeups here, and Tokio seems to rely on that
228 // (see <https://github.com/rust-lang/miri/pull/4676#discussion_r2510528994>
229 // and also <https://www.illumos.org/issues/16700>).
230 ecx.update_epoll_active_events(eventfd, /* force_edge */ true)?;
231
232 // Return how many bytes we consumed from the user-provided buffer.
233 return finish.call(ecx, Ok(buf_place.layout.size.bytes_usize()));
234 }
235 None | Some(u64::MAX) => {
236 // We can't update the state, so we have to block.
237 if eventfd.is_nonblock {
238 return finish.call(ecx, Err(ErrorKind::WouldBlock.into()));
239 }
240
241 eventfd.blocked_write_tid.borrow_mut().push(ecx.active_thread());
242
243 let weak_eventfd = FileDescriptionRef::downgrade(&eventfd);
244 ecx.block_thread(
245 BlockReason::Eventfd,
246 None,
247 callback!(
248 @capture<'tcx> {
249 num: u64,
250 buf_place: MPlaceTy<'tcx>,
251 finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
252 weak_eventfd: WeakFileDescriptionRef<EventFd>,
253 }
254 |this, unblock: UnblockKind| {
255 assert_eq!(unblock, UnblockKind::Ready);
256 // When we get unblocked, try again. We know the ref is still valid,
257 // otherwise there couldn't be a `write` that unblocks us.
258 let eventfd_ref = weak_eventfd.upgrade().unwrap();
259 eventfd_write(buf_place, eventfd_ref, this, finish)
260 }
261 ),
262 );
263 }
264 };
265 interp_ok(())
266}
267
268/// Block thread if the current counter is 0,
269/// else just return the current counter value to the caller and set the counter to 0.
270fn eventfd_read<'tcx>(
271 buf_place: MPlaceTy<'tcx>,
272 eventfd: FileDescriptionRef<EventFd>,
273 ecx: &mut MiriInterpCx<'tcx>,
274 finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
275) -> InterpResult<'tcx> {
276 // Set counter to 0, get old value.
277 let counter = eventfd.counter.replace(0);
278
279 // Block when counter == 0.
280 if counter == 0 {
281 if eventfd.is_nonblock {
282 return finish.call(ecx, Err(ErrorKind::WouldBlock.into()));
283 }
284
285 eventfd.blocked_read_tid.borrow_mut().push(ecx.active_thread());
286
287 let weak_eventfd = FileDescriptionRef::downgrade(&eventfd);
288 ecx.block_thread(
289 BlockReason::Eventfd,
290 None,
291 callback!(
292 @capture<'tcx> {
293 buf_place: MPlaceTy<'tcx>,
294 finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
295 weak_eventfd: WeakFileDescriptionRef<EventFd>,
296 }
297 |this, unblock: UnblockKind| {
298 assert_eq!(unblock, UnblockKind::Ready);
299 // When we get unblocked, try again. We know the ref is still valid,
300 // otherwise there couldn't be a `write` that unblocks us.
301 let eventfd_ref = weak_eventfd.upgrade().unwrap();
302 eventfd_read(buf_place, eventfd_ref, this, finish)
303 }
304 ),
305 );
306 } else {
307 // Synchronize with all prior `write` calls to this FD.
308 ecx.acquire_clock(&eventfd.clock.borrow())?;
309
310 // Return old counter value into user-space buffer.
311 ecx.write_int(counter, &buf_place)?;
312
313 // Unblock *all* threads previously blocked on `write`.
314 // We need to take out the blocked thread ids and unblock them together,
315 // because `unblock_threads` may block them again and end up re-adding the
316 // thread to the blocked list.
317 let waiting_threads = std::mem::take(&mut *eventfd.blocked_write_tid.borrow_mut());
318 // FIXME: We can randomize the order of unblocking.
319 for thread_id in waiting_threads {
320 ecx.unblock_thread(thread_id, BlockReason::Eventfd)?;
321 }
322
323 // The state changed; we check and update the status of all supported event
324 // types for current file description.
325 // Linux seems to always emit do notifications here, even if we were already writable.
326 ecx.update_epoll_active_events(eventfd, /* force_edge */ true)?;
327
328 // Tell userspace how many bytes we put into the buffer.
329 return finish.call(ecx, Ok(buf_place.layout.size.bytes_usize()));
330 }
331 interp_ok(())
332}