Skip to main content

miri/shims/unix/linux_like/
epoll.rs

1use std::cell::RefCell;
2use std::collections::{BTreeMap, VecDeque};
3use std::io;
4use std::time::Duration;
5
6use rustc_abi::FieldIdx;
7
8use crate::concurrency::VClock;
9use crate::shims::files::{
10    DynFileDescriptionRef, FdId, FdNum, FileDescription, FileDescriptionRef, WeakFileDescriptionRef,
11};
12use crate::shims::unix::UnixFileDescription;
13use crate::*;
14
/// Key identifying a single registration in an epoll interest list: the ID of the target
/// file description paired with the file descriptor number it was registered under.
type EpollEventKey = (FdId, FdNum);
16
/// An `Epoll` file descriptor connects file handles and epoll events.
///
/// All state lives in `RefCell`s so that it can be mutated through shared references to the
/// file description.
#[derive(Debug, Default)]
pub struct Epoll {
    /// A map of EpollEventInterests registered under this epoll instance. Each entry is
    /// differentiated using FdId and file descriptor value.
    interest_list: RefCell<BTreeMap<EpollEventKey, EpollEventInterest>>,
    /// The subset of interests that is currently considered "ready". Stored separately so we
    /// can access it more efficiently.
    /// This is implemented as a queue so that for level-triggered epoll, all events eventually
    /// get returned from `epoll_wait`. The queue does not contain any duplicates.
    ready_events: RefCell<VecDeque<EpollEventKey>>,
    /// The queue of threads blocked on this epoll instance.
    /// Threads are appended when they start blocking in `epoll_wait` and woken from the front.
    queue: RefCell<VecDeque<ThreadId>>,
}
31
/// Provenance visitor hook for `Epoll`. The visitor is intentionally ignored.
impl VisitProvenance for Epoll {
    fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
        // No provenance anywhere in this type: the fields only hold keys, bitmasks,
        // clocks, plain `u64` user data and thread IDs.
    }
}
37
38/// Returns the range of all EpollEventKey for the given FD ID.
39fn range_for_id(id: FdId) -> std::ops::RangeInclusive<EpollEventKey> {
40    (id, 0)..=(id, i32::MAX)
41}
42
/// Tracks the events that this epoll is interested in for a given file descriptor.
#[derive(Debug)]
pub struct EpollEventInterest {
    /// The events bitmask the epoll is interested in.
    /// `epoll_ctl` stores this with `EPOLLET` stripped and `EPOLLHUP`/`EPOLLERR` always set.
    relevant_events: u32,
    /// The currently active events for this file descriptor.
    /// `update_readiness` keeps this equal to `relevant_events` masked with the FD's active events.
    active_events: u32,
    /// Boolean whether this is an edge-triggered interest.
    /// When [`false`] it's a level-triggered interest instead.
    is_edge_triggered: bool,
    /// The vector clock for wakeups.
    /// It is joined with the triggering thread's clock in `update_readiness` and acquired by
    /// the thread that receives the event in `return_ready_list`.
    clock: VClock,
    /// User-defined data associated with this interest.
    /// libc's data field in epoll_event can store integer or pointer,
    /// but only u64 is supported for now.
    /// <https://man7.org/linux/man-pages/man3/epoll_event.3type.html>
    data: u64,
}
61
/// Struct reflecting the readiness of a file description.
///
/// Each field corresponds to one `EPOLL*` event bit; `get_event_bitmask` converts the set
/// fields into the libc bitmask.
#[derive(Debug)]
pub struct EpollReadiness {
    /// The associated file is available for read(2) operations, in the sense that a read will not block.
    /// (I.e., returning EOF is considered "ready".)
    pub epollin: bool,
    /// The associated file is available for write(2) operations, in the sense that a write will not block.
    pub epollout: bool,
    /// Stream socket peer closed connection, or shut down writing
    /// half of connection.
    pub epollrdhup: bool,
    /// For stream socket, this event merely indicates that the peer
    /// closed its end of the channel.
    /// Unlike epollrdhup, this should only be set when the stream is fully closed.
    /// epollrdhup also gets set when only the write half is closed, which is possible
    /// via `shutdown(_, SHUT_WR)`.
    pub epollhup: bool,
    /// Error condition happened on the associated file descriptor.
    pub epollerr: bool,
}
82
83impl EpollReadiness {
84    pub fn empty() -> Self {
85        EpollReadiness {
86            epollin: false,
87            epollout: false,
88            epollrdhup: false,
89            epollhup: false,
90            epollerr: false,
91        }
92    }
93
94    pub fn get_event_bitmask<'tcx>(&self, ecx: &MiriInterpCx<'tcx>) -> u32 {
95        let epollin = ecx.eval_libc_u32("EPOLLIN");
96        let epollout = ecx.eval_libc_u32("EPOLLOUT");
97        let epollrdhup = ecx.eval_libc_u32("EPOLLRDHUP");
98        let epollhup = ecx.eval_libc_u32("EPOLLHUP");
99        let epollerr = ecx.eval_libc_u32("EPOLLERR");
100
101        let mut bitmask = 0;
102        if self.epollin {
103            bitmask |= epollin;
104        }
105        if self.epollout {
106            bitmask |= epollout;
107        }
108        if self.epollrdhup {
109            bitmask |= epollrdhup;
110        }
111        if self.epollhup {
112            bitmask |= epollhup;
113        }
114        if self.epollerr {
115            bitmask |= epollerr;
116        }
117        bitmask
118    }
119}
120
121// Best-effort mapping from cross platform readiness to epoll readiness.
122impl From<&BlockingIoSourceReadiness> for EpollReadiness {
123    fn from(readiness: &BlockingIoSourceReadiness) -> Self {
124        Self {
125            epollin: readiness.readable,
126            epollout: readiness.writable,
127            epollrdhup: readiness.read_closed,
128            epollhup: readiness.write_closed,
129            epollerr: readiness.error,
130        }
131    }
132}
133
134impl FileDescription for Epoll {
135    fn name(&self) -> &'static str {
136        "epoll"
137    }
138
139    fn metadata<'tcx>(
140        &self,
141    ) -> InterpResult<'tcx, Either<io::Result<std::fs::Metadata>, &'static str>> {
142        // On Linux, epoll is an "anonymous inode" reported as S_IFREG.
143        interp_ok(Either::Right("S_IFREG"))
144    }
145
146    fn destroy<'tcx>(
147        mut self,
148        self_id: FdId,
149        _communicate_allowed: bool,
150        ecx: &mut MiriInterpCx<'tcx>,
151    ) -> InterpResult<'tcx, io::Result<()>> {
152        // If we were interested in some FDs, we can remove that now.
153        let mut ids = self.interest_list.get_mut().keys().map(|(id, _num)| *id).collect::<Vec<_>>();
154        ids.dedup(); // they come out of the map sorted
155        for id in ids {
156            ecx.machine.epoll_interests.remove(id, self_id);
157        }
158        interp_ok(Ok(()))
159    }
160
161    fn as_unix<'tcx>(&self, _ecx: &MiriInterpCx<'tcx>) -> &dyn UnixFileDescription {
162        self
163    }
164}
165
// `Epoll` overrides nothing here, so every method uses the trait's default implementation.
impl UnixFileDescription for Epoll {}
167
/// The table of all EpollEventInterest.
/// This tracks, for each file description, which epoll instances have an interest in events
/// for this file description.
///
/// The map key is the `FdId` of the watched file description. Each vector entry pairs the
/// `FdId` of an interested epoll instance with a weak reference to it, so that we can
/// recognize the epoll later when it is slated for removal. The vector is sorted by that
/// epoll ID.
pub struct EpollInterestTable(BTreeMap<FdId, Vec<(FdId, WeakFileDescriptionRef<Epoll>)>>);
173
174impl EpollInterestTable {
175    pub(crate) fn new() -> Self {
176        EpollInterestTable(BTreeMap::new())
177    }
178
179    fn insert(&mut self, id: FdId, epoll: &FileDescriptionRef<Epoll>) {
180        let epolls = self.0.entry(id).or_default();
181        let idx = epolls
182            .binary_search_by_key(&epoll.id(), |&(id, _)| id)
183            .expect_err("trying to add an epoll that's already in the list");
184        epolls.insert(idx, (epoll.id(), FileDescriptionRef::downgrade(epoll)));
185    }
186
187    fn remove(&mut self, id: FdId, epoll_id: FdId) {
188        let epolls = self.0.entry(id).or_default();
189        let idx = epolls
190            .binary_search_by_key(&epoll_id, |&(id, _)| id)
191            .expect("trying to remove an epoll that's not in the list");
192        epolls.remove(idx);
193    }
194
195    fn get_epolls(&self, id: FdId) -> Option<impl Iterator<Item = &WeakFileDescriptionRef<Epoll>>> {
196        self.0.get(&id).map(|epolls| epolls.iter().map(|(_id, epoll)| epoll))
197    }
198
199    pub fn remove_epolls(&mut self, id: FdId) {
200        if let Some(epolls) = self.0.remove(&id) {
201            for epoll in epolls.iter().filter_map(|(_id, epoll)| epoll.upgrade()) {
202                // This is a still-live epoll with interest in this FD. Remove all
203                // relevant interests (including from the ready set).
204                epoll
205                    .interest_list
206                    .borrow_mut()
207                    .extract_if(range_for_id(id), |_, _| true)
208                    // Consume the iterator.
209                    .for_each(drop);
210                // Remove the ready events for this file description.
211                epoll.ready_events.borrow_mut().retain(|(fd_id, _)| fd_id != &id);
212            }
213        }
214    }
215}
216
217impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
218pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
219    /// This function returns a file descriptor referring to the new `Epoll` instance. This file
220    /// descriptor is used for all subsequent calls to the epoll interface. If the `flags` argument
221    /// is 0, then this function is the same as `epoll_create()`.
222    ///
223    /// <https://linux.die.net/man/2/epoll_create1>
224    fn epoll_create1(&mut self, flags: &OpTy<'tcx>) -> InterpResult<'tcx, Scalar> {
225        let this = self.eval_context_mut();
226
227        let flags = this.read_scalar(flags)?.to_i32()?;
228
229        let epoll_cloexec = this.eval_libc_i32("EPOLL_CLOEXEC");
230
231        // Miri does not support exec, so EPOLL_CLOEXEC flag has no effect.
232        if flags != epoll_cloexec && flags != 0 {
233            throw_unsup_format!(
234                "epoll_create1: flag {:#x} is unsupported, only 0 or EPOLL_CLOEXEC are allowed",
235                flags
236            );
237        }
238
239        let fd = this.machine.fds.insert_new(Epoll::default());
240        interp_ok(Scalar::from_i32(fd))
241    }
242
243    /// This function performs control operations on the `Epoll` instance referred to by the file
244    /// descriptor `epfd`. It requests that the operation `op` be performed for the target file
245    /// descriptor, `fd`.
246    ///
247    /// Valid values for the op argument are:
248    /// `EPOLL_CTL_ADD` - Register the target file descriptor `fd` on the `Epoll` instance referred
249    /// to by the file descriptor `epfd` and associate the event `event` with the internal file
250    /// linked to `fd`.
251    /// `EPOLL_CTL_MOD` - Change the event `event` associated with the target file descriptor `fd`.
252    /// `EPOLL_CTL_DEL` - Deregister the target file descriptor `fd` from the `Epoll` instance
253    /// referred to by `epfd`. The `event` is ignored and can be null.
254    ///
255    /// <https://linux.die.net/man/2/epoll_ctl>
256    fn epoll_ctl(
257        &mut self,
258        epfd: &OpTy<'tcx>,
259        op: &OpTy<'tcx>,
260        fd: &OpTy<'tcx>,
261        event: &OpTy<'tcx>,
262    ) -> InterpResult<'tcx, Scalar> {
263        let this = self.eval_context_mut();
264
265        let epfd_value = this.read_scalar(epfd)?.to_i32()?;
266        let op = this.read_scalar(op)?.to_i32()?;
267        let fd = this.read_scalar(fd)?.to_i32()?;
268        let event = this.deref_pointer_as(event, this.libc_ty_layout("epoll_event"))?;
269
270        let epoll_ctl_add = this.eval_libc_i32("EPOLL_CTL_ADD");
271        let epoll_ctl_mod = this.eval_libc_i32("EPOLL_CTL_MOD");
272        let epoll_ctl_del = this.eval_libc_i32("EPOLL_CTL_DEL");
273        let epollin = this.eval_libc_u32("EPOLLIN");
274        let epollout = this.eval_libc_u32("EPOLLOUT");
275        let epollrdhup = this.eval_libc_u32("EPOLLRDHUP");
276        let epollet = this.eval_libc_u32("EPOLLET");
277        let epollhup = this.eval_libc_u32("EPOLLHUP");
278        let epollerr = this.eval_libc_u32("EPOLLERR");
279
280        // Throw EFAULT if epfd and fd have the same value.
281        if epfd_value == fd {
282            return this.set_errno_and_return_neg1_i32(LibcError("EFAULT"));
283        }
284
285        // Check if epfd is a valid epoll file descriptor.
286        let Some(epfd) = this.machine.fds.get(epfd_value) else {
287            return this.set_errno_and_return_neg1_i32(LibcError("EBADF"));
288        };
289        let epfd = epfd
290            .downcast::<Epoll>()
291            .ok_or_else(|| err_unsup_format!("non-epoll FD passed to `epoll_ctl`"))?;
292
293        let mut interest_list = epfd.interest_list.borrow_mut();
294
295        let Some(fd_ref) = this.machine.fds.get(fd) else {
296            return this.set_errno_and_return_neg1_i32(LibcError("EBADF"));
297        };
298        let id = fd_ref.id();
299
300        if op == epoll_ctl_add || op == epoll_ctl_mod {
301            // Read event bitmask and data from epoll_event passed by caller.
302            let mut events =
303                this.read_scalar(&this.project_field(&event, FieldIdx::ZERO)?)?.to_u32()?;
304            let data = this.read_scalar(&this.project_field(&event, FieldIdx::ONE)?)?.to_u64()?;
305
306            let is_edge_triggered = if events & epollet == epollet {
307                events &= !epollet;
308                true
309            } else {
310                false
311            };
312
313            // Unset the flag we support to discover if any unsupported flags are used.
314            let mut flags = events;
315            // epoll_wait(2) will always wait for epollhup and epollerr; it is not
316            // necessary to set it in events when calling epoll_ctl().
317            // So we will always set these two event types.
318            events |= epollhup;
319            events |= epollerr;
320
321            if flags & epollin == epollin {
322                flags &= !epollin;
323            }
324            if flags & epollout == epollout {
325                flags &= !epollout;
326            }
327            if flags & epollrdhup == epollrdhup {
328                flags &= !epollrdhup;
329            }
330            if flags & epollhup == epollhup {
331                flags &= !epollhup;
332            }
333            if flags & epollerr == epollerr {
334                flags &= !epollerr;
335            }
336            if flags != 0 {
337                throw_unsup_format!(
338                    "epoll_ctl: encountered unknown unsupported flags {:#x}",
339                    flags
340                );
341            }
342
343            // Add new interest to list. Experiments show that we need to reset all state
344            // on `EPOLL_CTL_MOD`, including the edge tracking.
345            let epoll_key = (id, fd);
346            if op == epoll_ctl_add {
347                if interest_list.range(range_for_id(id)).next().is_none() {
348                    // This is the first time this FD got added to this epoll.
349                    // Remember that in the global list so we get notified about FD events.
350                    this.machine.epoll_interests.insert(id, &epfd);
351                }
352                let new_interest = EpollEventInterest {
353                    relevant_events: events,
354                    is_edge_triggered,
355                    data,
356                    active_events: 0,
357                    clock: VClock::default(),
358                };
359                if interest_list.try_insert(epoll_key, new_interest).is_err() {
360                    // We already had interest in this.
361                    return this.set_errno_and_return_neg1_i32(LibcError("EEXIST"));
362                }
363            } else {
364                // Modify the existing interest.
365                let Some(interest) = interest_list.get_mut(&epoll_key) else {
366                    return this.set_errno_and_return_neg1_i32(LibcError("ENOENT"));
367                };
368                interest.relevant_events = events;
369                interest.is_edge_triggered = is_edge_triggered;
370                interest.data = data;
371            }
372
373            let active_events = fd_ref.as_unix(this).epoll_active_events()?.get_event_bitmask(this);
374
375            // Deliver events for the new interest.
376            update_readiness(
377                this,
378                &epfd,
379                active_events,
380                /* force_edge */ true,
381                move |callback| {
382                    // Need to release the RefCell when this closure returns, so we have to move
383                    // it into the closure, so we have to do a re-lookup here.
384                    callback(epoll_key, interest_list.get_mut(&epoll_key).unwrap())
385                },
386            )?;
387
388            interp_ok(Scalar::from_i32(0))
389        } else if op == epoll_ctl_del {
390            let epoll_key = (id, fd);
391
392            // Remove epoll_event_interest from interest_list and ready_set.
393            if interest_list.remove(&epoll_key).is_none() {
394                // We did not have interest in this.
395                return this.set_errno_and_return_neg1_i32(LibcError("ENOENT"));
396            };
397            // Remove the ready event for this key, should one exist.
398            let mut ready_events = epfd.ready_events.borrow_mut();
399            if let Some(idx) = ready_events.iter().position(|k| k == &epoll_key) {
400                ready_events.remove(idx);
401            }
402            // If this was the last interest in this FD, remove us from the global list
403            // of who is interested in this FD.
404            if interest_list.range(range_for_id(id)).next().is_none() {
405                this.machine.epoll_interests.remove(id, epfd.id());
406            }
407
408            interp_ok(Scalar::from_i32(0))
409        } else {
410            throw_unsup_format!("unsupported epoll_ctl operation: {op}");
411        }
412    }
413
    /// The `epoll_wait()` system call waits for events on the `Epoll`
    /// instance referred to by the file descriptor `epfd`. The buffer
    /// pointed to by `events` is used to return information from the ready
    /// list about file descriptors in the interest list that have some
    /// events available. Up to `maxevents` are returned by `epoll_wait()`.
    /// The `maxevents` argument must be greater than zero.
    ///
    /// The `timeout` argument specifies the number of milliseconds that
    /// `epoll_wait()` will block. Time is measured against the
    /// CLOCK_MONOTONIC clock. If the timeout is zero, the function will not block,
    /// while if the timeout is -1, the function will block
    /// until at least one event has been retrieved (or an error
    /// occurred).
    ///
    /// A call to `epoll_wait()` will block until either:
    /// • a file descriptor delivers an event;
    /// • the call is interrupted by a signal handler; or
    /// • the timeout expires.
    ///
    /// Note that the timeout interval will be rounded up to the system
    /// clock granularity, and kernel scheduling delays mean that the
    /// blocking interval may overrun by a small amount. Specifying a
    /// timeout of -1 causes `epoll_wait()` to block indefinitely, while
    /// specifying a timeout equal to zero cause `epoll_wait()` to return
    /// immediately, even if no events are available.
    ///
    /// On success, `epoll_wait()` returns the number of file descriptors
    /// ready for the requested I/O, or zero if no file descriptor became
    /// ready during the requested timeout milliseconds. On failure,
    /// `epoll_wait()` returns -1 and errno is set to indicate the error.
    ///
    /// The result is written to `dest` rather than returned, since this call may block the
    /// thread and only produce its result when the thread is woken up again. Timeouts
    /// smaller than -1 are reported as unsupported.
    ///
    /// <https://man7.org/linux/man-pages/man2/epoll_wait.2.html>
    fn epoll_wait(
        &mut self,
        epfd: &OpTy<'tcx>,
        events_op: &OpTy<'tcx>,
        maxevents: &OpTy<'tcx>,
        timeout: &OpTy<'tcx>,
        dest: &MPlaceTy<'tcx>,
    ) -> InterpResult<'tcx> {
        let this = self.eval_context_mut();

        let epfd_value = this.read_scalar(epfd)?.to_i32()?;
        let events = this.read_immediate(events_op)?;
        let maxevents = this.read_scalar(maxevents)?.to_i32()?;
        let timeout = this.read_scalar(timeout)?.to_i32()?;

        // NOTE(review): this also rejects descriptor number 0 with EINVAL; confirm that an
        // epoll instance can never be assigned FD 0 here.
        if epfd_value <= 0 || maxevents <= 0 {
            return this.set_errno_and_return_neg1(LibcError("EINVAL"), dest);
        }

        // This needs to come after the maxevents value check, or else maxevents.try_into().unwrap()
        // will fail.
        let event = this.deref_pointer_as(
            &events,
            this.libc_array_ty_layout("epoll_event", maxevents.try_into().unwrap()),
        )?;

        let Some(epfd) = this.machine.fds.get(epfd_value) else {
            return this.set_errno_and_return_neg1(LibcError("EBADF"), dest);
        };
        // NOTE(review): epoll_wait(2) lists EINVAL for an `epfd` that is not an epoll file
        // descriptor, whereas this reports EBADF; confirm whether that is intentional.
        let Some(epfd) = epfd.downcast::<Epoll>() else {
            return this.set_errno_and_return_neg1(LibcError("EBADF"), dest);
        };

        if timeout == 0 || !epfd.ready_events.borrow().is_empty() {
            // If the timeout is 0 or there is a ready event, we can return immediately.
            return_ready_list(&epfd, dest, &event, this)?;
        } else {
            // Blocking, with a relative timeout.
            // `timeout == 0` was handled above, so the `0..` arm only sees positive values.
            let deadline = match timeout {
                0.. => {
                    let duration = Duration::from_millis(timeout.try_into().unwrap());
                    Some(this.machine.monotonic_clock.now().add_lossy(duration).into())
                }
                -1 => None,
                ..-1 => {
                    throw_unsup_format!(
                        "epoll_wait: Only timeout values greater than or equal to -1 are supported."
                    );
                }
            };
            // Record this thread as blocked.
            epfd.queue.borrow_mut().push_back(this.active_thread());
            // And block it.
            let dest = dest.clone();
            // We keep a strong ref to the underlying `Epoll` to make sure it sticks around.
            // This means there'll be a leak if we never wake up, but that anyway would imply
            // a thread is permanently blocked so this is fine.
            this.block_thread(
                BlockReason::Epoll { epfd: epfd.clone() },
                deadline,
                callback!(
                    @capture<'tcx> {
                        epfd: FileDescriptionRef<Epoll>,
                        dest: MPlaceTy<'tcx>,
                        event: MPlaceTy<'tcx>,
                    }
                    |this, unblock: UnblockKind| {
                        match unblock {
                            UnblockKind::Ready => {
                                // `update_readiness` already popped this thread from the wait
                                // queue before unblocking it, so only delivery is left to do.
                                let events = return_ready_list(&epfd, &dest, &event, this)?;
                                assert!(events > 0, "we got woken up with no events to deliver");
                                interp_ok(())
                            },
                            UnblockKind::TimedOut => {
                                // Remove the current active thread_id from the blocked thread_id list.
                                epfd
                                    .queue.borrow_mut()
                                    .retain(|&id| id != this.active_thread());
                                // A timeout is reported as "zero events ready".
                                this.write_int(0, &dest)?;
                                interp_ok(())
                            },
                        }
                    }
                ),
            );
        }
        interp_ok(())
    }
534
535    /// For a specific file description, get its currently active events and send it to everyone who
536    /// registered interest in this FD. This function must be called whenever the result of
537    /// `epoll_active_events` might change.
538    ///
539    /// If `force_edge` is set, edge-triggered interests will be triggered even if the set of
540    /// ready events did not change. This can lead to spurious wakeups. Use with caution!
541    fn update_epoll_active_events(
542        &mut self,
543        fd_ref: DynFileDescriptionRef,
544        force_edge: bool,
545    ) -> InterpResult<'tcx> {
546        let this = self.eval_context_mut();
547        let id = fd_ref.id();
548        // Figure out who is interested in this. We need to clone this list since we can't prove
549        // that `send_active_events_to_interest` won't mutate it.
550        let Some(epolls) = this.machine.epoll_interests.get_epolls(id) else {
551            return interp_ok(());
552        };
553        let epolls = epolls
554            .map(|weak| {
555                weak.upgrade()
556                    .expect("someone forgot to remove the garbage from `machine.epoll_interests`")
557            })
558            .collect::<Vec<_>>();
559        let active_events = fd_ref.as_unix(this).epoll_active_events()?.get_event_bitmask(this);
560        for epoll in epolls {
561            update_readiness(this, &epoll, active_events, force_edge, |callback| {
562                for (&key, interest) in epoll.interest_list.borrow_mut().range_mut(range_for_id(id))
563                {
564                    callback(key, interest)?;
565                }
566                interp_ok(())
567            })?;
568        }
569
570        interp_ok(())
571    }
572
573    /// Recursively check whether the [`Epoll`] file description contains
574    /// interests which are host I/O source file descriptions.
575    fn has_epoll_host_interests(&self, epfd: &FileDescriptionRef<Epoll>) -> bool {
576        let this = self.eval_context_ref();
577        epfd.interest_list.borrow().iter().any(|((fd_id, _fd_num), _)| {
578            // By looking up whether the file description is currently registered,
579            // we get whether it's a host I/O source file description.
580            this.machine.blocking_io.contains_source(fd_id)
581        })
582    }
583}
584
/// Call this when the interests denoted by `for_each_interest` have their active event set changed
/// to `active_events`. The list is provided indirectly via the `for_each_interest` closure, which
/// will call its argument closure for each relevant interest.
///
/// For every such interest this updates its `active_events`, adjusts its membership in the
/// epoll's ready queue, and afterwards wakes threads blocked on `epoll` for as long as
/// there are ready events and waiting threads.
///
/// Any `RefCell` should be released by the time `for_each_interest` returns since we will then
/// be waking up threads which might require access to those `RefCell`.
fn update_readiness<'tcx>(
    ecx: &mut MiriInterpCx<'tcx>,
    epoll: &FileDescriptionRef<Epoll>,
    active_events: u32,
    force_edge: bool,
    for_each_interest: impl FnOnce(
        &mut dyn FnMut(EpollEventKey, &mut EpollEventInterest) -> InterpResult<'tcx>,
    ) -> InterpResult<'tcx>,
) -> InterpResult<'tcx> {
    let mut ready_events = epoll.ready_events.borrow_mut();
    for_each_interest(&mut |key, interest| {
        // Update the ready events tracked in this interest.
        let new_readiness = interest.relevant_events & active_events;
        let prev_readiness = std::mem::replace(&mut interest.active_events, new_readiness);
        if new_readiness == 0 {
            // Un-trigger this, there's nothing left to report here.
            if let Some(idx) = ready_events.iter().position(|k| k == &key) {
                ready_events.remove(idx);
            }
        // Note that `&` binds tighter than `!=`, so the comparison below is against
        // `prev_readiness & new_readiness`.
        } else if force_edge || new_readiness != prev_readiness & new_readiness {
            // Either we force an "edge" to be detected or there's a bit set in `new_readiness`
            // that was not set in `prev_readiness`. In both cases, this is ready now.

            // We need to ensure that this event is not already part of the
            // `ready_events` queue before enqueueing:
            // <https://github.com/torvalds/linux/blob/HEAD/fs/eventpoll.c#L1292-L1296>
            if !ready_events.contains(&key) {
                ready_events.push_back(key);
            }

            // No matter whether this is newly ready or just re-triggered,
            // the `epoll_wait` fetching this event should sync with the current thread.
            ecx.release_clock(|clock| {
                interest.clock.join(clock);
            })?;
        }
        interp_ok(())
    })?;
    // While there are events ready to be delivered, wake up a thread to receive them.
    while !ready_events.is_empty()
        && let Some(thread_id) = epoll.queue.borrow_mut().pop_front()
    {
        drop(ready_events); // release the "lock" so the unblocked thread can have it
        ecx.unblock_thread(thread_id, BlockReason::Epoll { epfd: epoll.clone() })?;
        // Re-borrow so the loop condition can check what is left after that thread ran.
        ready_events = epoll.ready_events.borrow_mut();
    }

    interp_ok(())
}
640
/// Stores the ready list of the `epfd` epoll instance into `events` (which must be an array),
/// and the number of returned events into `dest`.
///
/// At most as many events are delivered as `events` has slots. Delivered level-triggered
/// interests are moved to the back of the ready queue, while delivered edge-triggered
/// interests are removed from it. Receiving an event also acquires the clock stored in
/// its interest.
///
/// Returns the number of events written, i.e. the same value that is stored into `dest`.
fn return_ready_list<'tcx>(
    epfd: &FileDescriptionRef<Epoll>,
    dest: &MPlaceTy<'tcx>,
    events: &MPlaceTy<'tcx>,
    ecx: &mut MiriInterpCx<'tcx>,
) -> InterpResult<'tcx, i32> {
    let mut interest_list = epfd.interest_list.borrow_mut();
    let mut ready_events = epfd.ready_events.borrow_mut();
    let mut num_of_events: i32 = 0;
    let mut array_iter = ecx.project_array_fields(events)?;

    // Sanity-check to ensure that all event info is up-to-date.
    if cfg!(debug_assertions) {
        for (key, interest) in interest_list.iter() {
            // Ensure this matches the latest readiness of this FD.
            // We have to do an FD lookup by ID for this. The FdNum might be already closed.
            let fd = ecx.machine.fds.fds.values().find(|fd| fd.id() == key.0).unwrap();
            let current_active = fd.as_unix(ecx).epoll_active_events()?.get_event_bitmask(ecx);
            assert_eq!(interest.active_events, current_active & interest.relevant_events);
        }
    }

    // We will fill at most the first `ready_events_len` slots of the array.
    // Bounding the iterator this way ensures that we can re-add events
    // to the end of the queue during the loop without having them show up in the array.
    let ready_events_len = u64::try_from(ready_events.len()).unwrap();
    while let Some((idx, slot)) = array_iter.next(ecx)?
        && idx < ready_events_len
        && let Some(key) = ready_events.pop_front()
    {
        // Every key in the ready queue is expected to have an entry in the interest list.
        let interest = interest_list.get_mut(&key).expect("non-existent event in ready set");
        // Deliver event to caller.
        ecx.write_int_fields_named(
            &[("events", interest.active_events.into()), ("u64", interest.data.into())],
            &slot,
        )?;
        num_of_events = num_of_events.strict_add(1);
        // Synchronize receiving thread with the event of interest.
        ecx.acquire_clock(&interest.clock)?;
        if !interest.is_edge_triggered {
            // This is a level-triggered interest, so we need to re-add the event
            // at the end of the ready queue:
            // <https://github.com/torvalds/linux/blob/HEAD/fs/eventpoll.c#L1835-L1847>
            ready_events.push_back(key);
        }
    }
    ecx.write_int(num_of_events, dest)?;
    interp_ok(num_of_events)
}
691}