miri/shims/unix/linux_like/
sync.rs

1use crate::concurrency::sync::FutexRef;
2use crate::helpers::check_min_vararg_count;
3use crate::*;
4
5struct LinuxFutex {
6    futex: FutexRef,
7}
8
9/// Implementation of the SYS_futex syscall.
10/// `args` is the arguments *including* the syscall number.
11pub fn futex<'tcx>(
12    ecx: &mut MiriInterpCx<'tcx>,
13    varargs: &[OpTy<'tcx>],
14    dest: &MPlaceTy<'tcx>,
15) -> InterpResult<'tcx> {
16    // The amount of arguments used depends on the type of futex operation.
17    // The full futex syscall takes six arguments (excluding the syscall
18    // number), which is also the maximum amount of arguments a linux syscall
19    // can take on most architectures.
20    // However, not all futex operations use all six arguments. The unused ones
21    // may or may not be left out from the `syscall()` call.
22    // Therefore we don't use `check_arg_count` here, but only check for the
23    // number of arguments to fall within a range.
24    let [addr, op, val] = check_min_vararg_count("`syscall(SYS_futex, ...)`", varargs)?;
25
26    // The first three arguments (after the syscall number itself) are the same to all futex operations:
27    //     (int *addr, int op, int val).
28    // We checked above that these definitely exist.
29    let addr = ecx.read_pointer(addr)?;
30    let op = ecx.read_scalar(op)?.to_i32()?;
31    let val = ecx.read_scalar(val)?.to_i32()?;
32
33    // This is a vararg function so we have to bring our own type for this pointer.
34    let addr = ecx.ptr_to_mplace(addr, ecx.machine.layouts.i32);
35
36    let futex_private = ecx.eval_libc_i32("FUTEX_PRIVATE_FLAG");
37    let futex_wait = ecx.eval_libc_i32("FUTEX_WAIT");
38    let futex_wait_bitset = ecx.eval_libc_i32("FUTEX_WAIT_BITSET");
39    let futex_wake = ecx.eval_libc_i32("FUTEX_WAKE");
40    let futex_wake_bitset = ecx.eval_libc_i32("FUTEX_WAKE_BITSET");
41    let futex_realtime = ecx.eval_libc_i32("FUTEX_CLOCK_REALTIME");
42
43    // FUTEX_PRIVATE enables an optimization that stops it from working across processes.
44    // Miri doesn't support that anyway, so we ignore that flag.
45    match op & !futex_private {
46        // FUTEX_WAIT: (int *addr, int op = FUTEX_WAIT, int val, const timespec *timeout)
47        // Blocks the thread if *addr still equals val. Wakes up when FUTEX_WAKE is called on the same address,
48        // or *timeout expires. `timeout == null` for an infinite timeout.
49        //
50        // FUTEX_WAIT_BITSET: (int *addr, int op = FUTEX_WAIT_BITSET, int val, const timespec *timeout, int *_ignored, unsigned int bitset)
51        // This is identical to FUTEX_WAIT, except:
52        //  - The timeout is absolute rather than relative.
53        //  - You can specify the bitset to selecting what WAKE operations to respond to.
54        op if op & !futex_realtime == futex_wait || op & !futex_realtime == futex_wait_bitset => {
55            let wait_bitset = op & !futex_realtime == futex_wait_bitset;
56
57            let (timeout, bitset) = if wait_bitset {
58                let [_, _, _, timeout, uaddr2, bitset] = check_min_vararg_count(
59                    "`syscall(SYS_futex, FUTEX_WAIT_BITSET, ...)`",
60                    varargs,
61                )?;
62                let _timeout = ecx.read_pointer(timeout)?;
63                let _uaddr2 = ecx.read_pointer(uaddr2)?;
64                (timeout, ecx.read_scalar(bitset)?.to_u32()?)
65            } else {
66                let [_, _, _, timeout] =
67                    check_min_vararg_count("`syscall(SYS_futex, FUTEX_WAIT, ...)`", varargs)?;
68                (timeout, u32::MAX)
69            };
70
71            if bitset == 0 {
72                return ecx.set_last_error_and_return(LibcError("EINVAL"), dest);
73            }
74
75            let timeout = ecx.deref_pointer_as(timeout, ecx.libc_ty_layout("timespec"))?;
76            let timeout = if ecx.ptr_is_null(timeout.ptr())? {
77                None
78            } else {
79                let duration = match ecx.read_timespec(&timeout)? {
80                    Some(duration) => duration,
81                    None => {
82                        return ecx.set_last_error_and_return(LibcError("EINVAL"), dest);
83                    }
84                };
85                let timeout_clock = if op & futex_realtime == futex_realtime {
86                    ecx.check_no_isolation(
87                        "`futex` syscall with `op=FUTEX_WAIT` and non-null timeout with `FUTEX_CLOCK_REALTIME`",
88                    )?;
89                    TimeoutClock::RealTime
90                } else {
91                    TimeoutClock::Monotonic
92                };
93                let timeout_anchor = if wait_bitset {
94                    // FUTEX_WAIT_BITSET uses an absolute timestamp.
95                    TimeoutAnchor::Absolute
96                } else {
97                    // FUTEX_WAIT uses a relative timestamp.
98                    TimeoutAnchor::Relative
99                };
100                Some((timeout_clock, timeout_anchor, duration))
101            };
102            // There may be a concurrent thread changing the value of addr
103            // and then invoking the FUTEX_WAKE syscall. It is critical that the
104            // effects of this and the other thread are correctly observed,
105            // otherwise we will deadlock.
106            //
107            // There are two scenarios to consider, depending on whether WAIT or WAKE goes first:
108            // 1. If we (FUTEX_WAIT) execute first, we'll push ourselves into the waiters queue and
109            //    go to sleep. They (FUTEX_WAKE) will see us in the queue and wake us up. It doesn't
110            //    matter how the addr write is ordered.
111            // 2. If they (FUTEX_WAKE) execute first, that means the addr write is also before us
112            //    (FUTEX_WAIT). It is crucial that we observe addr's new value. If we see an
113            //    outdated value that happens to equal the expected val, then we'll put ourselves to
114            //    sleep with no one to wake us up, so we end up with a deadlock. This is prevented
115            //    by having a SeqCst fence inside FUTEX_WAKE syscall, and another SeqCst fence here
116            //    in FUTEX_WAIT. The atomic read on addr after the SeqCst fence is guaranteed not to
117            //    see any value older than the addr write immediately before calling FUTEX_WAKE.
118            //    We'll see futex_val != val and return without sleeping.
119            //
120            //    Note that the fences do not create any happens-before relationship.
121            //    The read sees the write immediately before the fence not because
122            //    one happens after the other, but is instead due to a guarantee unique
123            //    to SeqCst fences that restricts what an atomic read placed AFTER the
124            //    fence can see. The read still has to be atomic, otherwise it's a data
125            //    race. This guarantee cannot be achieved with acquire-release fences
126            //    since they only talk about reads placed BEFORE a fence - and places
127            //    no restrictions on what the read itself can see, only that there is
128            //    a happens-before between the fences IF the read happens to see the
129            //    right value. This is useless to us, since we need the read itself
130            //    to see an up-to-date value.
131            //
132            // The above case distinction is valid since both FUTEX_WAIT and FUTEX_WAKE
133            // contain a SeqCst fence, therefore inducing a total order between the operations.
134            // It is also critical that the fence, the atomic load, and the comparison in FUTEX_WAIT
135            // altogether happen atomically. If the other thread's fence in FUTEX_WAKE
136            // gets interleaved after our fence, then we lose the guarantee on the
137            // atomic load being up-to-date; if the other thread's write on addr and FUTEX_WAKE
138            // call are interleaved after the load but before the comparison, then we get a TOCTOU
139            // race condition, and go to sleep thinking the other thread will wake us up,
140            // even though they have already finished.
141            //
142            // Thankfully, preemptions cannot happen inside a Miri shim, so we do not need to
143            // do anything special to guarantee fence-load-comparison atomicity.
144            ecx.atomic_fence(AtomicFenceOrd::SeqCst)?;
145            // Read an `i32` through the pointer, regardless of any wrapper types.
146            // It's not uncommon for `addr` to be passed as another type than `*mut i32`, such as `*const AtomicI32`.
147            // We do an acquire read -- it only seems reasonable that if we observe a value here, we
148            // actually establish an ordering with that value.
149            let futex_val = ecx.read_scalar_atomic(&addr, AtomicReadOrd::Acquire)?.to_i32()?;
150            if val == futex_val {
151                // The value still matches, so we block the thread and make it wait for FUTEX_WAKE.
152
153                // This cannot fail since we already did an atomic acquire read on that pointer.
154                // Acquire reads are only allowed on mutable memory.
155                let futex_ref = ecx
156                    .get_sync_or_init(addr.ptr(), |_| LinuxFutex { futex: Default::default() })
157                    .unwrap()
158                    .futex
159                    .clone();
160
161                let dest = dest.clone();
162                ecx.futex_wait(
163                    futex_ref,
164                    bitset,
165                    timeout,
166                    callback!(
167                        @capture<'tcx> {
168                            dest: MPlaceTy<'tcx>,
169                        }
170                        |ecx, unblock: UnblockKind| match unblock {
171                            UnblockKind::Ready => {
172                                ecx.write_int(0, &dest)
173                            }
174                            UnblockKind::TimedOut => {
175                                ecx.set_last_error_and_return(LibcError("ETIMEDOUT"), &dest)
176                            }
177                        }
178                    ),
179                );
180            } else {
181                // The futex value doesn't match the expected value, so we return failure
182                // right away without sleeping: -1 and errno set to EAGAIN.
183                return ecx.set_last_error_and_return(LibcError("EAGAIN"), dest);
184            }
185        }
186        // FUTEX_WAKE: (int *addr, int op = FUTEX_WAKE, int val)
187        // Wakes at most `val` threads waiting on the futex at `addr`.
188        // Returns the amount of threads woken up.
189        // Does not access the futex value at *addr.
190        // FUTEX_WAKE_BITSET: (int *addr, int op = FUTEX_WAKE, int val, const timespect *_unused, int *_unused, unsigned int bitset)
191        // Same as FUTEX_WAKE, but allows you to specify a bitset to select which threads to wake up.
192        op if op == futex_wake || op == futex_wake_bitset => {
193            let Some(futex_ref) =
194                ecx.get_sync_or_init(addr.ptr(), |_| LinuxFutex { futex: Default::default() })
195            else {
196                // No AllocId, or no live allocation at that AllocId.
197                // Return an error code. (That seems nicer than silently doing something non-intuitive.)
198                // This means that if an address gets reused by a new allocation,
199                // we'll use an independent futex queue for this... that seems acceptable.
200                return ecx.set_last_error_and_return(LibcError("EFAULT"), dest);
201            };
202            let futex_ref = futex_ref.futex.clone();
203
204            let bitset = if op == futex_wake_bitset {
205                let [_, _, _, timeout, uaddr2, bitset] = check_min_vararg_count(
206                    "`syscall(SYS_futex, FUTEX_WAKE_BITSET, ...)`",
207                    varargs,
208                )?;
209                let _timeout = ecx.read_pointer(timeout)?;
210                let _uaddr2 = ecx.read_pointer(uaddr2)?;
211                ecx.read_scalar(bitset)?.to_u32()?
212            } else {
213                u32::MAX
214            };
215            if bitset == 0 {
216                return ecx.set_last_error_and_return(LibcError("EINVAL"), dest);
217            }
218            // Together with the SeqCst fence in futex_wait, this makes sure that futex_wait
219            // will see the latest value on addr which could be changed by our caller
220            // before doing the syscall.
221            ecx.atomic_fence(AtomicFenceOrd::SeqCst)?;
222            let woken = ecx.futex_wake(&futex_ref, bitset, val.try_into().unwrap())?;
223            ecx.write_scalar(Scalar::from_target_isize(woken.try_into().unwrap(), ecx), dest)?;
224        }
225        op => throw_unsup_format!("Miri does not support `futex` syscall with op={}", op),
226    }
227
228    interp_ok(())
229}