miri/shims/unix/linux_like/sync.rs
1use crate::concurrency::sync::{FutexRef, SyncObj};
2use crate::shims::sig::check_min_vararg_count;
3use crate::*;
4
5struct LinuxFutex {
6 futex: FutexRef,
7}
8
9impl SyncObj for LinuxFutex {}
10
11/// Implementation of the SYS_futex syscall.
12/// `args` is the arguments *including* the syscall number.
13pub fn futex<'tcx>(
14 ecx: &mut MiriInterpCx<'tcx>,
15 varargs: &[OpTy<'tcx>],
16 dest: &MPlaceTy<'tcx>,
17) -> InterpResult<'tcx> {
18 let [addr, op, val] = check_min_vararg_count("`syscall(SYS_futex, ...)`", varargs)?;
19
20 // See <https://man7.org/linux/man-pages/man2/futex.2.html> for docs.
21 // The first three arguments (after the syscall number itself) are the same to all futex operations:
22 // (uint32_t *addr, int op, uint32_t val).
23 // We checked above that these definitely exist.
24 let addr = ecx.read_pointer(addr)?;
25 let op = ecx.read_scalar(op)?.to_i32()?;
26 let val = ecx.read_scalar(val)?.to_u32()?;
27
28 // This is a vararg function so we have to bring our own type for this pointer.
29 let addr = ecx.ptr_to_mplace(addr, ecx.machine.layouts.i32);
30
31 let futex_private = ecx.eval_libc_i32("FUTEX_PRIVATE_FLAG");
32 let futex_wait = ecx.eval_libc_i32("FUTEX_WAIT");
33 let futex_wait_bitset = ecx.eval_libc_i32("FUTEX_WAIT_BITSET");
34 let futex_wake = ecx.eval_libc_i32("FUTEX_WAKE");
35 let futex_wake_bitset = ecx.eval_libc_i32("FUTEX_WAKE_BITSET");
36 let futex_realtime = ecx.eval_libc_i32("FUTEX_CLOCK_REALTIME");
37
38 // FUTEX_PRIVATE enables an optimization that stops it from working across processes.
39 // Miri doesn't support that anyway, so we ignore that flag.
40 match op & !futex_private {
41 // FUTEX_WAIT: (int *addr, int op = FUTEX_WAIT, int val, const timespec *timeout)
42 // Blocks the thread if *addr still equals val. Wakes up when FUTEX_WAKE is called on the same address,
43 // or *timeout expires. `timeout == null` for an infinite timeout.
44 //
45 // FUTEX_WAIT_BITSET: (int *addr, int op = FUTEX_WAIT_BITSET, int val, const timespec *timeout, int *_ignored, unsigned int bitset)
46 // This is identical to FUTEX_WAIT, except:
47 // - The timeout is absolute rather than relative.
48 // - You can specify the bitset to selecting what WAKE operations to respond to.
49 op if op & !futex_realtime == futex_wait || op & !futex_realtime == futex_wait_bitset => {
50 let wait_bitset = op & !futex_realtime == futex_wait_bitset;
51
52 let (timeout, bitset) = if wait_bitset {
53 let [_, _, _, timeout, uaddr2, bitset] = check_min_vararg_count(
54 "`syscall(SYS_futex, FUTEX_WAIT_BITSET, ...)`",
55 varargs,
56 )?;
57 let _timeout = ecx.read_pointer(timeout)?;
58 let _uaddr2 = ecx.read_pointer(uaddr2)?;
59 (timeout, ecx.read_scalar(bitset)?.to_u32()?)
60 } else {
61 let [_, _, _, timeout] =
62 check_min_vararg_count("`syscall(SYS_futex, FUTEX_WAIT, ...)`", varargs)?;
63 (timeout, u32::MAX)
64 };
65
66 if bitset == 0 {
67 return ecx.set_last_error_and_return(LibcError("EINVAL"), dest);
68 }
69
70 let timeout = ecx.deref_pointer_as(timeout, ecx.libc_ty_layout("timespec"))?;
71 let timeout = if ecx.ptr_is_null(timeout.ptr())? {
72 None
73 } else {
74 let duration = match ecx.read_timespec(&timeout)? {
75 Some(duration) => duration,
76 None => {
77 return ecx.set_last_error_and_return(LibcError("EINVAL"), dest);
78 }
79 };
80 let timeout_clock = if op & futex_realtime == futex_realtime {
81 ecx.check_no_isolation(
82 "`futex` syscall with `op=FUTEX_WAIT` and non-null timeout with `FUTEX_CLOCK_REALTIME`",
83 )?;
84 TimeoutClock::RealTime
85 } else {
86 TimeoutClock::Monotonic
87 };
88 let timeout_anchor = if wait_bitset {
89 // FUTEX_WAIT_BITSET uses an absolute timestamp.
90 TimeoutAnchor::Absolute
91 } else {
92 // FUTEX_WAIT uses a relative timestamp.
93 TimeoutAnchor::Relative
94 };
95 Some((timeout_clock, timeout_anchor, duration))
96 };
97 // There may be a concurrent thread changing the value of addr
98 // and then invoking the FUTEX_WAKE syscall. It is critical that the
99 // effects of this and the other thread are correctly observed,
100 // otherwise we will deadlock.
101 //
102 // There are two scenarios to consider, depending on whether WAIT or WAKE goes first:
103 // 1. If we (FUTEX_WAIT) execute first, we'll push ourselves into the waiters queue and
104 // go to sleep. They (FUTEX_WAKE) will see us in the queue and wake us up. It doesn't
105 // matter how the addr write is ordered.
106 // 2. If they (FUTEX_WAKE) execute first, that means the addr write is also before us
107 // (FUTEX_WAIT). It is crucial that we observe addr's new value. If we see an
108 // outdated value that happens to equal the expected val, then we'll put ourselves to
109 // sleep with no one to wake us up, so we end up with a deadlock. This is prevented
110 // by having a SeqCst fence inside FUTEX_WAKE syscall, and another SeqCst fence here
111 // in FUTEX_WAIT. The atomic read on addr after the SeqCst fence is guaranteed not to
112 // see any value older than the addr write immediately before calling FUTEX_WAKE.
113 // We'll see futex_val != val and return without sleeping.
114 //
115 // Note that the fences do not create any happens-before relationship.
116 // The read sees the write immediately before the fence not because
117 // one happens after the other, but is instead due to a guarantee unique
118 // to SeqCst fences that restricts what an atomic read placed AFTER the
119 // fence can see. The read still has to be atomic, otherwise it's a data
120 // race. This guarantee cannot be achieved with acquire-release fences
121 // since they only talk about reads placed BEFORE a fence - and places
122 // no restrictions on what the read itself can see, only that there is
123 // a happens-before between the fences IF the read happens to see the
124 // right value. This is useless to us, since we need the read itself
125 // to see an up-to-date value.
126 //
127 // The above case distinction is valid since both FUTEX_WAIT and FUTEX_WAKE
128 // contain a SeqCst fence, therefore inducing a total order between the operations.
129 // It is also critical that the fence, the atomic load, and the comparison in FUTEX_WAIT
130 // altogether happen atomically. If the other thread's fence in FUTEX_WAKE
131 // gets interleaved after our fence, then we lose the guarantee on the
132 // atomic load being up-to-date; if the other thread's write on addr and FUTEX_WAKE
133 // call are interleaved after the load but before the comparison, then we get a TOCTOU
134 // race condition, and go to sleep thinking the other thread will wake us up,
135 // even though they have already finished.
136 //
137 // Thankfully, preemptions cannot happen inside a Miri shim, so we do not need to
138 // do anything special to guarantee fence-load-comparison atomicity.
139 ecx.atomic_fence(AtomicFenceOrd::SeqCst)?;
140 // Read an `i32` through the pointer, regardless of any wrapper types.
141 // It's not uncommon for `addr` to be passed as another type than `*mut i32`, such as `*const AtomicI32`.
142 // We do an acquire read -- it only seems reasonable that if we observe a value here, we
143 // actually establish an ordering with that value.
144 let futex_val = ecx.read_scalar_atomic(&addr, AtomicReadOrd::Acquire)?.to_u32()?;
145 if val == futex_val {
146 // The value still matches, so we block the thread and make it wait for FUTEX_WAKE.
147
148 // This cannot fail since we already did an atomic acquire read on that pointer.
149 // Acquire reads are only allowed on mutable memory.
150 let futex_ref = ecx
151 .get_sync_or_init(addr.ptr(), |_| LinuxFutex { futex: Default::default() })
152 .unwrap()
153 .futex
154 .clone();
155
156 let dest = dest.clone();
157 ecx.futex_wait(
158 futex_ref,
159 bitset,
160 timeout,
161 callback!(
162 @capture<'tcx> {
163 dest: MPlaceTy<'tcx>,
164 }
165 |ecx, unblock: UnblockKind| match unblock {
166 UnblockKind::Ready => {
167 ecx.write_int(0, &dest)
168 }
169 UnblockKind::TimedOut => {
170 ecx.set_last_error_and_return(LibcError("ETIMEDOUT"), &dest)
171 }
172 }
173 ),
174 );
175 } else {
176 // The futex value doesn't match the expected value, so we return failure
177 // right away without sleeping: -1 and errno set to EAGAIN.
178 return ecx.set_last_error_and_return(LibcError("EAGAIN"), dest);
179 }
180 }
181 // FUTEX_WAKE: (int *addr, int op = FUTEX_WAKE, int val)
182 // Wakes at most `val` threads waiting on the futex at `addr`.
183 // Returns the amount of threads woken up.
184 // Does not access the futex value at *addr.
185 // FUTEX_WAKE_BITSET: (int *addr, int op = FUTEX_WAKE, int val, const timespect *_unused, int *_unused, unsigned int bitset)
186 // Same as FUTEX_WAKE, but allows you to specify a bitset to select which threads to wake up.
187 op if op == futex_wake || op == futex_wake_bitset => {
188 let Some(futex_ref) =
189 ecx.get_sync_or_init(addr.ptr(), |_| LinuxFutex { futex: Default::default() })
190 else {
191 // No AllocId, or no live allocation at that AllocId.
192 // Return an error code. (That seems nicer than silently doing something non-intuitive.)
193 // This means that if an address gets reused by a new allocation,
194 // we'll use an independent futex queue for this... that seems acceptable.
195 return ecx.set_last_error_and_return(LibcError("EFAULT"), dest);
196 };
197 let futex_ref = futex_ref.futex.clone();
198
199 let bitset = if op == futex_wake_bitset {
200 let [_, _, _, timeout, uaddr2, bitset] = check_min_vararg_count(
201 "`syscall(SYS_futex, FUTEX_WAKE_BITSET, ...)`",
202 varargs,
203 )?;
204 let _timeout = ecx.read_pointer(timeout)?;
205 let _uaddr2 = ecx.read_pointer(uaddr2)?;
206 ecx.read_scalar(bitset)?.to_u32()?
207 } else {
208 u32::MAX
209 };
210 if bitset == 0 {
211 return ecx.set_last_error_and_return(LibcError("EINVAL"), dest);
212 }
213 // Together with the SeqCst fence in futex_wait, this makes sure that futex_wait
214 // will see the latest value on addr which could be changed by our caller
215 // before doing the syscall.
216 ecx.atomic_fence(AtomicFenceOrd::SeqCst)?;
217 let woken = ecx.futex_wake(&futex_ref, bitset, val.try_into().unwrap())?;
218 ecx.write_scalar(Scalar::from_target_isize(woken.try_into().unwrap(), ecx), dest)?;
219 }
220 op => throw_unsup_format!("Miri does not support `futex` syscall with op={}", op),
221 }
222
223 interp_ok(())
224}