// std/sys/pal/unix/stack_overflow/thread_info.rs

1//! TLS, but async-signal-safe.
2//!
3//! Unfortunately, because thread local storage isn't async-signal-safe, we
4//! cannot soundly use it in our stack overflow handler. While this works
5//! without problems on most platforms, it can lead to undefined behaviour
6//! on others (such as GNU/Linux). Luckily, the POSIX specification documents
7//! two thread-specific values that can be accessed in asynchronous signal
8//! handlers: the value of `pthread_self()` and the address of `errno`. As
9//! `pthread_t` is an opaque platform-specific type, we use the address of
10//! `errno` here. As it is thread-specific and does not change over the
11//! lifetime of a thread, we can use `&errno` as a key for a `BTreeMap`
12//! that stores thread-specific data.
13//!
14//! Concurrent access to this map is synchronized by two locks – an outer
15//! [`Mutex`] and an inner spin lock that also remembers the identity of
16//! the lock owner:
17//! * The spin lock is the primary means of synchronization: since it only
//!   uses native atomics, it can be soundly used inside the signal handler,
//!   as opposed to [`Mutex`], which might not be async-signal-safe.
20//! * The [`Mutex`] prevents busy-waiting in the setup logic, as all accesses
21//!   there are performed with the [`Mutex`] held, which makes the spin-lock
22//!   redundant in the common case.
23//! * Finally, by using the `errno` address as the locked value of the spin
24//!   lock, we can detect cases where a SIGSEGV occurred while the thread
25//!   info is being modified.
26
27use crate::collections::BTreeMap;
28use crate::hint::spin_loop;
29use crate::ops::Range;
30use crate::sync::Mutex;
31use crate::sync::atomic::{AtomicUsize, Ordering};
32use crate::sys::os::errno_location;
33
34pub struct ThreadInfo {
35    pub tid: u64,
36    pub name: Option<Box<str>>,
37    pub guard_page_range: Range<usize>,
38}
39
40static LOCK: Mutex<()> = Mutex::new(());
41static SPIN_LOCK: AtomicUsize = AtomicUsize::new(0);
42// This uses a `BTreeMap` instead of a hashmap since it supports constant
43// initialization and automatically reduces the amount of memory used when
44// items are removed.
45static mut THREAD_INFO: BTreeMap<usize, ThreadInfo> = BTreeMap::new();
46
47struct UnlockOnDrop;
48
49impl Drop for UnlockOnDrop {
50    fn drop(&mut self) {
51        SPIN_LOCK.store(0, Ordering::Release);
52    }
53}
54
55/// Get the current thread's information, if available.
56///
57/// Calling this function might freeze other threads if they attempt to modify
58/// their thread information. Thus, the caller should ensure that the process
59/// is aborted shortly after this function is called.
60///
61/// This function is guaranteed to be async-signal-safe if `f` is too.
62pub fn with_current_info<R>(f: impl FnOnce(Option<&ThreadInfo>) -> R) -> R {
63    let this = errno_location().addr();
64    let mut attempt = 0;
65    let _guard = loop {
66        // If we are just spinning endlessly, it's very likely that the thread
67        // modifying the thread info map has a lower priority than us and will
68        // not continue until we stop running. Just give up in that case.
69        if attempt == 10_000_000 {
70            rtprintpanic!("deadlock in SIGSEGV handler");
71            return f(None);
72        }
73
74        match SPIN_LOCK.compare_exchange(0, this, Ordering::Acquire, Ordering::Relaxed) {
75            Ok(_) => break UnlockOnDrop,
76            Err(owner) if owner == this => {
77                rtabort!("a thread received SIGSEGV while modifying its stack overflow information")
78            }
79            // Spin until the lock can be acquired – there is nothing better to
80            // do. This is unfortunately a priority hole, but a stack overflow
81            // is a fatal error anyway.
82            Err(_) => {
83                spin_loop();
84                attempt += 1;
85            }
86        }
87    };
88
89    // SAFETY: we own the spin lock, so `THREAD_INFO` cannot not be aliased.
90    let thread_info = unsafe { &*(&raw const THREAD_INFO) };
91    f(thread_info.get(&this))
92}
93
94fn spin_lock_in_setup(this: usize) -> UnlockOnDrop {
95    loop {
96        match SPIN_LOCK.compare_exchange(0, this, Ordering::Acquire, Ordering::Relaxed) {
97            Ok(_) => return UnlockOnDrop,
98            Err(owner) if owner == this => {
99                unreachable!("the thread info setup logic isn't recursive")
100            }
101            // This function is always called with the outer lock held,
102            // meaning the only time locking can fail is if another thread has
103            // encountered a stack overflow. Since that will abort the process,
104            // we just stop the current thread until that time. We use `pause`
105            // instead of spinning to avoid priority inversion.
106            // SAFETY: this doesn't have any safety preconditions.
107            Err(_) => drop(unsafe { libc::pause() }),
108        }
109    }
110}
111
112pub fn set_current_info(guard_page_range: Range<usize>) {
113    let tid = crate::thread::current_os_id();
114    let name = crate::thread::with_current_name(|name| name.map(Box::from));
115
116    let this = errno_location().addr();
117    let _lock_guard = LOCK.lock();
118    let _spin_guard = spin_lock_in_setup(this);
119
120    // SAFETY: we own the spin lock, so `THREAD_INFO` cannot be aliased.
121    let thread_info = unsafe { &mut *(&raw mut THREAD_INFO) };
122    thread_info.insert(this, ThreadInfo { tid, name, guard_page_range });
123}
124
125pub fn delete_current_info() {
126    let this = errno_location().addr();
127    let _lock_guard = LOCK.lock();
128    let _spin_guard = spin_lock_in_setup(this);
129
130    // SAFETY: we own the spin lock, so `THREAD_INFO` cannot not be aliased.
131    let thread_info = unsafe { &mut *(&raw mut THREAD_INFO) };
132    thread_info.remove(&this);
133}