std/sys/pal/unix/stack_overflow/thread_info.rs
1//! TLS, but async-signal-safe.
2//!
3//! Unfortunately, because thread local storage isn't async-signal-safe, we
4//! cannot soundly use it in our stack overflow handler. While this works
5//! without problems on most platforms, it can lead to undefined behaviour
6//! on others (such as GNU/Linux). Luckily, the POSIX specification documents
7//! two thread-specific values that can be accessed in asynchronous signal
8//! handlers: the value of `pthread_self()` and the address of `errno`. As
9//! `pthread_t` is an opaque platform-specific type, we use the address of
10//! `errno` here. As it is thread-specific and does not change over the
11//! lifetime of a thread, we can use `&errno` as a key for a `BTreeMap`
12//! that stores thread-specific data.
13//!
14//! Concurrent access to this map is synchronized by two locks – an outer
15//! [`Mutex`] and an inner spin lock that also remembers the identity of
16//! the lock owner:
//! * The spin lock is the primary means of synchronization: since it only
//!   uses native atomics, it can be soundly used inside the signal handler
//!   as opposed to [`Mutex`], which might not be async-signal-safe.
20//! * The [`Mutex`] prevents busy-waiting in the setup logic, as all accesses
21//! there are performed with the [`Mutex`] held, which makes the spin-lock
22//! redundant in the common case.
23//! * Finally, by using the `errno` address as the locked value of the spin
24//! lock, we can detect cases where a SIGSEGV occurred while the thread
25//! info is being modified.
26
27use crate::collections::BTreeMap;
28use crate::hint::spin_loop;
29use crate::ops::Range;
30use crate::sync::Mutex;
31use crate::sync::atomic::{AtomicUsize, Ordering};
32use crate::sys::os::errno_location;
33
/// Per-thread data recorded for use by the stack overflow handler.
///
/// One value is stored per registered thread in the thread info map, keyed
/// by the address of that thread's `errno`.
pub struct ThreadInfo {
    /// Identifier of the thread as reported by the operating system.
    pub tid: u64,
    /// Name of the thread, or `None` if the thread has no name.
    pub name: Option<Box<str>>,
    /// Range covered by the thread's guard page, as passed to
    /// `set_current_info`.
    pub guard_page_range: Range<usize>,
}
39
40static LOCK: Mutex<()> = Mutex::new(());
41static SPIN_LOCK: AtomicUsize = AtomicUsize::new(0);
42// This uses a `BTreeMap` instead of a hashmap since it supports constant
43// initialization and automatically reduces the amount of memory used when
44// items are removed.
45static mut THREAD_INFO: BTreeMap<usize, ThreadInfo> = BTreeMap::new();
46
47struct UnlockOnDrop;
48
49impl Drop for UnlockOnDrop {
50 fn drop(&mut self) {
51 SPIN_LOCK.store(0, Ordering::Release);
52 }
53}
54
55/// Get the current thread's information, if available.
56///
57/// Calling this function might freeze other threads if they attempt to modify
58/// their thread information. Thus, the caller should ensure that the process
59/// is aborted shortly after this function is called.
60///
61/// This function is guaranteed to be async-signal-safe if `f` is too.
62pub fn with_current_info<R>(f: impl FnOnce(Option<&ThreadInfo>) -> R) -> R {
63 let this = errno_location().addr();
64 let mut attempt = 0;
65 let _guard = loop {
66 // If we are just spinning endlessly, it's very likely that the thread
67 // modifying the thread info map has a lower priority than us and will
68 // not continue until we stop running. Just give up in that case.
69 if attempt == 10_000_000 {
70 rtprintpanic!("deadlock in SIGSEGV handler");
71 return f(None);
72 }
73
74 match SPIN_LOCK.compare_exchange(0, this, Ordering::Acquire, Ordering::Relaxed) {
75 Ok(_) => break UnlockOnDrop,
76 Err(owner) if owner == this => {
77 rtabort!("a thread received SIGSEGV while modifying its stack overflow information")
78 }
79 // Spin until the lock can be acquired – there is nothing better to
80 // do. This is unfortunately a priority hole, but a stack overflow
81 // is a fatal error anyway.
82 Err(_) => {
83 spin_loop();
84 attempt += 1;
85 }
86 }
87 };
88
89 // SAFETY: we own the spin lock, so `THREAD_INFO` cannot not be aliased.
90 let thread_info = unsafe { &*(&raw const THREAD_INFO) };
91 f(thread_info.get(&this))
92}
93
94fn spin_lock_in_setup(this: usize) -> UnlockOnDrop {
95 loop {
96 match SPIN_LOCK.compare_exchange(0, this, Ordering::Acquire, Ordering::Relaxed) {
97 Ok(_) => return UnlockOnDrop,
98 Err(owner) if owner == this => {
99 unreachable!("the thread info setup logic isn't recursive")
100 }
101 // This function is always called with the outer lock held,
102 // meaning the only time locking can fail is if another thread has
103 // encountered a stack overflow. Since that will abort the process,
104 // we just stop the current thread until that time. We use `pause`
105 // instead of spinning to avoid priority inversion.
106 // SAFETY: this doesn't have any safety preconditions.
107 Err(_) => drop(unsafe { libc::pause() }),
108 }
109 }
110}
111
112pub fn set_current_info(guard_page_range: Range<usize>) {
113 let tid = crate::thread::current_os_id();
114 let name = crate::thread::with_current_name(|name| name.map(Box::from));
115
116 let this = errno_location().addr();
117 let _lock_guard = LOCK.lock();
118 let _spin_guard = spin_lock_in_setup(this);
119
120 // SAFETY: we own the spin lock, so `THREAD_INFO` cannot be aliased.
121 let thread_info = unsafe { &mut *(&raw mut THREAD_INFO) };
122 thread_info.insert(this, ThreadInfo { tid, name, guard_page_range });
123}
124
125pub fn delete_current_info() {
126 let this = errno_location().addr();
127 let _lock_guard = LOCK.lock();
128 let _spin_guard = spin_lock_in_setup(this);
129
130 // SAFETY: we own the spin lock, so `THREAD_INFO` cannot not be aliased.
131 let thread_info = unsafe { &mut *(&raw mut THREAD_INFO) };
132 thread_info.remove(&this);
133}