std/sys/pal/unix/thread.rs

use crate::ffi::CStr;
use crate::mem::{self, ManuallyDrop};
use crate::num::NonZero;
#[cfg(all(target_os = "linux", target_env = "gnu"))]
use crate::sys::weak::dlsym;
#[cfg(any(target_os = "solaris", target_os = "illumos", target_os = "nto",))]
use crate::sys::weak::weak;
use crate::sys::{os, stack_overflow};
use crate::time::{Duration, Instant};
use crate::{cmp, io, ptr};
#[cfg(not(any(
    target_os = "l4re",
    target_os = "vxworks",
    target_os = "espidf",
    target_os = "nuttx"
)))]
pub const DEFAULT_MIN_STACK_SIZE: usize = 2 * 1024 * 1024;
#[cfg(target_os = "l4re")]
pub const DEFAULT_MIN_STACK_SIZE: usize = 1024 * 1024;
#[cfg(target_os = "vxworks")]
pub const DEFAULT_MIN_STACK_SIZE: usize = 256 * 1024;
#[cfg(any(target_os = "espidf", target_os = "nuttx"))]
pub const DEFAULT_MIN_STACK_SIZE: usize = 0; // 0 indicates that the stack size configured in the ESP-IDF/NuttX menuconfig system should be used

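// Passed from `Thread::new` to the spawned thread via `pthread_create`: the thread's
// name (handed to the stack overflow handler) and the closure to run.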
struct ThreadData {
    name: Option<Box<str>>,
    f: Box<dyn FnOnce()>,
}

pub struct Thread {
    id: libc::pthread_t,
}

// Some platforms may have pthread_t as a pointer in which case we still want
// a thread to be Send/Sync
unsafe impl Send for Thread {}
unsafe impl Sync for Thread {}

impl Thread {
    // unsafe: see thread::Builder::spawn_unchecked for safety requirements
    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
    pub unsafe fn new(
        stack: usize,
        name: Option<&str>,
        f: Box<dyn FnOnce()>,
    ) -> io::Result<Thread> {
        let data = Box::into_raw(Box::new(ThreadData { name: name.map(Box::from), f }));
        let mut native: libc::pthread_t = mem::zeroed();
        let mut attr: mem::MaybeUninit<libc::pthread_attr_t> = mem::MaybeUninit::uninit();
        assert_eq!(libc::pthread_attr_init(attr.as_mut_ptr()), 0);

        #[cfg(any(target_os = "espidf", target_os = "nuttx"))]
        if stack > 0 {
            // Only set the stack if a non-zero value is passed
            // 0 is used as an indication that the default stack size configured in the ESP-IDF/NuttX menuconfig system should be used
            assert_eq!(
                libc::pthread_attr_setstacksize(
                    attr.as_mut_ptr(),
                    cmp::max(stack, min_stack_size(attr.as_ptr()))
                ),
                0
            );
        }

        #[cfg(not(any(target_os = "espidf", target_os = "nuttx")))]
        {
            let stack_size = cmp::max(stack, min_stack_size(attr.as_ptr()));

            match libc::pthread_attr_setstacksize(attr.as_mut_ptr(), stack_size) {
                0 => {}
                n => {
                    assert_eq!(n, libc::EINVAL);
                    // EINVAL means |stack_size| is either too small or not a
                    // multiple of the system page size. Because it's definitely
                    // >= PTHREAD_STACK_MIN, it must be an alignment issue.
                    // Round up to the nearest page and try again.
                    let page_size = os::page_size();
                    let stack_size =
                        (stack_size + page_size - 1) & (-(page_size as isize - 1) as usize - 1);
                    assert_eq!(libc::pthread_attr_setstacksize(attr.as_mut_ptr(), stack_size), 0);
                }
            };
        }

        let ret = libc::pthread_create(&mut native, attr.as_ptr(), thread_start, data as *mut _);
        // Note: if the thread creation fails and this assert fails, then `data` will
        // be leaked. However, an alternative design could cause a double-free,
        // which is clearly worse.
        assert_eq!(libc::pthread_attr_destroy(attr.as_mut_ptr()), 0);

        return if ret != 0 {
            // The thread failed to start, and as a result `data` was not consumed. Therefore, it is
            // safe to reconstruct the box so that it gets deallocated.
            drop(Box::from_raw(data));
            Err(io::Error::from_raw_os_error(ret))
        } else {
            Ok(Thread { id: native })
        };

        extern "C" fn thread_start(data: *mut libc::c_void) -> *mut libc::c_void {
            unsafe {
                let data = Box::from_raw(data as *mut ThreadData);
                // Next, set up our stack overflow handler which may get triggered if we run
                // out of stack.
                let _handler = stack_overflow::Handler::new(data.name);
                // Finally, let's run some code.
                (data.f)();
            }
            ptr::null_mut()
        }
    }

    pub fn yield_now() {
        let ret = unsafe { libc::sched_yield() };
        debug_assert_eq!(ret, 0);
    }

    #[cfg(target_os = "android")]
    pub fn set_name(name: &CStr) {
        const PR_SET_NAME: libc::c_int = 15;
        unsafe {
            let res = libc::prctl(
                PR_SET_NAME,
                name.as_ptr(),
                0 as libc::c_ulong,
                0 as libc::c_ulong,
                0 as libc::c_ulong,
            );
            // We have no good way of propagating errors here, but in debug builds let's check that this actually worked.
            debug_assert_eq!(res, 0);
        }
    }

    #[cfg(any(
        target_os = "linux",
        target_os = "freebsd",
        target_os = "dragonfly",
        target_os = "nuttx",
        target_os = "cygwin"
    ))]
    pub fn set_name(name: &CStr) {
        unsafe {
            cfg_if::cfg_if! {
                if #[cfg(any(target_os = "linux", target_os = "cygwin"))] {
                    // Linux and Cygwin limit the allowed length of the name.
                    const TASK_COMM_LEN: usize = 16;
                    let name = truncate_cstr::<{ TASK_COMM_LEN }>(name);
                } else {
                    // FreeBSD, DragonFly BSD and NuttX do not enforce length limits.
                }
            };
            // Available since glibc 2.12, musl 1.1.16, and uClibc 1.0.20 for Linux,
            // FreeBSD 12.2 and 13.0, and DragonFly BSD 6.0.
            let res = libc::pthread_setname_np(libc::pthread_self(), name.as_ptr());
            // We have no good way of propagating errors here, but in debug builds let's check that this actually worked.
            debug_assert_eq!(res, 0);
        }
    }

    #[cfg(target_os = "openbsd")]
    pub fn set_name(name: &CStr) {
        unsafe {
            libc::pthread_set_name_np(libc::pthread_self(), name.as_ptr());
        }
    }

    #[cfg(target_vendor = "apple")]
    pub fn set_name(name: &CStr) {
        unsafe {
            let name = truncate_cstr::<{ libc::MAXTHREADNAMESIZE }>(name);
            let res = libc::pthread_setname_np(name.as_ptr());
            // We have no good way of propagating errors here, but in debug builds let's check that this actually worked.
            debug_assert_eq!(res, 0);
        }
    }

    #[cfg(target_os = "netbsd")]
    pub fn set_name(name: &CStr) {
        unsafe {
            let res = libc::pthread_setname_np(
                libc::pthread_self(),
                c"%s".as_ptr(),
                name.as_ptr() as *mut libc::c_void,
            );
            debug_assert_eq!(res, 0);
        }
    }

    #[cfg(any(target_os = "solaris", target_os = "illumos", target_os = "nto"))]
    pub fn set_name(name: &CStr) {
        weak!(
            fn pthread_setname_np(
                thread: libc::pthread_t,
                name: *const libc::c_char,
            ) -> libc::c_int;
        );

        if let Some(f) = pthread_setname_np.get() {
            #[cfg(target_os = "nto")]
            const THREAD_NAME_MAX: usize = libc::_NTO_THREAD_NAME_MAX as usize;
            #[cfg(any(target_os = "solaris", target_os = "illumos"))]
            const THREAD_NAME_MAX: usize = 32;

            let name = truncate_cstr::<{ THREAD_NAME_MAX }>(name);
            let res = unsafe { f(libc::pthread_self(), name.as_ptr()) };
            debug_assert_eq!(res, 0);
        }
    }

    #[cfg(target_os = "fuchsia")]
    pub fn set_name(name: &CStr) {
        use super::fuchsia::*;
        unsafe {
            zx_object_set_property(
                zx_thread_self(),
                ZX_PROP_NAME,
                name.as_ptr() as *const libc::c_void,
                name.to_bytes().len(),
            );
        }
    }

    #[cfg(target_os = "haiku")]
    pub fn set_name(name: &CStr) {
        unsafe {
            let thread_self = libc::find_thread(ptr::null_mut());
            let res = libc::rename_thread(thread_self, name.as_ptr());
            // We have no good way of propagating errors here, but in debug builds let's check that this actually worked.
            debug_assert_eq!(res, libc::B_OK);
        }
    }

    #[cfg(target_os = "vxworks")]
    pub fn set_name(name: &CStr) {
        let mut name = truncate_cstr::<{ (libc::VX_TASK_RENAME_LENGTH - 1) as usize }>(name);
        let res = unsafe { libc::taskNameSet(libc::taskIdSelf(), name.as_mut_ptr()) };
        debug_assert_eq!(res, libc::OK);
    }

    #[cfg(any(
        target_env = "newlib",
        target_os = "l4re",
        target_os = "emscripten",
        target_os = "redox",
        target_os = "hurd",
        target_os = "aix",
    ))]
    pub fn set_name(_name: &CStr) {
        // These targets have no way to set a thread name.
    }

    #[cfg(not(target_os = "espidf"))]
    pub fn sleep(dur: Duration) {
        let mut secs = dur.as_secs();
        let mut nsecs = dur.subsec_nanos() as _;

        // If we're awoken with a signal then the return value will be -1 and
        // nanosleep will fill in `ts` with the remaining time.
        unsafe {
            while secs > 0 || nsecs > 0 {
                let mut ts = libc::timespec {
                    tv_sec: cmp::min(libc::time_t::MAX as u64, secs) as libc::time_t,
                    tv_nsec: nsecs,
                };
                secs -= ts.tv_sec as u64;
                let ts_ptr = &raw mut ts;
                if libc::nanosleep(ts_ptr, ts_ptr) == -1 {
                    assert_eq!(os::errno(), libc::EINTR);
                    secs += ts.tv_sec as u64;
                    nsecs = ts.tv_nsec;
                } else {
                    nsecs = 0;
                }
            }
        }
    }

    #[cfg(target_os = "espidf")]
    pub fn sleep(dur: Duration) {
        // ESP-IDF does not have `nanosleep`, so we use `usleep` instead.
        // As per the documentation of `usleep`, it is expected to support
        // sleep times of up to at least 1 second.
        //
        // ESP-IDF does support almost up to `u32::MAX`, but due to a potential integer overflow in its
        // `usleep` implementation
        // (https://github.com/espressif/esp-idf/blob/d7ca8b94c852052e3bc33292287ef4dd62c9eeb1/components/newlib/time.c#L210),
        // we limit the sleep time to the maximum one that would not cause the underlying `usleep` implementation to overflow
        // (`portTICK_PERIOD_MS` can be anything between 1 and 1000, and is 10 by default).
        const MAX_MICROS: u32 = u32::MAX - 1_000_000 - 1;

        // Add any nanoseconds smaller than a microsecond as an extra microsecond
        // so as to comply with the `std::thread::sleep` contract which mandates
        // implementations to sleep for _at least_ the provided `dur`.
        // We can't overflow `micros` as it is a `u128`, while `Duration` is a pair of
        // (`u64` secs, `u32` nanos), where the nanos are strictly smaller than 1 second
        // (i.e. < 1_000_000_000)
        let mut micros = dur.as_micros() + if dur.subsec_nanos() % 1_000 > 0 { 1 } else { 0 };

        while micros > 0 {
            let st = if micros > MAX_MICROS as u128 { MAX_MICROS } else { micros as u32 };
            unsafe {
                libc::usleep(st);
            }

            micros -= st as u128;
        }
    }

    // Any unix that has clock_nanosleep
    // If this list changes, update the Miri clock_nanosleep shim
    #[cfg(any(
        target_os = "freebsd",
        target_os = "netbsd",
        target_os = "linux",
        target_os = "android",
        target_os = "solaris",
        target_os = "illumos",
        target_os = "dragonfly",
        target_os = "hurd",
        target_os = "fuchsia",
        target_os = "vxworks",
    ))]
    pub fn sleep_until(deadline: Instant) {
        let Some(ts) = deadline.into_inner().into_timespec().to_timespec() else {
            // The deadline is further in the future than can be passed to
            // clock_nanosleep. We have to use Self::sleep instead. This might
            // happen on 32 bit platforms, especially closer to 2038.
            let now = Instant::now();
            if let Some(delay) = deadline.checked_duration_since(now) {
                Self::sleep(delay);
            }
            return;
        };

        unsafe {
            // When we get interrupted (res = EINTR), call clock_nanosleep again
            loop {
                let res = libc::clock_nanosleep(
                    super::time::Instant::CLOCK_ID,
                    libc::TIMER_ABSTIME,
                    &ts,
                    core::ptr::null_mut(), // not required with TIMER_ABSTIME
                );

                if res == 0 {
                    break;
                } else {
                    assert_eq!(
                        res,
                        libc::EINTR,
                        "timespec is in range,
                         clockid is valid and kernel should support it"
                    );
                }
            }
        }
    }

    // Any unix that does not have clock_nanosleep
    #[cfg(not(any(
        target_os = "freebsd",
        target_os = "netbsd",
        target_os = "linux",
        target_os = "android",
        target_os = "solaris",
        target_os = "illumos",
        target_os = "dragonfly",
        target_os = "hurd",
        target_os = "fuchsia",
        target_os = "vxworks",
    )))]
    pub fn sleep_until(deadline: Instant) {
        let now = Instant::now();
        if let Some(delay) = deadline.checked_duration_since(now) {
            Self::sleep(delay);
        }
    }

    pub fn join(self) {
        let id = self.into_id();
        let ret = unsafe { libc::pthread_join(id, ptr::null_mut()) };
        assert!(ret == 0, "failed to join thread: {}", io::Error::from_raw_os_error(ret));
    }

    pub fn id(&self) -> libc::pthread_t {
        self.id
    }

    pub fn into_id(self) -> libc::pthread_t {
        ManuallyDrop::new(self).id
    }
}

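// A `Thread` that is dropped without being joined is detached, so the underlying
// thread's resources are released once it exits.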
impl Drop for Thread {
    fn drop(&mut self) {
        let ret = unsafe { libc::pthread_detach(self.id) };
        debug_assert_eq!(ret, 0);
    }
}

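/// Copies `cstr` into a fixed-size, zero-initialized buffer of `MAX_WITH_NUL` `c_char`s,
/// truncating if necessary and always leaving the final element as the NUL terminator.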
#[cfg(any(
    target_os = "linux",
    target_os = "nto",
    target_os = "solaris",
    target_os = "illumos",
    target_os = "vxworks",
    target_os = "cygwin",
    target_vendor = "apple",
))]
fn truncate_cstr<const MAX_WITH_NUL: usize>(cstr: &CStr) -> [libc::c_char; MAX_WITH_NUL] {
    let mut result = [0; MAX_WITH_NUL];
    for (src, dst) in cstr.to_bytes().iter().zip(&mut result[..MAX_WITH_NUL - 1]) {
        *dst = *src as libc::c_char;
    }
    result
}

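/// Returns the number of hardware threads available to the current process, taking the
/// scheduler affinity mask and cgroup CPU quotas into account where the platform exposes them.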
pub fn available_parallelism() -> io::Result<NonZero<usize>> {
    cfg_if::cfg_if! {
        if #[cfg(any(
            target_os = "android",
            target_os = "emscripten",
            target_os = "fuchsia",
            target_os = "hurd",
            target_os = "linux",
            target_os = "aix",
            target_vendor = "apple",
            target_os = "cygwin",
        ))] {
            #[allow(unused_assignments)]
            #[allow(unused_mut)]
            let mut quota = usize::MAX;

            #[cfg(any(target_os = "android", target_os = "linux"))]
            {
                quota = cgroups::quota().max(1);
                let mut set: libc::cpu_set_t = unsafe { mem::zeroed() };
                unsafe {
                    if libc::sched_getaffinity(0, size_of::<libc::cpu_set_t>(), &mut set) == 0 {
                        let count = libc::CPU_COUNT(&set) as usize;
                        let count = count.min(quota);

                        // According to sched_getaffinity's API it should always be non-zero, but
                        // some old MIPS kernels were buggy and zero-initialized the mask if
                        // none was explicitly set.
                        // In that case we use the sysconf fallback.
                        if let Some(count) = NonZero::new(count) {
                            return Ok(count)
                        }
                    }
                }
            }
            match unsafe { libc::sysconf(libc::_SC_NPROCESSORS_ONLN) } {
                -1 => Err(io::Error::last_os_error()),
                0 => Err(io::Error::UNKNOWN_THREAD_COUNT),
                cpus => {
                    let count = cpus as usize;
                    // Cover the unusual situation where we were able to get the quota but not the affinity mask
                    let count = count.min(quota);
                    Ok(unsafe { NonZero::new_unchecked(count) })
                }
            }
        } else if #[cfg(any(
                   target_os = "freebsd",
                   target_os = "dragonfly",
                   target_os = "openbsd",
                   target_os = "netbsd",
               ))] {
            use crate::ptr;

            #[cfg(target_os = "freebsd")]
            {
                let mut set: libc::cpuset_t = unsafe { mem::zeroed() };
                unsafe {
                    if libc::cpuset_getaffinity(
                        libc::CPU_LEVEL_WHICH,
                        libc::CPU_WHICH_PID,
                        -1,
                        size_of::<libc::cpuset_t>(),
                        &mut set,
                    ) == 0 {
                        let count = libc::CPU_COUNT(&set) as usize;
                        if count > 0 {
                            return Ok(NonZero::new_unchecked(count));
                        }
                    }
                }
            }

            #[cfg(target_os = "netbsd")]
            {
                unsafe {
                    let set = libc::_cpuset_create();
                    if !set.is_null() {
                        let mut count: usize = 0;
                        if libc::pthread_getaffinity_np(libc::pthread_self(), libc::_cpuset_size(set), set) == 0 {
                            for i in 0..libc::cpuid_t::MAX {
                                match libc::_cpuset_isset(i, set) {
                                    -1 => break,
                                    0 => continue,
                                    _ => count = count + 1,
                                }
                            }
                        }
                        libc::_cpuset_destroy(set);
                        if let Some(count) = NonZero::new(count) {
                            return Ok(count);
                        }
                    }
                }
            }

            let mut cpus: libc::c_uint = 0;
            let mut cpus_size = size_of_val(&cpus);

            unsafe {
                cpus = libc::sysconf(libc::_SC_NPROCESSORS_ONLN) as libc::c_uint;
            }

            // Fallback approach in case of errors or no hardware threads.
            if cpus < 1 {
                let mut mib = [libc::CTL_HW, libc::HW_NCPU, 0, 0];
                let res = unsafe {
                    libc::sysctl(
                        mib.as_mut_ptr(),
                        2,
                        (&raw mut cpus) as *mut _,
                        (&raw mut cpus_size) as *mut _,
                        ptr::null_mut(),
                        0,
                    )
                };

                // Handle errors if any.
                if res == -1 {
                    return Err(io::Error::last_os_error());
                } else if cpus == 0 {
                    return Err(io::Error::UNKNOWN_THREAD_COUNT);
                }
            }

            Ok(unsafe { NonZero::new_unchecked(cpus as usize) })
        } else if #[cfg(target_os = "nto")] {
            unsafe {
                use libc::_syspage_ptr;
                if _syspage_ptr.is_null() {
                    Err(io::const_error!(io::ErrorKind::NotFound, "no syspage available"))
                } else {
                    let cpus = (*_syspage_ptr).num_cpu;
                    NonZero::new(cpus as usize)
                        .ok_or(io::Error::UNKNOWN_THREAD_COUNT)
                }
            }
        } else if #[cfg(any(target_os = "solaris", target_os = "illumos"))] {
            let mut cpus = 0u32;
            if unsafe { libc::pset_info(libc::PS_MYID, core::ptr::null_mut(), &mut cpus, core::ptr::null_mut()) } != 0 {
                return Err(io::Error::UNKNOWN_THREAD_COUNT);
            }
            Ok(unsafe { NonZero::new_unchecked(cpus as usize) })
        } else if #[cfg(target_os = "haiku")] {
            // The `cpu_count` field of `system_info` holds the static value set at boot time with
            // `smp_set_num_cpus`; `get_system_info` then calls `smp_get_num_cpus`.
            unsafe {
                let mut sinfo: libc::system_info = crate::mem::zeroed();
                let res = libc::get_system_info(&mut sinfo);

                if res != libc::B_OK {
                    return Err(io::Error::UNKNOWN_THREAD_COUNT);
                }

                Ok(NonZero::new_unchecked(sinfo.cpu_count as usize))
            }
        } else if #[cfg(target_os = "vxworks")] {
            // Note: there is also `vxCpuConfiguredGet`, which is closer to _SC_NPROCESSORS_CONF
            // semantics than to the number of cores actually available.
            unsafe extern "C" {
                fn vxCpuEnabledGet() -> libc::cpuset_t;
            }

            // SAFETY: `vxCpuEnabledGet` always fetches a mask with at least one bit set
            unsafe {
                let set = vxCpuEnabledGet();
                Ok(NonZero::new_unchecked(set.count_ones() as usize))
            }
        } else {
            // FIXME: implement on Redox, l4re
            Err(io::const_error!(io::ErrorKind::Unsupported, "getting the number of hardware threads is not supported on the target platform"))
        }
    }
}

#[cfg(any(target_os = "android", target_os = "linux"))]
mod cgroups {
    //! Currently not covered
    //! * cgroup v2 in non-standard mountpoints
    //! * paths containing control characters or spaces, since those would be escaped in procfs
    //!   output and we don't unescape

    use crate::borrow::Cow;
    use crate::ffi::OsString;
    use crate::fs::{File, exists};
    use crate::io::{BufRead, Read};
    use crate::os::unix::ffi::OsStringExt;
    use crate::path::{Path, PathBuf};
    use crate::str::from_utf8;

    #[derive(PartialEq)]
    enum Cgroup {
        V1,
        V2,
    }

    /// Returns the cgroup CPU quota in core-equivalents, rounded down, or `usize::MAX` if the
    /// quota cannot be determined or is not set.
    pub(super) fn quota() -> usize {
        let mut quota = usize::MAX;
        if cfg!(miri) {
            // Attempting to open a file fails under default flags due to isolation.
            // And Miri does not have parallelism anyway.
            return quota;
        }

        let _: Option<()> = try {
            let mut buf = Vec::with_capacity(128);
            // find our place in the cgroup hierarchy
            File::open("/proc/self/cgroup").ok()?.read_to_end(&mut buf).ok()?;
            let (cgroup_path, version) =
                buf.split(|&c| c == b'\n').fold(None, |previous, line| {
                    let mut fields = line.splitn(3, |&c| c == b':');
                    // 2nd field is a list of controllers for v1 or empty for v2
                    let version = match fields.nth(1) {
                        Some(b"") => Cgroup::V2,
                        Some(controllers)
                            if from_utf8(controllers)
                                .is_ok_and(|c| c.split(',').any(|c| c == "cpu")) =>
                        {
                            Cgroup::V1
                        }
                        _ => return previous,
                    };

                    // already-found v1 trumps v2 since it explicitly specifies its controllers
                    if previous.is_some() && version == Cgroup::V2 {
                        return previous;
                    }

                    let path = fields.last()?;
                    // skip leading slash
                    Some((path[1..].to_owned(), version))
                })?;
            let cgroup_path = PathBuf::from(OsString::from_vec(cgroup_path));

            quota = match version {
                Cgroup::V1 => quota_v1(cgroup_path),
                Cgroup::V2 => quota_v2(cgroup_path),
            };
        };

        quota
    }

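    /// Walks the cgroup v2 hierarchy from `group_path` up to the standard /sys/fs/cgroup mount,
    /// reading `cpu.max` at each level and keeping the smallest limit/period ratio found.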
    fn quota_v2(group_path: PathBuf) -> usize {
        let mut quota = usize::MAX;

        let mut path = PathBuf::with_capacity(128);
        let mut read_buf = String::with_capacity(20);

        // standard mount location defined in file-hierarchy(7) manpage
        let cgroup_mount = "/sys/fs/cgroup";

        path.push(cgroup_mount);
        path.push(&group_path);

        path.push("cgroup.controllers");

        // skip if we're not looking at cgroup2
        if matches!(exists(&path), Err(_) | Ok(false)) {
            return usize::MAX;
        };

        path.pop();

        let _: Option<()> = try {
            while path.starts_with(cgroup_mount) {
                path.push("cpu.max");

                read_buf.clear();

                if File::open(&path).and_then(|mut f| f.read_to_string(&mut read_buf)).is_ok() {
                    let raw_quota = read_buf.lines().next()?;
                    let mut raw_quota = raw_quota.split(' ');
                    let limit = raw_quota.next()?;
                    let period = raw_quota.next()?;
                    match (limit.parse::<usize>(), period.parse::<usize>()) {
                        (Ok(limit), Ok(period)) if period > 0 => {
                            quota = quota.min(limit / period);
                        }
                        _ => {}
                    }
                }

                path.pop(); // pop filename
                path.pop(); // pop dir
            }
        };

        quota
    }

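    /// Reads `cpu.cfs_quota_us` and `cpu.cfs_period_us` along the cgroup v1 hierarchy and
    /// returns the smallest quota/period ratio found, or `usize::MAX` if no limit applies.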
    fn quota_v1(group_path: PathBuf) -> usize {
        let mut quota = usize::MAX;
        let mut path = PathBuf::with_capacity(128);
        let mut read_buf = String::with_capacity(20);

        // Hardcode commonly used locations mentioned in the cgroups(7) manpage;
        // if those don't work, scan mountinfo and adjust `group_path` for bind-mounts.
        let mounts: &[fn(&Path) -> Option<(_, &Path)>] = &[
            |p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu"), p)),
            |p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu,cpuacct"), p)),
            // this can be expensive on systems with tons of mountpoints
            // but we only get to this point when /proc/self/cgroups explicitly indicated
            // this process belongs to a cpu-controller cgroup v1 and the defaults didn't work
            find_mountpoint,
        ];

        for mount in mounts {
            let Some((mount, group_path)) = mount(&group_path) else { continue };

            path.clear();
            path.push(mount.as_ref());
            path.push(&group_path);

            // skip if we guessed the mount incorrectly
            if matches!(exists(&path), Err(_) | Ok(false)) {
                continue;
            }

            while path.starts_with(mount.as_ref()) {
                let mut parse_file = |name| {
                    path.push(name);
                    read_buf.clear();

                    let f = File::open(&path);
                    path.pop(); // restore buffer before any early returns
                    f.ok()?.read_to_string(&mut read_buf).ok()?;
                    let parsed = read_buf.trim().parse::<usize>().ok()?;

                    Some(parsed)
                };

                let limit = parse_file("cpu.cfs_quota_us");
                let period = parse_file("cpu.cfs_period_us");

                match (limit, period) {
                    (Some(limit), Some(period)) if period > 0 => quota = quota.min(limit / period),
                    _ => {}
                }

                path.pop();
            }

            // we passed the existence check above, so we should have traversed the correct
            // hierarchy when reaching this line
            break;
        }

        quota
    }

    /// Scans mountinfo for a cgroup v1 mountpoint with a cpu controller.
    ///
    /// If the cgroupfs is a bind mount then `group_path` is adjusted to skip
    /// over the already-included prefix.
    fn find_mountpoint(group_path: &Path) -> Option<(Cow<'static, str>, &Path)> {
        let mut reader = File::open_buffered("/proc/self/mountinfo").ok()?;
        let mut line = String::with_capacity(256);
        loop {
            line.clear();
            if reader.read_line(&mut line).ok()? == 0 {
                break;
            }

            let line = line.trim();
            let mut items = line.split(' ');

            let sub_path = items.nth(3)?;
            let mount_point = items.next()?;
            let mount_opts = items.next_back()?;
            let filesystem_type = items.nth_back(1)?;

            if filesystem_type != "cgroup" || !mount_opts.split(',').any(|opt| opt == "cpu") {
                // not a cgroup / not a cpu-controller
                continue;
            }

            let sub_path = Path::new(sub_path).strip_prefix("/").ok()?;

            if !group_path.starts_with(sub_path) {
                // this is a bind-mount and the bound subdirectory
                // does not contain the cgroup this process belongs to
                continue;
            }

            let trimmed_group_path = group_path.strip_prefix(sub_path).ok()?;

            return Some((Cow::Owned(mount_point.to_owned()), trimmed_group_path));
        }

        None
    }
}

// glibc >= 2.15 has a __pthread_get_minstack() function that returns
// PTHREAD_STACK_MIN plus bytes needed for thread-local storage.
// We need that information to avoid blowing up when a small stack
// is created in an application with big thread-local storage requirements.
// See #6233 for rationale and details.
#[cfg(all(target_os = "linux", target_env = "gnu"))]
unsafe fn min_stack_size(attr: *const libc::pthread_attr_t) -> usize {
    // We use dlsym to avoid an ELF version dependency on GLIBC_PRIVATE. (#23628)
    // We shouldn't really be using such an internal symbol, but there's currently
    // no other way to account for the TLS size.
    dlsym!(
        fn __pthread_get_minstack(attr: *const libc::pthread_attr_t) -> libc::size_t;
    );

    match __pthread_get_minstack.get() {
        None => libc::PTHREAD_STACK_MIN,
        Some(f) => unsafe { f(attr) },
    }
}

// No point in looking up __pthread_get_minstack() on non-glibc platforms.
#[cfg(all(
    not(all(target_os = "linux", target_env = "gnu")),
    not(any(target_os = "netbsd", target_os = "nuttx"))
))]
unsafe fn min_stack_size(_: *const libc::pthread_attr_t) -> usize {
    libc::PTHREAD_STACK_MIN
}

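// NetBSD and NuttX report the minimum stack size via sysconf; cache the result, since it
// does not change while the process is running.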
#[cfg(any(target_os = "netbsd", target_os = "nuttx"))]
unsafe fn min_stack_size(_: *const libc::pthread_attr_t) -> usize {
    static STACK: crate::sync::OnceLock<usize> = crate::sync::OnceLock::new();

    *STACK.get_or_init(|| {
        let mut stack = unsafe { libc::sysconf(libc::_SC_THREAD_STACK_MIN) };
        if stack < 0 {
            stack = 2048; // just a guess
        }

        stack as usize
    })
}