core/slice/sort/unstable/mod.rs

//! This module contains the entry points for `slice::sort_unstable`.

use crate::mem::SizedTypeProperties;
#[cfg(not(any(feature = "optimize_for_size", target_pointer_width = "16")))]
use crate::slice::sort::shared::find_existing_run;
#[cfg(not(any(feature = "optimize_for_size", target_pointer_width = "16")))]
use crate::slice::sort::shared::smallsort::insertion_sort_shift_left;
use crate::{cfg_match, intrinsics};

pub(crate) mod heapsort;
pub(crate) mod quicksort;

/// Unstable sort called ipnsort by Lukas Bergdoll and Orson Peters.
/// Design document:
/// <https://github.com/Voultapher/sort-research-rs/blob/main/writeup/ipnsort_introduction/text.md>
///
/// Upholds all safety properties outlined here:
/// <https://github.com/Voultapher/sort-research-rs/blob/main/writeup/sort_safety/text.md>
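///
/// `is_less` is a strict "less than" comparison; for example, `slice::sort_unstable`
/// is expected to drive this entry point with `|a, b| a.lt(b)`.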
#[inline(always)]
pub fn sort<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F) {
    // Arrays of zero-sized types are always all-equal, and thus sorted.
    if T::IS_ZST {
        return;
    }

    // Instrumenting the standard library showed that 90+% of the calls to sort
    // by rustc are either of size 0 or 1.
    let len = v.len();
    if intrinsics::likely(len < 2) {
        return;
    }

    cfg_match! {
        any(feature = "optimize_for_size", target_pointer_width = "16") => {
            heapsort::heapsort(v, is_less);
        }
        _ => {
            // More advanced sorting methods than insertion sort are faster if called in
            // a hot loop for small inputs, but for general-purpose code the small
            // binary size of insertion sort is more important. The instruction cache in
            // modern processors is very valuable, and for a single sort call in general
            // purpose code any gains from an advanced method are cancelled by i-cache
            // misses during the sort, and thrashing the i-cache for surrounding code.
            const MAX_LEN_ALWAYS_INSERTION_SORT: usize = 20;
            if intrinsics::likely(len <= MAX_LEN_ALWAYS_INSERTION_SORT) {
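                // An offset of `1` tells `insertion_sort_shift_left` that `v[..1]` is
                // already sorted, so insertion starts from the second element.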
                insertion_sort_shift_left(v, 1, is_less);
                return;
            }

            ipnsort(v, is_less);
        }
    }
}

/// See [`sort`]
///
/// Deliberately don't inline the main sorting routine entrypoint to ensure the
/// inlined insertion sort i-cache footprint remains minimal.
#[cfg(not(any(feature = "optimize_for_size", target_pointer_width = "16")))]
#[inline(never)]
fn ipnsort<T, F>(v: &mut [T], is_less: &mut F)
where
    F: FnMut(&T, &T) -> bool,
{
    let len = v.len();
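    // `find_existing_run` reports the length of an already-sorted (or strictly
    // descending) prefix of `v`; `was_reversed` marks the descending case so that
    // it can be fixed up with a single `reverse` below.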
    let (run_len, was_reversed) = find_existing_run(v, is_less);

    // SAFETY: find_existing_run promises to return a valid run_len.
    unsafe { intrinsics::assume(run_len <= len) };

    if run_len == len {
        if was_reversed {
            v.reverse();
        }

        // It would be possible to do in-place merging here for a long existing streak. But that
        // makes the implementation a lot bigger; users can use `slice::sort` for that use-case.
        return;
    }

    // Limit the number of imbalanced partitions to `2 * floor(log2(len))`.
    // The binary OR by one is used to eliminate the zero-check in the logarithm.
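    // For example, a slice of length 1024 gets a budget of `2 * 10 == 20` imbalanced
    // partitions; once that budget is exhausted, the quicksort is expected to fall
    // back to heapsort (see `quicksort.rs`).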
    let limit = 2 * (len | 1).ilog2();
    crate::slice::sort::unstable::quicksort::quicksort(v, None, limit, is_less);
}