core/num/int_sqrt.rs
1//! These functions use the [Karatsuba square root algorithm][1] to compute the
2//! [integer square root](https://en.wikipedia.org/wiki/Integer_square_root)
3//! for the primitive integer types.
4//!
5//! The signed integer functions can only handle **nonnegative** inputs, so
6//! that must be checked before calling those.
7//!
8//! [1]: <https://web.archive.org/web/20230511212802/https://inria.hal.science/inria-00072854v1/file/RR-3805.pdf>
9//! "Paul Zimmermann. Karatsuba Square Root. \[Research Report\] RR-3805,
10//! INRIA. 1999, pp.8. (inria-00072854)"
11
12/// This array stores the [integer square roots](
13/// https://en.wikipedia.org/wiki/Integer_square_root) and remainders of each
14/// [`u8`](prim@u8) value. For example, `U8_ISQRT_WITH_REMAINDER[17]` will be
15/// `(4, 1)` because the integer square root of 17 is 4 and because 17 is 1
16/// higher than 4 squared.
17const U8_ISQRT_WITH_REMAINDER: [(u8, u8); 256] = {
18 let mut result = [(0, 0); 256];
19
20 let mut n: usize = 0;
21 let mut isqrt_n: usize = 0;
22 while n < result.len() {
23 result[n] = (isqrt_n as u8, (n - isqrt_n.pow(2)) as u8);
24
25 n += 1;
26 if n == (isqrt_n + 1).pow(2) {
27 isqrt_n += 1;
28 }
29 }
30
31 result
32};
33
34/// Returns the [integer square root](
35/// https://en.wikipedia.org/wiki/Integer_square_root) of any [`u8`](prim@u8)
36/// input.
37#[must_use = "this returns the result of the operation, \
38 without modifying the original"]
39#[inline]
40pub(super) const fn u8(n: u8) -> u8 {
41 U8_ISQRT_WITH_REMAINDER[n as usize].0
42}
43
44/// Generates an `i*` function that returns the [integer square root](
45/// https://en.wikipedia.org/wiki/Integer_square_root) of any **nonnegative**
46/// input of a specific signed integer type.
47macro_rules! signed_fn {
48 ($SignedT:ident, $UnsignedT:ident) => {
49 /// Returns the [integer square root](
50 /// https://en.wikipedia.org/wiki/Integer_square_root) of any
51 /// **nonnegative**
52 #[doc = concat!("[`", stringify!($SignedT), "`](prim@", stringify!($SignedT), ")")]
53 /// input.
54 ///
55 /// # Safety
56 ///
57 /// This results in undefined behavior when the input is negative.
58 #[must_use = "this returns the result of the operation, \
59 without modifying the original"]
60 #[inline]
61 pub(super) const unsafe fn $SignedT(n: $SignedT) -> $SignedT {
62 debug_assert!(n >= 0, "Negative input inside `isqrt`.");
63 $UnsignedT(n as $UnsignedT) as $SignedT
64 }
65 };
66}
67
68signed_fn!(i8, u8);
69signed_fn!(i16, u16);
70signed_fn!(i32, u32);
71signed_fn!(i64, u64);
72signed_fn!(i128, u128);
73
74/// Generates a `u*` function that returns the [integer square root](
75/// https://en.wikipedia.org/wiki/Integer_square_root) of any input of
76/// a specific unsigned integer type.
77macro_rules! unsigned_fn {
78 ($UnsignedT:ident, $HalfBitsT:ident, $stages:ident) => {
79 /// Returns the [integer square root](
80 /// https://en.wikipedia.org/wiki/Integer_square_root) of any
81 #[doc = concat!("[`", stringify!($UnsignedT), "`](prim@", stringify!($UnsignedT), ")")]
82 /// input.
83 #[must_use = "this returns the result of the operation, \
84 without modifying the original"]
85 #[inline]
86 pub(super) const fn $UnsignedT(mut n: $UnsignedT) -> $UnsignedT {
87 if n <= <$HalfBitsT>::MAX as $UnsignedT {
88 $HalfBitsT(n as $HalfBitsT) as $UnsignedT
89 } else {
90 // The normalization shift satisfies the Karatsuba square root
91 // algorithm precondition "a₃ ≥ b/4" where a₃ is the most
92 // significant quarter of `n`'s bits and b is the number of
93 // values that can be represented by that quarter of the bits.
94 //
95 // b/4 would then be all 0s except the second most significant
96 // bit (010...0) in binary. Since a₃ must be at least b/4, a₃'s
97 // most significant bit or its neighbor must be a 1. Since a₃'s
98 // most significant bits are `n`'s most significant bits, the
99 // same applies to `n`.
100 //
101 // The reason to shift by an even number of bits is because an
102 // even number of bits produces the square root shifted to the
103 // left by half of the normalization shift:
104 //
105 // sqrt(n << (2 * p))
106 // sqrt(2.pow(2 * p) * n)
107 // sqrt(2.pow(2 * p)) * sqrt(n)
108 // 2.pow(p) * sqrt(n)
109 // sqrt(n) << p
110 //
111 // Shifting by an odd number of bits leaves an ugly sqrt(2)
112 // multiplied in:
113 //
114 // sqrt(n << (2 * p + 1))
115 // sqrt(2.pow(2 * p + 1) * n)
116 // sqrt(2 * 2.pow(2 * p) * n)
117 // sqrt(2) * sqrt(2.pow(2 * p)) * sqrt(n)
118 // sqrt(2) * 2.pow(p) * sqrt(n)
119 // sqrt(2) * (sqrt(n) << p)
120 const EVEN_MAKING_BITMASK: u32 = !1;
121 let normalization_shift = n.leading_zeros() & EVEN_MAKING_BITMASK;
122 n <<= normalization_shift;
123
124 let s = $stages(n);
125
126 let denormalization_shift = normalization_shift >> 1;
127 s >> denormalization_shift
128 }
129 }
130 };
131}
132
133/// Generates the first stage of the computation after normalization.
134///
135/// # Safety
136///
137/// `$n` must be nonzero.
138macro_rules! first_stage {
139 ($original_bits:literal, $n:ident) => {{
140 debug_assert!($n != 0, "`$n` is zero in `first_stage!`.");
141
142 const N_SHIFT: u32 = $original_bits - 8;
143 let n = $n >> N_SHIFT;
144
145 let (s, r) = U8_ISQRT_WITH_REMAINDER[n as usize];
146
147 // Inform the optimizer that `s` is nonzero. This will allow it to
148 // avoid generating code to handle division-by-zero panics in the next
149 // stage.
150 //
151 // SAFETY: If the original `$n` is zero, the top of the `unsigned_fn`
152 // macro recurses instead of continuing to this point, so the original
153 // `$n` wasn't a 0 if we've reached here.
154 //
155 // Then the `unsigned_fn` macro normalizes `$n` so that at least one of
156 // its two most-significant bits is a 1.
157 //
158 // Then this stage puts the eight most-significant bits of `$n` into
159 // `n`. This means that `n` here has at least one 1 bit in its two
160 // most-significant bits, making `n` nonzero.
161 //
162 // `U8_ISQRT_WITH_REMAINDER[n as usize]` will give a nonzero `s` when
163 // given a nonzero `n`.
164 unsafe { crate::hint::assert_unchecked(s != 0) };
165 (s, r)
166 }};
167}
168
169/// Generates a middle stage of the computation.
170///
171/// # Safety
172///
173/// `$s` must be nonzero.
174macro_rules! middle_stage {
175 ($original_bits:literal, $ty:ty, $n:ident, $s:ident, $r:ident) => {{
176 debug_assert!($s != 0, "`$s` is zero in `middle_stage!`.");
177
178 const N_SHIFT: u32 = $original_bits - <$ty>::BITS;
179 let n = ($n >> N_SHIFT) as $ty;
180
181 const HALF_BITS: u32 = <$ty>::BITS >> 1;
182 const QUARTER_BITS: u32 = <$ty>::BITS >> 2;
183 const LOWER_HALF_1_BITS: $ty = (1 << HALF_BITS) - 1;
184 const LOWEST_QUARTER_1_BITS: $ty = (1 << QUARTER_BITS) - 1;
185
186 let lo = n & LOWER_HALF_1_BITS;
187 let numerator = (($r as $ty) << QUARTER_BITS) | (lo >> QUARTER_BITS);
188 let denominator = ($s as $ty) << 1;
189 let q = numerator / denominator;
190 let u = numerator % denominator;
191
192 let mut s = ($s << QUARTER_BITS) as $ty + q;
193 let (mut r, overflow) =
194 ((u << QUARTER_BITS) | (lo & LOWEST_QUARTER_1_BITS)).overflowing_sub(q * q);
195 if overflow {
196 r = r.wrapping_add(2 * s - 1);
197 s -= 1;
198 }
199
200 // Inform the optimizer that `s` is nonzero. This will allow it to
201 // avoid generating code to handle division-by-zero panics in the next
202 // stage.
203 //
204 // SAFETY: If the original `$n` is zero, the top of the `unsigned_fn`
205 // macro recurses instead of continuing to this point, so the original
206 // `$n` wasn't a 0 if we've reached here.
207 //
208 // Then the `unsigned_fn` macro normalizes `$n` so that at least one of
209 // its two most-significant bits is a 1.
210 //
211 // Then these stages take as many of the most-significant bits of `$n`
212 // as will fit in this stage's type. For example, the stage that
213 // handles `u32` deals with the 32 most-significant bits of `$n`. This
214 // means that each stage has at least one 1 bit in `n`'s two
215 // most-significant bits, making `n` nonzero.
216 //
217 // Then this stage will produce the correct integer square root for
218 // that `n` value. Since `n` is nonzero, `s` will also be nonzero.
219 unsafe { crate::hint::assert_unchecked(s != 0) };
220 (s, r)
221 }};
222}
223
224/// Generates the last stage of the computation before denormalization.
225///
226/// # Safety
227///
228/// `$s` must be nonzero.
229macro_rules! last_stage {
230 ($ty:ty, $n:ident, $s:ident, $r:ident) => {{
231 debug_assert!($s != 0, "`$s` is zero in `last_stage!`.");
232
233 const HALF_BITS: u32 = <$ty>::BITS >> 1;
234 const QUARTER_BITS: u32 = <$ty>::BITS >> 2;
235 const LOWER_HALF_1_BITS: $ty = (1 << HALF_BITS) - 1;
236
237 let lo = $n & LOWER_HALF_1_BITS;
238 let numerator = (($r as $ty) << QUARTER_BITS) | (lo >> QUARTER_BITS);
239 let denominator = ($s as $ty) << 1;
240
241 let q = numerator / denominator;
242 let mut s = ($s << QUARTER_BITS) as $ty + q;
243 let (s_squared, overflow) = s.overflowing_mul(s);
244 if overflow || s_squared > $n {
245 s -= 1;
246 }
247 s
248 }};
249}
250
251/// Takes the normalized [`u16`](prim@u16) input and gets its normalized
252/// [integer square root](https://en.wikipedia.org/wiki/Integer_square_root).
253///
254/// # Safety
255///
256/// `n` must be nonzero.
257#[inline]
258const fn u16_stages(n: u16) -> u16 {
259 let (s, r) = first_stage!(16, n);
260 last_stage!(u16, n, s, r)
261}
262
263/// Takes the normalized [`u32`](prim@u32) input and gets its normalized
264/// [integer square root](https://en.wikipedia.org/wiki/Integer_square_root).
265///
266/// # Safety
267///
268/// `n` must be nonzero.
269#[inline]
270const fn u32_stages(n: u32) -> u32 {
271 let (s, r) = first_stage!(32, n);
272 let (s, r) = middle_stage!(32, u16, n, s, r);
273 last_stage!(u32, n, s, r)
274}
275
276/// Takes the normalized [`u64`](prim@u64) input and gets its normalized
277/// [integer square root](https://en.wikipedia.org/wiki/Integer_square_root).
278///
279/// # Safety
280///
281/// `n` must be nonzero.
282#[inline]
283const fn u64_stages(n: u64) -> u64 {
284 let (s, r) = first_stage!(64, n);
285 let (s, r) = middle_stage!(64, u16, n, s, r);
286 let (s, r) = middle_stage!(64, u32, n, s, r);
287 last_stage!(u64, n, s, r)
288}
289
290/// Takes the normalized [`u128`](prim@u128) input and gets its normalized
291/// [integer square root](https://en.wikipedia.org/wiki/Integer_square_root).
292///
293/// # Safety
294///
295/// `n` must be nonzero.
296#[inline]
297const fn u128_stages(n: u128) -> u128 {
298 let (s, r) = first_stage!(128, n);
299 let (s, r) = middle_stage!(128, u16, n, s, r);
300 let (s, r) = middle_stage!(128, u32, n, s, r);
301 let (s, r) = middle_stage!(128, u64, n, s, r);
302 last_stage!(u128, n, s, r)
303}
304
305unsigned_fn!(u16, u8, u16_stages);
306unsigned_fn!(u32, u16, u32_stages);
307unsigned_fn!(u64, u32, u64_stages);
308unsigned_fn!(u128, u64, u128_stages);
309
310/// Instantiate this panic logic once, rather than for all the isqrt methods
311/// on every single primitive type.
312#[cold]
313#[track_caller]
314pub(super) const fn panic_for_negative_argument() -> ! {
315 panic!("argument of integer square root cannot be negative")
316}