rustfmt_nightly/
sort.rs

1use itertools::EitherOrBoth;
2use itertools::Itertools;
3
4/// Iterator which breaks an identifier into various [VersionChunk]s.
5struct VersionChunkIter<'a> {
6    ident: &'a str,
7    start: usize,
8}
9
10impl<'a> VersionChunkIter<'a> {
11    pub(crate) fn new(ident: &'a str) -> Self {
12        Self { ident, start: 0 }
13    }
14
15    fn parse_numeric_chunk(
16        &mut self,
17        mut chars: std::str::CharIndices<'a>,
18    ) -> Option<VersionChunk<'a>> {
19        let mut end = self.start;
20        let mut is_end_of_chunk = false;
21
22        while let Some((idx, c)) = chars.next() {
23            end = self.start + idx;
24
25            if c.is_ascii_digit() {
26                continue;
27            }
28
29            is_end_of_chunk = true;
30            break;
31        }
32
33        let source = if is_end_of_chunk {
34            let value = &self.ident[self.start..end];
35            self.start = end;
36            value
37        } else {
38            let value = &self.ident[self.start..];
39            self.start = self.ident.len();
40            value
41        };
42
43        let zeros = source.chars().take_while(|c| *c == '0').count();
44        let value = source.parse::<usize>().ok()?;
45
46        Some(VersionChunk::Number {
47            value,
48            zeros,
49            source,
50        })
51    }
52
53    fn parse_str_chunk(
54        &mut self,
55        mut chars: std::str::CharIndices<'a>,
56    ) -> Option<VersionChunk<'a>> {
57        let mut end = self.start;
58        let mut is_end_of_chunk = false;
59
60        while let Some((idx, c)) = chars.next() {
61            end = self.start + idx;
62
63            if c == '_' {
64                is_end_of_chunk = true;
65                break;
66            }
67
68            if !c.is_numeric() {
69                continue;
70            }
71
72            is_end_of_chunk = true;
73            break;
74        }
75
76        let source = if is_end_of_chunk {
77            let value = &self.ident[self.start..end];
78            self.start = end;
79            value
80        } else {
81            let value = &self.ident[self.start..];
82            self.start = self.ident.len();
83            value
84        };
85
86        Some(VersionChunk::Str(source))
87    }
88}
89
90impl<'a> Iterator for VersionChunkIter<'a> {
91    type Item = VersionChunk<'a>;
92
93    fn next(&mut self) -> Option<Self::Item> {
94        let mut chars = self.ident[self.start..].char_indices();
95        let (_, next) = chars.next()?;
96
97        if next == '_' {
98            self.start = self.start + next.len_utf8();
99            return Some(VersionChunk::Underscore);
100        }
101
102        if next.is_ascii_digit() {
103            return self.parse_numeric_chunk(chars);
104        }
105
106        self.parse_str_chunk(chars)
107    }
108}
109
110/// Represents a chunk in the version-sort algorithm
111#[derive(Debug, PartialEq, Eq)]
112enum VersionChunk<'a> {
113    /// A single `_` in an identifier. Underscores are sorted before all other characters.
114    Underscore,
115    /// A &str chunk in the version sort.
116    Str(&'a str),
117    /// A numeric chunk in the version sort. Keeps track of the numeric value and leading zeros.
118    Number {
119        value: usize,
120        zeros: usize,
121        source: &'a str,
122    },
123}
124
125/// Determine which side of the version-sort comparison had more leading zeros.
126#[derive(Debug, PartialEq, Eq)]
127enum MoreLeadingZeros {
128    Left,
129    Right,
130    Equal,
131}
132
133/// Compare two identifiers based on the version sorting algorithm described in [the style guide]
134///
135/// [the style guide]: https://doc.rust-lang.org/nightly/style-guide/#sorting
136pub(crate) fn version_sort(a: &str, b: &str) -> std::cmp::Ordering {
137    let iter_a = VersionChunkIter::new(a);
138    let iter_b = VersionChunkIter::new(b);
139    let mut more_leading_zeros = MoreLeadingZeros::Equal;
140
141    for either_or_both in iter_a.zip_longest(iter_b) {
142        match either_or_both {
143            EitherOrBoth::Left(_) => return std::cmp::Ordering::Greater,
144            EitherOrBoth::Right(_) => return std::cmp::Ordering::Less,
145            EitherOrBoth::Both(a, b) => match (a, b) {
146                (VersionChunk::Underscore, VersionChunk::Underscore) => {
147                    continue;
148                }
149                (VersionChunk::Underscore, _) => return std::cmp::Ordering::Less,
150                (_, VersionChunk::Underscore) => return std::cmp::Ordering::Greater,
151                (VersionChunk::Str(ca), VersionChunk::Str(cb))
152                | (VersionChunk::Str(ca), VersionChunk::Number { source: cb, .. })
153                | (VersionChunk::Number { source: ca, .. }, VersionChunk::Str(cb)) => {
154                    match ca.cmp(&cb) {
155                        std::cmp::Ordering::Equal => {
156                            continue;
157                        }
158                        order @ _ => return order,
159                    }
160                }
161                (
162                    VersionChunk::Number {
163                        value: va,
164                        zeros: lza,
165                        ..
166                    },
167                    VersionChunk::Number {
168                        value: vb,
169                        zeros: lzb,
170                        ..
171                    },
172                ) => match va.cmp(&vb) {
173                    std::cmp::Ordering::Equal => {
174                        if lza == lzb {
175                            continue;
176                        }
177
178                        if more_leading_zeros == MoreLeadingZeros::Equal && lza > lzb {
179                            more_leading_zeros = MoreLeadingZeros::Left;
180                        } else if more_leading_zeros == MoreLeadingZeros::Equal && lza < lzb {
181                            more_leading_zeros = MoreLeadingZeros::Right;
182                        }
183                        continue;
184                    }
185                    order @ _ => return order,
186                },
187            },
188        }
189    }
190
191    match more_leading_zeros {
192        MoreLeadingZeros::Equal => std::cmp::Ordering::Equal,
193        MoreLeadingZeros::Left => std::cmp::Ordering::Less,
194        MoreLeadingZeros::Right => std::cmp::Ordering::Greater,
195    }
196}
197
198#[cfg(test)]
199mod test {
200    use super::*;
201
202    #[test]
203    fn test_chunks() {
204        let mut iter = VersionChunkIter::new("x86_128");
205        assert_eq!(iter.next(), Some(VersionChunk::Str("x")));
206        assert_eq!(
207            iter.next(),
208            Some(VersionChunk::Number {
209                value: 86,
210                zeros: 0,
211                source: "86"
212            })
213        );
214        assert_eq!(iter.next(), Some(VersionChunk::Underscore));
215        assert_eq!(
216            iter.next(),
217            Some(VersionChunk::Number {
218                value: 128,
219                zeros: 0,
220                source: "128"
221            })
222        );
223        assert_eq!(iter.next(), None);
224
225        let mut iter = VersionChunkIter::new("w005s09t");
226        assert_eq!(iter.next(), Some(VersionChunk::Str("w")));
227        assert_eq!(
228            iter.next(),
229            Some(VersionChunk::Number {
230                value: 5,
231                zeros: 2,
232                source: "005"
233            })
234        );
235        assert_eq!(iter.next(), Some(VersionChunk::Str("s")));
236        assert_eq!(
237            iter.next(),
238            Some(VersionChunk::Number {
239                value: 9,
240                zeros: 1,
241                source: "09"
242            })
243        );
244        assert_eq!(iter.next(), Some(VersionChunk::Str("t")));
245        assert_eq!(iter.next(), None);
246
247        let mut iter = VersionChunkIter::new("ZY_WX");
248        assert_eq!(iter.next(), Some(VersionChunk::Str("ZY")));
249        assert_eq!(iter.next(), Some(VersionChunk::Underscore));
250        assert_eq!(iter.next(), Some(VersionChunk::Str("WX")));
251
252        let mut iter = VersionChunkIter::new("_v1");
253        assert_eq!(iter.next(), Some(VersionChunk::Underscore));
254        assert_eq!(iter.next(), Some(VersionChunk::Str("v")));
255        assert_eq!(
256            iter.next(),
257            Some(VersionChunk::Number {
258                value: 1,
259                zeros: 0,
260                source: "1"
261            })
262        );
263
264        let mut iter = VersionChunkIter::new("_1v");
265        assert_eq!(iter.next(), Some(VersionChunk::Underscore));
266        assert_eq!(
267            iter.next(),
268            Some(VersionChunk::Number {
269                value: 1,
270                zeros: 0,
271                source: "1"
272            })
273        );
274        assert_eq!(iter.next(), Some(VersionChunk::Str("v")));
275
276        let mut iter = VersionChunkIter::new("v009");
277        assert_eq!(iter.next(), Some(VersionChunk::Str("v")));
278        assert_eq!(
279            iter.next(),
280            Some(VersionChunk::Number {
281                value: 9,
282                zeros: 2,
283                source: "009"
284            })
285        );
286    }
287
288    #[test]
289    fn test_version_sort() {
290        let mut input = vec!["", "b", "a"];
291        let expected = vec!["", "a", "b"];
292        input.sort_by(|a, b| version_sort(a, b));
293        assert_eq!(input, expected);
294
295        let mut input = vec!["x7x", "xxx"];
296        let expected = vec!["x7x", "xxx"];
297        input.sort_by(|a, b| version_sort(a, b));
298        assert_eq!(input, expected);
299
300        let mut input = vec!["applesauce", "apple"];
301        let expected = vec!["apple", "applesauce"];
302        input.sort_by(|a, b| version_sort(a, b));
303        assert_eq!(input, expected);
304
305        let mut input = vec!["aaaaa", "aaa_a"];
306        let expected = vec!["aaa_a", "aaaaa"];
307        input.sort_by(|a, b| version_sort(a, b));
308        assert_eq!(input, expected);
309
310        let mut input = vec!["AAAAA", "AAA1A", "BBBBB", "BB_BB", "C3CCC"];
311        let expected = vec!["AAA1A", "AAAAA", "BB_BB", "BBBBB", "C3CCC"];
312        input.sort_by(|a, b| version_sort(a, b));
313        assert_eq!(input, expected);
314
315        let mut input = vec!["1_000_000", "1_010_001"];
316        let expected = vec!["1_000_000", "1_010_001"];
317        input.sort_by(|a, b| version_sort(a, b));
318        assert_eq!(input, expected);
319
320        let mut input = vec![
321            "5", "50", "500", "5_000", "5_005", "5_050", "5_500", "50_000", "50_005", "50_050",
322            "50_500",
323        ];
324        let expected = vec![
325            "5", "5_000", "5_005", "5_050", "5_500", "50", "50_000", "50_005", "50_050", "50_500",
326            "500",
327        ];
328        input.sort_by(|a, b| version_sort(a, b));
329        assert_eq!(input, expected);
330
331        let mut input = vec!["X86_64", "x86_64", "X86_128", "x86_128"];
332        let expected = vec!["X86_64", "X86_128", "x86_64", "x86_128"];
333        input.sort_by(|a, b| version_sort(a, b));
334        assert_eq!(input, expected);
335
336        let mut input = vec!["__", "_"];
337        let expected = vec!["_", "__"];
338        input.sort_by(|a, b| version_sort(a, b));
339        assert_eq!(input, expected);
340
341        let mut input = vec!["foo_", "foo"];
342        let expected = vec!["foo", "foo_"];
343        input.sort_by(|a, b| version_sort(a, b));
344        assert_eq!(input, expected);
345
346        let mut input = vec!["A", "AA", "B", "a", "aA", "aa", "b"];
347        let expected = vec!["A", "AA", "B", "a", "aA", "aa", "b"];
348        input.sort_by(|a, b| version_sort(a, b));
349        assert_eq!(input, expected);
350
351        let mut input = vec![
352            "x86_128", "usize", "uz", "v000", "v00", "v0", "v0s", "v00t", "v0u", "v001", "v01",
353            "v1", "v009", "x87", "zyxw", "_ZYXW", "_abcd", "A2", "ABCD", "Z_YXW", "ZY_XW", "ZY_XW",
354            "ZYXW", "v09", "v9", "v010", "v10", "w005s09t", "w5s009t", "x64", "x86", "x86_32",
355            "ua", "x86_64", "ZYXW_", "a1", "abcd", "u_zzz", "u8", "u16", "u32", "u64", "u128",
356            "u256",
357        ];
358        let expected = vec![
359            "_ZYXW", "_abcd", "A2", "ABCD", "Z_YXW", "ZY_XW", "ZY_XW", "ZYXW", "ZYXW_", "a1",
360            "abcd", "u_zzz", "u8", "u16", "u32", "u64", "u128", "u256", "ua", "usize", "uz",
361            "v000", "v00", "v0", "v0s", "v00t", "v0u", "v001", "v01", "v1", "v009", "v09", "v9",
362            "v010", "v10", "w005s09t", "w5s009t", "x64", "x86", "x86_32", "x86_64", "x86_128",
363            "x87", "zyxw",
364        ];
365        input.sort_by(|a, b| version_sort(a, b));
366        assert_eq!(input, expected)
367    }
368}