Skip to main content

rustfmt_nightly/
sort.rs

1use itertools::EitherOrBoth;
2use itertools::Itertools;
3
4/// Iterator which breaks an identifier into various [VersionChunk]s.
5struct VersionChunkIter<'a> {
6    ident: &'a str,
7    start: usize,
8}
9
10impl<'a> VersionChunkIter<'a> {
11    pub(crate) fn new(ident: &'a str) -> Self {
12        Self { ident, start: 0 }
13    }
14
15    fn parse_numeric_chunk(
16        &mut self,
17        mut chars: std::str::CharIndices<'a>,
18    ) -> Option<VersionChunk<'a>> {
19        let mut end = self.start;
20        let mut is_end_of_chunk = false;
21
22        while let Some((idx, c)) = chars.next() {
23            end = self.start + idx;
24
25            if c.is_ascii_digit() {
26                continue;
27            }
28
29            is_end_of_chunk = true;
30            break;
31        }
32
33        let source = if is_end_of_chunk {
34            let value = &self.ident[self.start..end];
35            self.start = end;
36            value
37        } else {
38            let value = &self.ident[self.start..];
39            self.start = self.ident.len();
40            value
41        };
42
43        let zeros = source.chars().take_while(|c| *c == '0').count();
44        let value = source.parse::<usize>().ok()?;
45
46        Some(VersionChunk::Number {
47            value,
48            zeros,
49            source,
50        })
51    }
52
53    fn parse_str_chunk(
54        &mut self,
55        mut chars: std::str::CharIndices<'a>,
56    ) -> Option<VersionChunk<'a>> {
57        let mut end = self.start;
58        let mut is_end_of_chunk = false;
59
60        while let Some((idx, c)) = chars.next() {
61            end = self.start + idx;
62
63            if c == '_' {
64                is_end_of_chunk = true;
65                break;
66            }
67
68            if !c.is_ascii_digit() {
69                continue;
70            }
71
72            is_end_of_chunk = true;
73            break;
74        }
75
76        let source = if is_end_of_chunk {
77            let value = &self.ident[self.start..end];
78            self.start = end;
79            value
80        } else {
81            let value = &self.ident[self.start..];
82            self.start = self.ident.len();
83            value
84        };
85
86        Some(VersionChunk::Str(source))
87    }
88}
89
90impl<'a> Iterator for VersionChunkIter<'a> {
91    type Item = VersionChunk<'a>;
92
93    fn next(&mut self) -> Option<Self::Item> {
94        let mut chars = self.ident[self.start..].char_indices();
95        let (_, next) = chars.next()?;
96
97        if next == '_' {
98            self.start = self.start + next.len_utf8();
99            return Some(VersionChunk::Underscore);
100        }
101
102        if next.is_ascii_digit() {
103            return self.parse_numeric_chunk(chars);
104        }
105
106        self.parse_str_chunk(chars)
107    }
108}
109
110/// Represents a chunk in the version-sort algorithm
111#[derive(Debug, PartialEq, Eq)]
112enum VersionChunk<'a> {
113    /// A single `_` in an identifier. Underscores are sorted before all other characters.
114    Underscore,
115    /// A &str chunk in the version sort.
116    Str(&'a str),
117    /// A numeric chunk in the version sort. Keeps track of the numeric value and leading zeros.
118    Number {
119        value: usize,
120        zeros: usize,
121        source: &'a str,
122    },
123}
124
125/// Determine which side of the version-sort comparison had more leading zeros.
126#[derive(Debug, PartialEq, Eq)]
127enum MoreLeadingZeros {
128    Left,
129    Right,
130    Equal,
131}
132
133/// Compare two identifiers based on the version sorting algorithm described in [the style guide]
134///
135/// [the style guide]: https://doc.rust-lang.org/nightly/style-guide/#sorting
136pub(crate) fn version_sort(a: &str, b: &str) -> std::cmp::Ordering {
137    let iter_a = VersionChunkIter::new(a);
138    let iter_b = VersionChunkIter::new(b);
139    let mut more_leading_zeros = MoreLeadingZeros::Equal;
140
141    for either_or_both in iter_a.zip_longest(iter_b) {
142        match either_or_both {
143            EitherOrBoth::Left(_) => return std::cmp::Ordering::Greater,
144            EitherOrBoth::Right(_) => return std::cmp::Ordering::Less,
145            EitherOrBoth::Both(a, b) => match (a, b) {
146                (VersionChunk::Underscore, VersionChunk::Underscore) => {
147                    continue;
148                }
149                (VersionChunk::Underscore, _) => return std::cmp::Ordering::Less,
150                (_, VersionChunk::Underscore) => return std::cmp::Ordering::Greater,
151                (VersionChunk::Str(ca), VersionChunk::Str(cb))
152                | (VersionChunk::Str(ca), VersionChunk::Number { source: cb, .. })
153                | (VersionChunk::Number { source: ca, .. }, VersionChunk::Str(cb)) => {
154                    match ca.cmp(&cb) {
155                        std::cmp::Ordering::Equal => {
156                            continue;
157                        }
158                        order @ _ => return order,
159                    }
160                }
161                (
162                    VersionChunk::Number {
163                        value: va,
164                        zeros: lza,
165                        ..
166                    },
167                    VersionChunk::Number {
168                        value: vb,
169                        zeros: lzb,
170                        ..
171                    },
172                ) => match va.cmp(&vb) {
173                    std::cmp::Ordering::Equal => {
174                        if lza == lzb {
175                            continue;
176                        }
177
178                        if more_leading_zeros == MoreLeadingZeros::Equal && lza > lzb {
179                            more_leading_zeros = MoreLeadingZeros::Left;
180                        } else if more_leading_zeros == MoreLeadingZeros::Equal && lza < lzb {
181                            more_leading_zeros = MoreLeadingZeros::Right;
182                        }
183                        continue;
184                    }
185                    order @ _ => return order,
186                },
187            },
188        }
189    }
190
191    match more_leading_zeros {
192        MoreLeadingZeros::Equal => std::cmp::Ordering::Equal,
193        MoreLeadingZeros::Left => std::cmp::Ordering::Less,
194        MoreLeadingZeros::Right => std::cmp::Ordering::Greater,
195    }
196}
197
198#[cfg(test)]
199mod test {
200    use super::*;
201
202    #[test]
203    fn test_chunks() {
204        let mut iter = VersionChunkIter::new("x86_128");
205        assert_eq!(iter.next(), Some(VersionChunk::Str("x")));
206        assert_eq!(
207            iter.next(),
208            Some(VersionChunk::Number {
209                value: 86,
210                zeros: 0,
211                source: "86"
212            })
213        );
214        assert_eq!(iter.next(), Some(VersionChunk::Underscore));
215        assert_eq!(
216            iter.next(),
217            Some(VersionChunk::Number {
218                value: 128,
219                zeros: 0,
220                source: "128"
221            })
222        );
223        assert_eq!(iter.next(), None);
224
225        let mut iter = VersionChunkIter::new("w005s09t");
226        assert_eq!(iter.next(), Some(VersionChunk::Str("w")));
227        assert_eq!(
228            iter.next(),
229            Some(VersionChunk::Number {
230                value: 5,
231                zeros: 2,
232                source: "005"
233            })
234        );
235        assert_eq!(iter.next(), Some(VersionChunk::Str("s")));
236        assert_eq!(
237            iter.next(),
238            Some(VersionChunk::Number {
239                value: 9,
240                zeros: 1,
241                source: "09"
242            })
243        );
244        assert_eq!(iter.next(), Some(VersionChunk::Str("t")));
245        assert_eq!(iter.next(), None);
246
247        let mut iter = VersionChunkIter::new("ZY_WX");
248        assert_eq!(iter.next(), Some(VersionChunk::Str("ZY")));
249        assert_eq!(iter.next(), Some(VersionChunk::Underscore));
250        assert_eq!(iter.next(), Some(VersionChunk::Str("WX")));
251
252        let mut iter = VersionChunkIter::new("_v1");
253        assert_eq!(iter.next(), Some(VersionChunk::Underscore));
254        assert_eq!(iter.next(), Some(VersionChunk::Str("v")));
255        assert_eq!(
256            iter.next(),
257            Some(VersionChunk::Number {
258                value: 1,
259                zeros: 0,
260                source: "1"
261            })
262        );
263
264        let mut iter = VersionChunkIter::new("_1v");
265        assert_eq!(iter.next(), Some(VersionChunk::Underscore));
266        assert_eq!(
267            iter.next(),
268            Some(VersionChunk::Number {
269                value: 1,
270                zeros: 0,
271                source: "1"
272            })
273        );
274        assert_eq!(iter.next(), Some(VersionChunk::Str("v")));
275
276        let mut iter = VersionChunkIter::new("v009");
277        assert_eq!(iter.next(), Some(VersionChunk::Str("v")));
278        assert_eq!(
279            iter.next(),
280            Some(VersionChunk::Number {
281                value: 9,
282                zeros: 2,
283                source: "009"
284            })
285        );
286
287        // '๙' = U+0E59 THAI DIGIT NINE, General Category Nd
288        let mut iter = VersionChunkIter::new("x๙v");
289        assert_eq!(iter.next(), Some(VersionChunk::Str("x๙v")));
290    }
291
292    #[test]
293    fn test_version_sort() {
294        let mut input = vec!["", "b", "a"];
295        let expected = vec!["", "a", "b"];
296        input.sort_by(|a, b| version_sort(a, b));
297        assert_eq!(input, expected);
298
299        let mut input = vec!["x7x", "xxx"];
300        let expected = vec!["x7x", "xxx"];
301        input.sort_by(|a, b| version_sort(a, b));
302        assert_eq!(input, expected);
303
304        let mut input = vec!["x๙x", "xéx", "x0x"];
305        let expected = vec!["x0x", "xéx", "x๙x"];
306        input.sort_by(|a, b| version_sort(a, b));
307        assert_eq!(input, expected);
308
309        let mut input = vec!["applesauce", "apple"];
310        let expected = vec!["apple", "applesauce"];
311        input.sort_by(|a, b| version_sort(a, b));
312        assert_eq!(input, expected);
313
314        let mut input = vec!["aaaaa", "aaa_a"];
315        let expected = vec!["aaa_a", "aaaaa"];
316        input.sort_by(|a, b| version_sort(a, b));
317        assert_eq!(input, expected);
318
319        let mut input = vec!["AAAAA", "AAA1A", "BBBBB", "BB_BB", "C3CCC"];
320        let expected = vec!["AAA1A", "AAAAA", "BB_BB", "BBBBB", "C3CCC"];
321        input.sort_by(|a, b| version_sort(a, b));
322        assert_eq!(input, expected);
323
324        let mut input = vec!["1_000_000", "1_010_001"];
325        let expected = vec!["1_000_000", "1_010_001"];
326        input.sort_by(|a, b| version_sort(a, b));
327        assert_eq!(input, expected);
328
329        let mut input = vec![
330            "5", "50", "500", "5_000", "5_005", "5_050", "5_500", "50_000", "50_005", "50_050",
331            "50_500",
332        ];
333        let expected = vec![
334            "5", "5_000", "5_005", "5_050", "5_500", "50", "50_000", "50_005", "50_050", "50_500",
335            "500",
336        ];
337        input.sort_by(|a, b| version_sort(a, b));
338        assert_eq!(input, expected);
339
340        let mut input = vec!["X86_64", "x86_64", "X86_128", "x86_128"];
341        let expected = vec!["X86_64", "X86_128", "x86_64", "x86_128"];
342        input.sort_by(|a, b| version_sort(a, b));
343        assert_eq!(input, expected);
344
345        let mut input = vec!["__", "_"];
346        let expected = vec!["_", "__"];
347        input.sort_by(|a, b| version_sort(a, b));
348        assert_eq!(input, expected);
349
350        let mut input = vec!["foo_", "foo"];
351        let expected = vec!["foo", "foo_"];
352        input.sort_by(|a, b| version_sort(a, b));
353        assert_eq!(input, expected);
354
355        let mut input = vec!["A", "AA", "B", "a", "aA", "aa", "b"];
356        let expected = vec!["A", "AA", "B", "a", "aA", "aa", "b"];
357        input.sort_by(|a, b| version_sort(a, b));
358        assert_eq!(input, expected);
359
360        let mut input = vec![
361            "x86_128", "usize", "uz", "v000", "v00", "v0", "v0s", "v00t", "v0u", "v001", "v01",
362            "v1", "v009", "x87", "zyxw", "_ZYXW", "_abcd", "A2", "ABCD", "Z_YXW", "ZY_XW", "ZY_XW",
363            "ZYXW", "v09", "v9", "v010", "v10", "w005s09t", "w5s009t", "x64", "x86", "x86_32",
364            "ua", "x86_64", "ZYXW_", "a1", "abcd", "u_zzz", "u8", "u16", "u32", "u64", "u128",
365            "u256",
366        ];
367        let expected = vec![
368            "_ZYXW", "_abcd", "A2", "ABCD", "Z_YXW", "ZY_XW", "ZY_XW", "ZYXW", "ZYXW_", "a1",
369            "abcd", "u_zzz", "u8", "u16", "u32", "u64", "u128", "u256", "ua", "usize", "uz",
370            "v000", "v00", "v0", "v0s", "v00t", "v0u", "v001", "v01", "v1", "v009", "v09", "v9",
371            "v010", "v10", "w005s09t", "w5s009t", "x64", "x86", "x86_32", "x86_64", "x86_128",
372            "x87", "zyxw",
373        ];
374        input.sort_by(|a, b| version_sort(a, b));
375        assert_eq!(input, expected)
376    }
377}