tidy/
alphabetical.rs

1//! Checks that a list of items is in alphabetical order
2//!
3//! Use the following marker in the code:
4//! ```rust
5//! // tidy-alphabetical-start
6//! fn aaa() {}
7//! fn eee() {}
8//! fn z() {}
9//! // tidy-alphabetical-end
10//! ```
11//!
12//! Empty lines and lines starting (ignoring spaces) with `//` or `#` (except those starting with
13//! `#!`) are considered comments are are sorted together with the next line (but do not affect
14//! sorting).
15//!
16//! If the following lines have higher indentation we effectively join them with the current line
17//! before comparing it. If the next line with the same indentation starts (ignoring spaces) with
18//! a closing delimiter (`)`, `[`, `}`) it is joined as well.
19//!
20//! E.g.
21//!
22//! ```rust,ignore ilustrative example for sorting mentioning non-existent functions
23//! foo(a,
24//!     b);
25//! bar(
26//!   a,
27//!   b
28//! );
29//! // are treated for sorting purposes as
30//! foo(a, b);
31//! bar(a, b);
32//! ```
33
34use std::cmp::Ordering;
35use std::fs;
36use std::io::{Seek, Write};
37use std::iter::Peekable;
38use std::ops::{Range, RangeBounds};
39use std::path::Path;
40
41use crate::diagnostics::{CheckId, RunningCheck, TidyCtx};
42use crate::walk::{filter_dirs, walk};
43
44#[cfg(test)]
45mod tests;
46
47fn indentation(line: &str) -> usize {
48    line.find(|c| c != ' ').unwrap_or(0)
49}
50
51fn is_close_bracket(c: char) -> bool {
52    matches!(c, ')' | ']' | '}')
53}
54
55fn is_empty_or_comment(line: &&str) -> bool {
56    let trimmed_line = line.trim_start_matches(' ').trim_end_matches('\n');
57
58    trimmed_line.is_empty()
59        || trimmed_line.starts_with("//")
60        || (trimmed_line.starts_with('#') && !trimmed_line.starts_with("#!"))
61}
62
63const START_MARKER: &str = "tidy-alphabetical-start";
64const END_MARKER: &str = "tidy-alphabetical-end";
65
66/// Given contents of a section that is enclosed between [`START_MARKER`] and [`END_MARKER`], sorts
67/// them according to the rules described at the top of the module.
68fn sort_section(section: &str) -> String {
69    /// A sortable item
70    struct Item {
71        /// Full contents including comments and whitespace
72        full: String,
73        /// Trimmed contents for sorting
74        trimmed: String,
75    }
76
77    let mut items = Vec::new();
78    let mut lines = section.split_inclusive('\n').peekable();
79
80    let end_comments = loop {
81        let mut full = String::new();
82        let mut trimmed = String::new();
83
84        while let Some(comment) = lines.next_if(is_empty_or_comment) {
85            full.push_str(comment);
86        }
87
88        let Some(line) = lines.next() else {
89            // remember comments at the end of a block
90            break full;
91        };
92
93        let mut push = |line| {
94            full.push_str(line);
95            trimmed.push_str(line.trim_start_matches(' ').trim_end_matches('\n'))
96        };
97
98        push(line);
99
100        let indent = indentation(line);
101        let mut multiline = false;
102
103        // If the item is split between multiple lines...
104        while let Some(more_indented) =
105            lines.next_if(|&line: &&_| indent < indentation(line) || line == "\n")
106        {
107            multiline = true;
108            push(more_indented);
109        }
110
111        if multiline
112            && let Some(indented) =
113                // Only append next indented line if it looks like a closing bracket.
114                // Otherwise we incorrectly merge code like this (can be seen in
115                // compiler/rustc_session/src/options.rs):
116                //
117                // force_unwind_tables: Option<bool> = (None, parse_opt_bool, [TRACKED],
118                //     "force use of unwind tables"),
119                // incremental: Option<String> = (None, parse_opt_string, [UNTRACKED],
120                //     "enable incremental compilation"),
121                lines.next_if(|l| {
122                    indentation(l) == indent
123                        && l.trim_start_matches(' ').starts_with(is_close_bracket)
124                })
125        {
126            push(indented);
127        }
128
129        items.push(Item { full, trimmed });
130    };
131
132    items.sort_by(|a, b| version_sort(&a.trimmed, &b.trimmed));
133    items.into_iter().map(|l| l.full).chain([end_comments]).collect()
134}
135
136fn check_lines<'a>(path: &Path, content: &'a str, tidy_ctx: &TidyCtx, check: &mut RunningCheck) {
137    let mut offset = 0;
138
139    loop {
140        let rest = &content[offset..];
141        let start = rest.find(START_MARKER);
142        let end = rest.find(END_MARKER);
143
144        match (start, end) {
145            // error handling
146
147            // end before start
148            (Some(start), Some(end)) if end < start => {
149                check.error(format!(
150                    "{path}:{line_number} found `{END_MARKER}` expecting `{START_MARKER}`",
151                    path = path.display(),
152                    line_number = content[..offset + end].lines().count(),
153                ));
154                break;
155            }
156
157            // end without a start
158            (None, Some(end)) => {
159                check.error(format!(
160                    "{path}:{line_number} found `{END_MARKER}` expecting `{START_MARKER}`",
161                    path = path.display(),
162                    line_number = content[..offset + end].lines().count(),
163                ));
164                break;
165            }
166
167            // start without an end
168            (Some(start), None) => {
169                check.error(format!(
170                    "{path}:{line_number} `{START_MARKER}` without a matching `{END_MARKER}`",
171                    path = path.display(),
172                    line_number = content[..offset + start].lines().count(),
173                ));
174                break;
175            }
176
177            // a second start in between start/end pair
178            (Some(start), Some(end))
179                if rest[start + START_MARKER.len()..end].contains(START_MARKER) =>
180            {
181                check.error(format!(
182                    "{path}:{line_number} found `{START_MARKER}` expecting `{END_MARKER}`",
183                    path = path.display(),
184                    line_number = content[..offset
185                        + sub_find(rest, start + START_MARKER.len()..end, START_MARKER)
186                            .unwrap()
187                            .start]
188                        .lines()
189                        .count()
190                ));
191                break;
192            }
193
194            // happy happy path :3
195            (Some(start), Some(end)) => {
196                assert!(start <= end);
197
198                // "...␤// tidy-alphabetical-start␤...␤// tidy-alphabetical-end␤..."
199                //                  start_nl_end --^  ^-- end_nl_start          ^-- end_nl_end
200
201                // Position after the newline after start marker
202                let start_nl_end = sub_find(rest, start + START_MARKER.len().., "\n").unwrap().end;
203
204                // Position before the new line before the end marker
205                let end_nl_start = rest[..end].rfind('\n').unwrap();
206
207                // Position after the newline after end marker
208                let end_nl_end = sub_find(rest, end + END_MARKER.len().., "\n")
209                    .map(|r| r.end)
210                    .unwrap_or(content.len() - offset);
211
212                let section = &rest[start_nl_end..=end_nl_start];
213                let sorted = sort_section(section);
214
215                // oh nyooo :(
216                if sorted != section {
217                    if !tidy_ctx.is_bless_enabled() {
218                        let base_line_number = content[..offset + start_nl_end].lines().count();
219                        let line_offset = sorted
220                            .lines()
221                            .zip(section.lines())
222                            .enumerate()
223                            .find(|(_, (a, b))| a != b)
224                            .unwrap()
225                            .0;
226                        let line_number = base_line_number + line_offset;
227
228                        check.error(format!(
229                            "{path}:{line_number}: line not in alphabetical order (tip: use --bless to sort this list)",
230                            path = path.display(),
231                        ));
232                    } else {
233                        // Use atomic rename as to not corrupt the file upon crashes/ctrl+c
234                        let mut tempfile =
235                            tempfile::Builder::new().tempfile_in(path.parent().unwrap()).unwrap();
236
237                        fs::copy(path, tempfile.path()).unwrap();
238
239                        tempfile
240                            .as_file_mut()
241                            .seek(std::io::SeekFrom::Start((offset + start_nl_end) as u64))
242                            .unwrap();
243                        tempfile.as_file_mut().write_all(sorted.as_bytes()).unwrap();
244
245                        tempfile.persist(path).unwrap();
246                    }
247                }
248
249                // Start the next search after the end section
250                offset += end_nl_end;
251            }
252
253            // No more alphabetical lists, yay :3
254            (None, None) => break,
255        }
256    }
257}
258
259pub fn check(path: &Path, tidy_ctx: TidyCtx) {
260    let mut check = tidy_ctx.start_check(CheckId::new("alphabetical").path(path));
261
262    let skip = |path: &_, _is_dir| {
263        filter_dirs(path)
264            || path.ends_with("tidy/src/alphabetical.rs")
265            || path.ends_with("tidy/src/alphabetical/tests.rs")
266    };
267
268    walk(path, skip, &mut |entry, content| {
269        check_lines(entry.path(), content, &tidy_ctx, &mut check)
270    });
271}
272
273fn consume_numeric_prefix<I: Iterator<Item = char>>(it: &mut Peekable<I>) -> String {
274    let mut result = String::new();
275
276    while let Some(&c) = it.peek() {
277        if !c.is_numeric() {
278            break;
279        }
280
281        result.push(c);
282        it.next();
283    }
284
285    result
286}
287
288// A sorting function that is case-sensitive, and sorts sequences of digits by their numeric value,
289// so that `9` sorts before `12`.
290fn version_sort(a: &str, b: &str) -> Ordering {
291    let mut it1 = a.chars().peekable();
292    let mut it2 = b.chars().peekable();
293
294    while let (Some(x), Some(y)) = (it1.peek(), it2.peek()) {
295        match (x.is_numeric(), y.is_numeric()) {
296            (true, true) => {
297                let num1: String = consume_numeric_prefix(it1.by_ref());
298                let num2: String = consume_numeric_prefix(it2.by_ref());
299
300                let int1: u64 = num1.parse().unwrap();
301                let int2: u64 = num2.parse().unwrap();
302
303                // Compare strings when the numeric value is equal to handle "00" versus "0".
304                match int1.cmp(&int2).then_with(|| num1.cmp(&num2)) {
305                    Ordering::Equal => continue,
306                    different => return different,
307                }
308            }
309            (false, false) => match x.cmp(y) {
310                Ordering::Equal => {
311                    it1.next();
312                    it2.next();
313                    continue;
314                }
315                different => return different,
316            },
317            (false, true) | (true, false) => {
318                return x.cmp(y);
319            }
320        }
321    }
322
323    it1.next().cmp(&it2.next())
324}
325
326/// Finds `pat` in `s[range]` and returns a range such that `s[ret] == pat`.
327fn sub_find(s: &str, range: impl RangeBounds<usize>, pat: &str) -> Option<Range<usize>> {
328    s[(range.start_bound().cloned(), range.end_bound().cloned())]
329        .find(pat)
330        .map(|pos| {
331            pos + match range.start_bound().cloned() {
332                std::ops::Bound::Included(x) => x,
333                std::ops::Bound::Excluded(x) => x + 1,
334                std::ops::Bound::Unbounded => 0,
335            }
336        })
337        .map(|pos| pos..pos + pat.len())
338}