rustfmt_nightly/
comment.rs

1// Formatting and tools for comments.
2
3use std::{borrow::Cow, iter};
4
5use itertools::{Itertools as _, MultiPeek, multipeek};
6use rustc_span::Span;
7use tracing::{debug, trace};
8
9use crate::config::Config;
10use crate::rewrite::{RewriteContext, RewriteErrorExt, RewriteResult};
11use crate::shape::{Indent, Shape};
12use crate::string::{StringFormat, rewrite_string};
13use crate::utils::{
14    count_newlines, first_line_width, last_line_width, trim_left_preserve_layout,
15    trimmed_last_line_width, unicode_str_width,
16};
17use crate::{ErrorKind, FormattingError};
18
/// Returns `true` if `comment` is a `//` comment whose third character is neither alphanumeric
/// nor whitespace, e.g. `//@ ...` or `//- ...`.
///
/// Note that `///` and `//!` also satisfy this test.
fn is_custom_comment(comment: &str) -> bool {
    match comment.strip_prefix("//") {
        // Inspect the character right after the two slashes, if there is one.
        Some(after_slashes) => after_slashes
            .chars()
            .next()
            .is_some_and(|c| !c.is_alphanumeric() && !c.is_whitespace()),
        None => false,
    }
}
28
/// The syntactic flavor of a comment, identified by its opening characters.
///
/// `Debug` is derived so that values can be used in `assert_eq!` and `{:?}` diagnostics.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum CommentStyle<'a> {
    /// `// ...`
    DoubleSlash,
    /// `/// ...`
    TripleSlash,
    /// `//! ...`
    Doc,
    /// `/* ... */`
    SingleBullet,
    /// `/** ... */`
    DoubleBullet,
    /// `/*! ... */`
    Exclamation,
    /// A `//` comment with a custom opener; the payload is that opener.
    Custom(&'a str),
}
39
/// Extracts the opener of a custom comment: the first line of `s` up to and including its first
/// space, or the whole first line if it contains no space. Returns `""` when `s` has no lines.
fn custom_opener(s: &str) -> &str {
    let Some(first_line) = s.lines().next() else {
        return "";
    };
    match first_line.find(' ') {
        Some(space_index) => &first_line[..=space_index],
        None => first_line,
    }
}
47
48impl<'a> CommentStyle<'a> {
49    /// Returns `true` if the commenting style cannot span multiple lines.
50    pub(crate) fn is_line_comment(&self) -> bool {
51        matches!(
52            self,
53            CommentStyle::DoubleSlash
54                | CommentStyle::TripleSlash
55                | CommentStyle::Doc
56                | CommentStyle::Custom(_)
57        )
58    }
59
60    /// Returns `true` if the commenting style can span multiple lines.
61    pub(crate) fn is_block_comment(&self) -> bool {
62        matches!(
63            self,
64            CommentStyle::SingleBullet | CommentStyle::DoubleBullet | CommentStyle::Exclamation
65        )
66    }
67
68    /// Returns `true` if the commenting style is for documentation.
69    pub(crate) fn is_doc_comment(&self) -> bool {
70        matches!(*self, CommentStyle::TripleSlash | CommentStyle::Doc)
71    }
72
73    pub(crate) fn opener(&self) -> &'a str {
74        match *self {
75            CommentStyle::DoubleSlash => "// ",
76            CommentStyle::TripleSlash => "/// ",
77            CommentStyle::Doc => "//! ",
78            CommentStyle::SingleBullet => "/* ",
79            CommentStyle::DoubleBullet => "/** ",
80            CommentStyle::Exclamation => "/*! ",
81            CommentStyle::Custom(opener) => opener,
82        }
83    }
84
85    pub(crate) fn closer(&self) -> &'a str {
86        match *self {
87            CommentStyle::DoubleSlash
88            | CommentStyle::TripleSlash
89            | CommentStyle::Custom(..)
90            | CommentStyle::Doc => "",
91            CommentStyle::SingleBullet | CommentStyle::DoubleBullet | CommentStyle::Exclamation => {
92                " */"
93            }
94        }
95    }
96
97    pub(crate) fn line_start(&self) -> &'a str {
98        match *self {
99            CommentStyle::DoubleSlash => "// ",
100            CommentStyle::TripleSlash => "/// ",
101            CommentStyle::Doc => "//! ",
102            CommentStyle::SingleBullet | CommentStyle::DoubleBullet | CommentStyle::Exclamation => {
103                " * "
104            }
105            CommentStyle::Custom(opener) => opener,
106        }
107    }
108
109    pub(crate) fn to_str_tuplet(&self) -> (&'a str, &'a str, &'a str) {
110        (self.opener(), self.closer(), self.line_start())
111    }
112}
113
114pub(crate) fn comment_style(orig: &str, normalize_comments: bool) -> CommentStyle<'_> {
115    if !normalize_comments {
116        if orig.starts_with("/**") && !orig.starts_with("/**/") {
117            CommentStyle::DoubleBullet
118        } else if orig.starts_with("/*!") {
119            CommentStyle::Exclamation
120        } else if orig.starts_with("/*") {
121            CommentStyle::SingleBullet
122        } else if orig.starts_with("///") && orig.chars().nth(3).map_or(true, |c| c != '/') {
123            CommentStyle::TripleSlash
124        } else if orig.starts_with("//!") {
125            CommentStyle::Doc
126        } else if is_custom_comment(orig) {
127            CommentStyle::Custom(custom_opener(orig))
128        } else {
129            CommentStyle::DoubleSlash
130        }
131    } else if (orig.starts_with("///") && orig.chars().nth(3).map_or(true, |c| c != '/'))
132        || (orig.starts_with("/**") && !orig.starts_with("/**/"))
133    {
134        CommentStyle::TripleSlash
135    } else if orig.starts_with("//!") || orig.starts_with("/*!") {
136        CommentStyle::Doc
137    } else if is_custom_comment(orig) {
138        CommentStyle::Custom(custom_opener(orig))
139    } else {
140        CommentStyle::DoubleSlash
141    }
142}
143
/// Returns `true` if `s`, ignoring trailing whitespace, ends with the block-comment closer `*/`.
pub(crate) fn is_last_comment_block(s: &str) -> bool {
    let without_trailing_ws = s.trim_end();
    without_trailing_ws.strip_suffix("*/").is_some()
}
148
149/// Combine `prev_str` and `next_str` into a single `String`. `span` may contain
150/// comments between two strings. If there are such comments, then that will be
151/// recovered. If `allow_extend` is true and there is no comment between the two
152/// strings, then they will be put on a single line as long as doing so does not
153/// exceed max width.
154pub(crate) fn combine_strs_with_missing_comments(
155    context: &RewriteContext<'_>,
156    prev_str: &str,
157    next_str: &str,
158    span: Span,
159    shape: Shape,
160    allow_extend: bool,
161) -> RewriteResult {
162    trace!(
163        "combine_strs_with_missing_comments `{}` `{}` {:?} {:?}",
164        prev_str, next_str, span, shape
165    );
166
167    let mut result =
168        String::with_capacity(prev_str.len() + next_str.len() + shape.indent.width() + 128);
169    result.push_str(prev_str);
170    let mut allow_one_line = !prev_str.contains('\n') && !next_str.contains('\n');
171    let first_sep =
172        if prev_str.is_empty() || next_str.is_empty() || trimmed_last_line_width(prev_str) == 0 {
173            ""
174        } else {
175            " "
176        };
177    let mut one_line_width =
178        last_line_width(prev_str) + first_line_width(next_str) + first_sep.len();
179
180    let config = context.config;
181    let indent = shape.indent;
182    let missing_comment = rewrite_missing_comment(span, shape, context)?;
183
184    if missing_comment.is_empty() {
185        if allow_extend && one_line_width <= shape.width {
186            result.push_str(first_sep);
187        } else if !prev_str.is_empty() {
188            result.push_str(&indent.to_string_with_newline(config))
189        }
190        result.push_str(next_str);
191        return Ok(result);
192    }
193
194    // We have a missing comment between the first expression and the second expression.
195
196    // Peek the original source code and find out whether there is a newline between the first
197    // expression and the second expression or the missing comment. We will preserve the original
198    // layout whenever possible.
199    let original_snippet = context.snippet(span);
200    let prefer_same_line = if let Some(pos) = original_snippet.find('/') {
201        !original_snippet[..pos].contains('\n')
202    } else {
203        !original_snippet.contains('\n')
204    };
205
206    one_line_width -= first_sep.len();
207    let first_sep = if prev_str.is_empty() || missing_comment.is_empty() {
208        Cow::from("")
209    } else {
210        let one_line_width = last_line_width(prev_str) + first_line_width(&missing_comment) + 1;
211        if prefer_same_line && one_line_width <= shape.width {
212            Cow::from(" ")
213        } else {
214            indent.to_string_with_newline(config)
215        }
216    };
217    result.push_str(&first_sep);
218    result.push_str(&missing_comment);
219
220    let second_sep = if missing_comment.is_empty() || next_str.is_empty() {
221        Cow::from("")
222    } else if missing_comment.starts_with("//") {
223        indent.to_string_with_newline(config)
224    } else {
225        one_line_width += missing_comment.len() + first_sep.len() + 1;
226        allow_one_line &= !missing_comment.starts_with("//") && !missing_comment.contains('\n');
227        if prefer_same_line && allow_one_line && one_line_width <= shape.width {
228            Cow::from(" ")
229        } else {
230            indent.to_string_with_newline(config)
231        }
232    };
233    result.push_str(&second_sep);
234    result.push_str(next_str);
235
236    Ok(result)
237}
238
239pub(crate) fn rewrite_doc_comment(orig: &str, shape: Shape, config: &Config) -> RewriteResult {
240    identify_comment(orig, false, shape, config, true)
241}
242
243pub(crate) fn rewrite_comment(
244    orig: &str,
245    block_style: bool,
246    shape: Shape,
247    config: &Config,
248) -> RewriteResult {
249    identify_comment(orig, block_style, shape, config, false)
250}
251
252fn identify_comment(
253    orig: &str,
254    block_style: bool,
255    shape: Shape,
256    config: &Config,
257    is_doc_comment: bool,
258) -> RewriteResult {
259    let style = comment_style(orig, false);
260
261    // Computes the byte length of line taking into account a newline if the line is part of a
262    // paragraph.
263    fn compute_len(orig: &str, line: &str) -> usize {
264        if orig.len() > line.len() {
265            if orig.as_bytes()[line.len()] == b'\r' {
266                line.len() + 2
267            } else {
268                line.len() + 1
269            }
270        } else {
271            line.len()
272        }
273    }
274
275    // Get the first group of line comments having the same commenting style.
276    //
277    // Returns a tuple with:
278    // - a boolean indicating if there is a blank line
279    // - a number indicating the size of the first group of comments
280    fn consume_same_line_comments(
281        style: CommentStyle<'_>,
282        orig: &str,
283        line_start: &str,
284    ) -> (bool, usize) {
285        let mut first_group_ending = 0;
286        let mut hbl = false;
287
288        for line in orig.lines() {
289            let trimmed_line = line.trim_start();
290            if trimmed_line.is_empty() {
291                hbl = true;
292                break;
293            } else if trimmed_line.starts_with(line_start)
294                || comment_style(trimmed_line, false) == style
295            {
296                first_group_ending += compute_len(&orig[first_group_ending..], line);
297            } else {
298                break;
299            }
300        }
301        (hbl, first_group_ending)
302    }
303
304    let (has_bare_lines, first_group_ending) = match style {
305        CommentStyle::DoubleSlash | CommentStyle::TripleSlash | CommentStyle::Doc => {
306            let line_start = style.line_start().trim_start();
307            consume_same_line_comments(style, orig, line_start)
308        }
309        CommentStyle::Custom(opener) => {
310            let trimmed_opener = opener.trim_end();
311            consume_same_line_comments(style, orig, trimmed_opener)
312        }
313        // for a block comment, search for the closing symbol
314        CommentStyle::DoubleBullet | CommentStyle::SingleBullet | CommentStyle::Exclamation => {
315            let closer = style.closer().trim_start();
316            let mut count = orig.matches(closer).count();
317            let mut closing_symbol_offset = 0;
318            let mut hbl = false;
319            let mut first = true;
320            for line in orig.lines() {
321                closing_symbol_offset += compute_len(&orig[closing_symbol_offset..], line);
322                let mut trimmed_line = line.trim_start();
323                if !trimmed_line.starts_with('*')
324                    && !trimmed_line.starts_with("//")
325                    && !trimmed_line.starts_with("/*")
326                {
327                    hbl = true;
328                }
329
330                // Remove opener from consideration when searching for closer
331                if first {
332                    let opener = style.opener().trim_end();
333                    trimmed_line = &trimmed_line[opener.len()..];
334                    first = false;
335                }
336                if trimmed_line.ends_with(closer) {
337                    count -= 1;
338                    if count == 0 {
339                        break;
340                    }
341                }
342            }
343            (hbl, closing_symbol_offset)
344        }
345    };
346
347    let (first_group, rest) = orig.split_at(first_group_ending);
348    let rewritten_first_group =
349        if !config.normalize_comments() && has_bare_lines && style.is_block_comment() {
350            trim_left_preserve_layout(first_group, shape.indent, config).unknown_error()?
351        } else if !config.normalize_comments()
352            && !config.wrap_comments()
353            && !(
354                // `format_code_in_doc_comments` should only take effect on doc comments,
355                // so we only consider it when this comment block is a doc comment block.
356                is_doc_comment && config.format_code_in_doc_comments()
357            )
358        {
359            light_rewrite_comment(first_group, shape.indent, config, is_doc_comment)
360        } else {
361            rewrite_comment_inner(
362                first_group,
363                block_style,
364                style,
365                shape,
366                config,
367                is_doc_comment || style.is_doc_comment(),
368            )?
369        };
370    if rest.is_empty() {
371        Ok(rewritten_first_group)
372    } else {
373        identify_comment(
374            rest.trim_start(),
375            block_style,
376            shape,
377            config,
378            is_doc_comment,
379        )
380        .map(|rest_str| {
381            format!(
382                "{}\n{}{}{}",
383                rewritten_first_group,
384                // insert back the blank line
385                if has_bare_lines && style.is_line_comment() {
386                    "\n"
387                } else {
388                    ""
389                },
390                shape.indent.to_string(config),
391                rest_str
392            )
393        })
394    }
395}
396
/// Enum indicating if the code block contains rust based on attributes
enum CodeBlockAttribute {
    Rust,
    NotRust,
}

impl CodeBlockAttribute {
    /// Parses a comma separated list of code fence attributes.
    ///
    /// Returns [`CodeBlockAttribute::Rust`] only if every attribute is one that keeps the block
    /// valid Rust: empty, `rust`, `should_panic`, `no_run`, or an `edition20xx` marker
    /// (including `edition2024`). Any other attribute, including `ignore`, `compile_fail` and
    /// `text`, makes the whole block [`CodeBlockAttribute::NotRust`].
    ///
    /// See <https://doc.rust-lang.org/rustdoc/print.html#attributes>
    fn new(attributes: &str) -> CodeBlockAttribute {
        let all_rust = attributes.split(',').all(|attribute| {
            matches!(
                attribute.trim(),
                "" | "rust"
                    | "should_panic"
                    | "no_run"
                    | "edition2015"
                    | "edition2018"
                    | "edition2021"
                    | "edition2024"
            )
        });
        if all_rust {
            CodeBlockAttribute::Rust
        } else {
            CodeBlockAttribute::NotRust
        }
    }
}
419
420/// Block that is formatted as an item.
421///
422/// An item starts with either a star `*`, a dash `-`, a greater-than `>`, a plus '+', or a number
423/// `12.` or `34)` (with at most 2 digits). An item represents CommonMark's ["list
424/// items"](https://spec.commonmark.org/0.30/#list-items) and/or ["block
425/// quotes"](https://spec.commonmark.org/0.30/#block-quotes), but note that only a subset of
426/// CommonMark is recognized - see the doc comment of [`ItemizedBlock::get_marker_length`] for more
427/// details.
428///
429/// Different level of indentation are handled by shrinking the shape accordingly.
430struct ItemizedBlock {
431    /// the lines that are identified as part of an itemized block
432    lines: Vec<String>,
433    /// the number of characters (typically whitespaces) up to the item marker
434    indent: usize,
435    /// the string that marks the start of an item
436    opener: String,
437    /// sequence of characters (typically whitespaces) to prefix new lines that are part of the item
438    line_start: String,
439}
440
441impl ItemizedBlock {
442    /// Checks whether the `trimmed` line includes an item marker. Returns `None` if there is no
443    /// marker. Returns the length of the marker (in bytes) if one is present. Note that the length
444    /// includes the whitespace that follows the marker, for example the marker in `"* list item"`
445    /// has the length of 2.
446    ///
447    /// This function recognizes item markers that correspond to CommonMark's
448    /// ["bullet list marker"](https://spec.commonmark.org/0.30/#bullet-list-marker),
449    /// ["block quote marker"](https://spec.commonmark.org/0.30/#block-quote-marker), and/or
450    /// ["ordered list marker"](https://spec.commonmark.org/0.30/#ordered-list-marker).
451    ///
452    /// Compared to CommonMark specification, the number of digits that are allowed in an ["ordered
453    /// list marker"](https://spec.commonmark.org/0.30/#ordered-list-marker) is more limited (to at
454    /// most 2 digits). Limiting the length of the marker helps reduce the risk of recognizing
455    /// arbitrary numbers as markers. See also
456    /// <https://talk.commonmark.org/t/blank-lines-before-lists-revisited/1990> which gives the
457    /// following example where a number (i.e. "1868") doesn't signify an ordered list:
458    /// ```md
459    /// The Captain died in
460    /// 1868. He wes buried in...
461    /// ```
462    fn get_marker_length(trimmed: &str) -> Option<usize> {
463        // https://spec.commonmark.org/0.30/#bullet-list-marker or
464        // https://spec.commonmark.org/0.30/#block-quote-marker
465        let itemized_start = ["* ", "- ", "> ", "+ "];
466        if itemized_start.iter().any(|s| trimmed.starts_with(s)) {
467            return Some(2); // All items in `itemized_start` have length 2.
468        }
469
470        // https://spec.commonmark.org/0.30/#ordered-list-marker, where at most 2 digits are
471        // allowed.
472        for suffix in [". ", ") "] {
473            if let Some((prefix, _)) = trimmed.split_once(suffix) {
474                let has_leading_digits = (1..=2).contains(&prefix.len())
475                    && prefix.chars().all(|c| char::is_ascii_digit(&c));
476                if has_leading_digits {
477                    return Some(prefix.len() + suffix.len());
478                }
479            }
480        }
481
482        None // No markers found.
483    }
484
485    /// Creates a new `ItemizedBlock` described with the given `line`.
486    /// Returns `None` if `line` doesn't start an item.
487    fn new(line: &str) -> Option<ItemizedBlock> {
488        let marker_length = ItemizedBlock::get_marker_length(line.trim_start())?;
489        let space_to_marker = line.chars().take_while(|c| c.is_whitespace()).count();
490        let mut indent = space_to_marker + marker_length;
491        let mut line_start = " ".repeat(indent);
492
493        // Markdown blockquote start with a "> "
494        if line.trim_start().starts_with('>') {
495            // remove the original +2 indent because there might be multiple nested block quotes
496            // and it's easier to reason about the final indent by just taking the length
497            // of the new line_start. We update the indent because it effects the max width
498            // of each formatted line.
499            line_start = itemized_block_quote_start(line, line_start, 2);
500            indent = line_start.len();
501        }
502        Some(ItemizedBlock {
503            lines: vec![line[indent..].to_string()],
504            indent,
505            opener: line[..indent].to_string(),
506            line_start,
507        })
508    }
509
510    /// Returns a `StringFormat` used for formatting the content of an item.
511    fn create_string_format<'a>(&'a self, fmt: &'a StringFormat<'_>) -> StringFormat<'a> {
512        StringFormat {
513            opener: "",
514            closer: "",
515            line_start: "",
516            line_end: "",
517            shape: Shape::legacy(fmt.shape.width.saturating_sub(self.indent), Indent::empty()),
518            trim_end: true,
519            config: fmt.config,
520        }
521    }
522
523    /// Returns `true` if the line is part of the current itemized block.
524    /// If it is, then it is added to the internal lines list.
525    fn add_line(&mut self, line: &str) -> bool {
526        if ItemizedBlock::get_marker_length(line.trim_start()).is_none()
527            && self.indent <= line.chars().take_while(|c| c.is_whitespace()).count()
528        {
529            self.lines.push(line.to_string());
530            return true;
531        }
532        false
533    }
534
535    /// Returns the block as a string, with each line trimmed at the start.
536    fn trimmed_block_as_string(&self) -> String {
537        self.lines.iter().fold(String::new(), |mut acc, line| {
538            acc.push_str(line.trim_start());
539            acc.push(' ');
540            acc
541        })
542    }
543
544    /// Returns the block as a string under its original form.
545    fn original_block_as_string(&self) -> String {
546        self.lines.join("\n")
547    }
548}
549
/// Determine the line_start when formatting markdown block quotes.
///
/// Drops the last `remove_indent` characters of `line_start`, then appends one `"> "` for every
/// `>` found in `line` before its first alphanumeric character.
fn itemized_block_quote_start(line: &str, mut line_start: String, remove_indent: usize) -> String {
    let quote_level = line
        .chars()
        .take_while(|c| !c.is_alphanumeric())
        .filter(|&c| c == '>')
        .count();

    (0..remove_indent).for_each(|_| {
        line_start.pop();
    });

    line_start.push_str(&"> ".repeat(quote_level));
    line_start
}
568
569struct CommentRewrite<'a> {
570    result: String,
571    code_block_buffer: String,
572    is_prev_line_multi_line: bool,
573    code_block_attr: Option<CodeBlockAttribute>,
574    item_block: Option<ItemizedBlock>,
575    comment_line_separator: String,
576    indent_str: String,
577    max_width: usize,
578    fmt_indent: Indent,
579    fmt: StringFormat<'a>,
580
581    opener: String,
582    closer: String,
583    line_start: String,
584    style: CommentStyle<'a>,
585}
586
587impl<'a> CommentRewrite<'a> {
588    fn new(
589        orig: &'a str,
590        block_style: bool,
591        shape: Shape,
592        config: &'a Config,
593    ) -> CommentRewrite<'a> {
594        let ((opener, closer, line_start), style) = if block_style {
595            (
596                CommentStyle::SingleBullet.to_str_tuplet(),
597                CommentStyle::SingleBullet,
598            )
599        } else {
600            let style = comment_style(orig, config.normalize_comments());
601            (style.to_str_tuplet(), style)
602        };
603
604        let max_width = shape
605            .width
606            .checked_sub(closer.len() + opener.len())
607            .unwrap_or(1);
608        let indent_str = shape.indent.to_string_with_newline(config).to_string();
609
610        let mut cr = CommentRewrite {
611            result: String::with_capacity(orig.len() * 2),
612            code_block_buffer: String::with_capacity(128),
613            is_prev_line_multi_line: false,
614            code_block_attr: None,
615            item_block: None,
616            comment_line_separator: format!("{indent_str}{line_start}"),
617            max_width,
618            indent_str,
619            fmt_indent: shape.indent,
620
621            fmt: StringFormat {
622                opener: "",
623                closer: "",
624                line_start,
625                line_end: "",
626                shape: Shape::legacy(max_width, shape.indent),
627                trim_end: true,
628                config,
629            },
630
631            opener: opener.to_owned(),
632            closer: closer.to_owned(),
633            line_start: line_start.to_owned(),
634            style,
635        };
636        cr.result.push_str(opener);
637        cr
638    }
639
640    fn join_block(s: &str, sep: &str) -> String {
641        let mut result = String::with_capacity(s.len() + 128);
642        let mut iter = s.lines().peekable();
643        while let Some(line) = iter.next() {
644            result.push_str(line);
645            result.push_str(match iter.peek() {
646                Some(&"") => sep.trim_end(),
647                Some(..) => sep,
648                None => "",
649            });
650        }
651        result
652    }
653
654    /// Check if any characters were written to the result buffer after the start of the comment.
655    /// when calling [`CommentRewrite::new()`] the result buffer is initialized with the opening
656    /// characters for the comment.
657    fn buffer_contains_comment(&self) -> bool {
658        // if self.result.len() < self.opener.len() then an empty comment is in the buffer
659        // if self.result.len() > self.opener.len() then a non empty comment is in the buffer
660        self.result.len() != self.opener.len()
661    }
662
663    fn finish(mut self) -> String {
664        if !self.code_block_buffer.is_empty() {
665            // There is a code block that is not properly enclosed by backticks.
666            // We will leave them untouched.
667            self.result.push_str(&self.comment_line_separator);
668            self.result.push_str(&Self::join_block(
669                &trim_custom_comment_prefix(&self.code_block_buffer),
670                &self.comment_line_separator,
671            ));
672        }
673
674        if let Some(ref ib) = self.item_block {
675            // the last few lines are part of an itemized block
676            self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
677            let item_fmt = ib.create_string_format(&self.fmt);
678
679            // only push a comment_line_separator for ItemizedBlocks if the comment is not empty
680            if self.buffer_contains_comment() {
681                self.result.push_str(&self.comment_line_separator);
682            }
683
684            self.result.push_str(&ib.opener);
685            match rewrite_string(
686                &ib.trimmed_block_as_string(),
687                &item_fmt,
688                self.max_width.saturating_sub(ib.indent),
689            ) {
690                Some(s) => self.result.push_str(&Self::join_block(
691                    &s,
692                    &format!("{}{}", self.comment_line_separator, ib.line_start),
693                )),
694                None => self.result.push_str(&Self::join_block(
695                    &ib.original_block_as_string(),
696                    &self.comment_line_separator,
697                )),
698            };
699        }
700
701        self.result.push_str(&self.closer);
702        if self.result.ends_with(&self.opener) && self.opener.ends_with(' ') {
703            // Trailing space.
704            self.result.pop();
705        }
706
707        self.result
708    }
709
710    fn handle_line(
711        &mut self,
712        orig: &'a str,
713        i: usize,
714        line: &'a str,
715        has_leading_whitespace: bool,
716        is_doc_comment: bool,
717    ) -> bool {
718        let num_newlines = count_newlines(orig);
719        let is_last = i == num_newlines;
720        let needs_new_comment_line = if self.style.is_block_comment() {
721            num_newlines > 0 || self.buffer_contains_comment()
722        } else {
723            self.buffer_contains_comment()
724        };
725
726        if let Some(ref mut ib) = self.item_block {
727            if ib.add_line(line) {
728                return false;
729            }
730            self.is_prev_line_multi_line = false;
731            self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
732            let item_fmt = ib.create_string_format(&self.fmt);
733
734            // only push a comment_line_separator if we need to start a new comment line
735            if needs_new_comment_line {
736                self.result.push_str(&self.comment_line_separator);
737            }
738
739            self.result.push_str(&ib.opener);
740            match rewrite_string(
741                &ib.trimmed_block_as_string(),
742                &item_fmt,
743                self.max_width.saturating_sub(ib.indent),
744            ) {
745                Some(s) => self.result.push_str(&Self::join_block(
746                    &s,
747                    &format!("{}{}", self.comment_line_separator, ib.line_start),
748                )),
749                None => self.result.push_str(&Self::join_block(
750                    &ib.original_block_as_string(),
751                    &self.comment_line_separator,
752                )),
753            };
754        } else if self.code_block_attr.is_some() {
755            if line.starts_with("```") {
756                let code_block = match self.code_block_attr.as_ref().unwrap() {
757                    CodeBlockAttribute::Rust
758                        if self.fmt.config.format_code_in_doc_comments()
759                            && !self.code_block_buffer.trim().is_empty() =>
760                    {
761                        let mut config = self.fmt.config.clone();
762                        config.set().wrap_comments(false);
763                        let comment_max_width = config
764                            .doc_comment_code_block_width()
765                            .min(config.max_width());
766                        config.set().max_width(comment_max_width);
767                        if let Some(s) =
768                            crate::format_code_block(&self.code_block_buffer, &config, false)
769                        {
770                            trim_custom_comment_prefix(&s.snippet)
771                        } else {
772                            trim_custom_comment_prefix(&self.code_block_buffer)
773                        }
774                    }
775                    _ => trim_custom_comment_prefix(&self.code_block_buffer),
776                };
777                if !code_block.is_empty() {
778                    self.result.push_str(&self.comment_line_separator);
779                    self.result
780                        .push_str(&Self::join_block(&code_block, &self.comment_line_separator));
781                }
782                self.code_block_buffer.clear();
783                self.result.push_str(&self.comment_line_separator);
784                self.result.push_str(line);
785                self.code_block_attr = None;
786            } else {
787                self.code_block_buffer
788                    .push_str(&hide_sharp_behind_comment(line));
789                self.code_block_buffer.push('\n');
790            }
791            return false;
792        }
793
794        self.code_block_attr = None;
795        self.item_block = None;
796        if let Some(stripped) = line.strip_prefix("```") {
797            self.code_block_attr = Some(CodeBlockAttribute::new(stripped))
798        } else if self.fmt.config.wrap_comments() {
799            if let Some(ib) = ItemizedBlock::new(line) {
800                self.item_block = Some(ib);
801                return false;
802            }
803        }
804
805        if self.result == self.opener {
806            let force_leading_whitespace = &self.opener == "/* " && count_newlines(orig) == 0;
807            if !has_leading_whitespace && !force_leading_whitespace && self.result.ends_with(' ') {
808                self.result.pop();
809            }
810            if line.is_empty() {
811                return false;
812            }
813        } else if self.is_prev_line_multi_line && !line.is_empty() {
814            self.result.push(' ')
815        } else if is_last && line.is_empty() {
816            // trailing blank lines are unwanted
817            if !self.closer.is_empty() {
818                self.result.push_str(&self.indent_str);
819            }
820            return true;
821        } else {
822            self.result.push_str(&self.comment_line_separator);
823            if !has_leading_whitespace && self.result.ends_with(' ') {
824                self.result.pop();
825            }
826        }
827
828        let is_markdown_header_doc_comment = is_doc_comment && line.starts_with('#');
829
830        // We only want to wrap the comment if:
831        // 1) wrap_comments = true is configured
832        // 2) The comment is not the start of a markdown header doc comment
833        // 3) The comment width exceeds the shape's width
834        // 4) No URLS were found in the comment
835        // If this changes, the documentation in ../Configurations.md#wrap_comments
836        // should be changed accordingly.
837        let should_wrap_comment = self.fmt.config.wrap_comments()
838            && !is_markdown_header_doc_comment
839            && unicode_str_width(line) > self.fmt.shape.width
840            && !has_url(line)
841            && !is_table_item(line);
842
843        if should_wrap_comment {
844            match rewrite_string(line, &self.fmt, self.max_width) {
845                Some(ref s) => {
846                    self.is_prev_line_multi_line = s.contains('\n');
847                    self.result.push_str(s);
848                }
849                None if self.is_prev_line_multi_line => {
850                    // We failed to put the current `line` next to the previous `line`.
851                    // Remove the trailing space, then start rewrite on the next line.
852                    self.result.pop();
853                    self.result.push_str(&self.comment_line_separator);
854                    self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
855                    match rewrite_string(line, &self.fmt, self.max_width) {
856                        Some(ref s) => {
857                            self.is_prev_line_multi_line = s.contains('\n');
858                            self.result.push_str(s);
859                        }
860                        None => {
861                            self.is_prev_line_multi_line = false;
862                            self.result.push_str(line);
863                        }
864                    }
865                }
866                None => {
867                    self.is_prev_line_multi_line = false;
868                    self.result.push_str(line);
869                }
870            }
871
872            self.fmt.shape = if self.is_prev_line_multi_line {
873                // 1 = " "
874                let offset = 1 + last_line_width(&self.result) - self.line_start.len();
875                Shape {
876                    width: self.max_width.saturating_sub(offset),
877                    indent: self.fmt_indent,
878                    offset: self.fmt.shape.offset + offset,
879                }
880            } else {
881                Shape::legacy(self.max_width, self.fmt_indent)
882            };
883        } else {
884            if line.is_empty() && self.result.ends_with(' ') && !is_last {
885                // Remove space if this is an empty comment or a doc comment.
886                self.result.pop();
887            }
888            if self.code_block_attr.is_some() && self.is_prev_line_multi_line {
889                self.result.push_str(&self.comment_line_separator);
890            }
891            self.result.push_str(line);
892            self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
893            self.is_prev_line_multi_line = false;
894        }
895
896        false
897    }
898}
899
900fn rewrite_comment_inner(
901    orig: &str,
902    block_style: bool,
903    style: CommentStyle<'_>,
904    shape: Shape,
905    config: &Config,
906    is_doc_comment: bool,
907) -> RewriteResult {
908    let mut rewriter = CommentRewrite::new(orig, block_style, shape, config);
909
910    let line_breaks = count_newlines(orig.trim_end());
911    let lines = orig
912        .lines()
913        .enumerate()
914        .map(|(i, mut line)| {
915            line = trim_end_unless_two_whitespaces(line.trim_start(), is_doc_comment);
916            // Drop old closer.
917            if i == line_breaks && line.ends_with("*/") && !line.starts_with("//") {
918                line = line[..(line.len() - 2)].trim_end();
919            }
920
921            line
922        })
923        .map(|s| left_trim_comment_line(s, &style))
924        .map(|(line, has_leading_whitespace)| {
925            if orig.starts_with("/*") && line_breaks == 0 {
926                (
927                    line.trim_start(),
928                    has_leading_whitespace || config.normalize_comments(),
929                )
930            } else {
931                (line, has_leading_whitespace || config.normalize_comments())
932            }
933        });
934
935    for (i, (line, has_leading_whitespace)) in lines.enumerate() {
936        if rewriter.handle_line(orig, i, line, has_leading_whitespace, is_doc_comment) {
937            break;
938        }
939    }
940
941    Ok(rewriter.finish())
942}
943
/// Marker put in front of `#`-prefixed code-block lines so that they look like line comments.
const RUSTFMT_CUSTOM_COMMENT_PREFIX: &str = "//#### ";

/// Disguises a `#`-prefixed code-block line as a comment.
///
/// A line whose trimmed form is `#` or starts with `# ` is returned with
/// `RUSTFMT_CUSTOM_COMMENT_PREFIX` prepended (the original line, including its own
/// whitespace, is kept verbatim after the marker). Every other line is returned borrowed and
/// unchanged.
fn hide_sharp_behind_comment(s: &str) -> Cow<'_, str> {
    let is_hidden_line = match s.trim() {
        "#" => true,
        trimmed => trimmed.starts_with("# "),
    };

    if is_hidden_line {
        Cow::Owned(format!("{RUSTFMT_CUSTOM_COMMENT_PREFIX}{s}"))
    } else {
        Cow::Borrowed(s)
    }
}
954
955fn trim_custom_comment_prefix(s: &str) -> String {
956    s.lines()
957        .map(|line| {
958            let left_trimmed = line.trim_start();
959            if left_trimmed.starts_with(RUSTFMT_CUSTOM_COMMENT_PREFIX) {
960                left_trimmed.trim_start_matches(RUSTFMT_CUSTOM_COMMENT_PREFIX)
961            } else {
962                line
963            }
964        })
965        .collect::<Vec<_>>()
966        .join("\n")
967}
968
969/// Returns `true` if the given string MAY include URLs or alike.
970fn has_url(s: &str) -> bool {
971    // A regex matching reference doc links.
972    //
973    // ```markdown
974    // /// An [example].
975    // ///
976    // /// [example]: this::is::a::link
977    // ```
978    let reference_link_url = static_regex!(r"^\[.+\]\s?:");
979
980    // This function may return false positive, but should get its job done in most cases.
981    s.contains("https://")
982        || s.contains("http://")
983        || s.contains("ftp://")
984        || s.contains("file://")
985        || reference_link_url.is_match(s)
986}
987
/// Returns `true` if the given string may be part of a Markdown table.
///
/// After skipping leading whitespace, the line must start with `|` and contain at least one
/// more `|` further to the right.
fn is_table_item(s: &str) -> bool {
    // This function may return false positives, but should get its job done in most cases
    // (i.e. markdown tables with two column delimiters).
    let s = s.trim_start();
    s.starts_with('|') && matches!(s.rfind('|'), Some(last) if last != 0)
}
999
1000/// Given the span, rewrite the missing comment inside it if available.
1001/// Note that the given span must only include comments (or leading/trailing whitespaces).
1002pub(crate) fn rewrite_missing_comment(
1003    span: Span,
1004    shape: Shape,
1005    context: &RewriteContext<'_>,
1006) -> RewriteResult {
1007    let missing_snippet = context.snippet(span);
1008    let trimmed_snippet = missing_snippet.trim();
1009    // check the span starts with a comment
1010    let pos = trimmed_snippet.find('/');
1011    if !trimmed_snippet.is_empty() && pos.is_some() {
1012        rewrite_comment(trimmed_snippet, false, shape, context.config)
1013    } else {
1014        Ok(String::new())
1015    }
1016}
1017
1018/// Recover the missing comments in the specified span, if available.
1019/// The layout of the comments will be preserved as long as it does not break the code
1020/// and its total width does not exceed the max width.
1021pub(crate) fn recover_missing_comment_in_span(
1022    span: Span,
1023    shape: Shape,
1024    context: &RewriteContext<'_>,
1025    used_width: usize,
1026) -> RewriteResult {
1027    let missing_comment = rewrite_missing_comment(span, shape, context)?;
1028    if missing_comment.is_empty() {
1029        Ok(String::new())
1030    } else {
1031        let missing_snippet = context.snippet(span);
1032        let pos = missing_snippet.find('/').unknown_error()?;
1033        // 1 = ` `
1034        let total_width = missing_comment.len() + used_width + 1;
1035        let force_new_line_before_comment =
1036            missing_snippet[..pos].contains('\n') || total_width > context.config.max_width();
1037        let sep = if force_new_line_before_comment {
1038            shape.indent.to_string_with_newline(context.config)
1039        } else {
1040            Cow::from(" ")
1041        };
1042        Ok(format!("{sep}{missing_comment}"))
1043    }
1044}
1045
/// Trims trailing whitespace, except in doc comments when the line ends with two spaces.
///
/// Two trailing spaces are markdown's line-break syntax, so such a line is returned
/// untouched when `is_doc_comment` is set.
fn trim_end_unless_two_whitespaces(s: &str, is_doc_comment: bool) -> &str {
    let keeps_line_break = is_doc_comment && s.ends_with("  ");
    if keeps_line_break { s } else { s.trim_end() }
}
1054
1055/// Trims whitespace and aligns to indent, but otherwise does not change comments.
1056fn light_rewrite_comment(
1057    orig: &str,
1058    offset: Indent,
1059    config: &Config,
1060    is_doc_comment: bool,
1061) -> String {
1062    orig.lines()
1063        .map(|l| {
1064            // This is basically just l.trim(), but in the case that a line starts
1065            // with `*` we want to leave one space before it, so it aligns with the
1066            // `*` in `/*`.
1067            let first_non_whitespace = l.find(|c| !char::is_whitespace(c));
1068            let left_trimmed = if let Some(fnw) = first_non_whitespace {
1069                if l.as_bytes()[fnw] == b'*' && fnw > 0 {
1070                    &l[fnw - 1..]
1071                } else {
1072                    &l[fnw..]
1073                }
1074            } else {
1075                ""
1076            };
1077            // Preserve markdown's double-space line break syntax in doc comment.
1078            trim_end_unless_two_whitespaces(left_trimmed, is_doc_comment)
1079        })
1080        .join(&format!("\n{}", offset.to_string(config)))
1081}
1082
1083/// Trims comment characters and possibly a single space from the left of a string.
1084/// Does not trim all whitespace. If a single space is trimmed from the left of the string,
1085/// this function returns true.
1086fn left_trim_comment_line<'a>(line: &'a str, style: &CommentStyle<'_>) -> (&'a str, bool) {
1087    if line.starts_with("//! ")
1088        || line.starts_with("/// ")
1089        || line.starts_with("/*! ")
1090        || line.starts_with("/** ")
1091    {
1092        (&line[4..], true)
1093    } else if let CommentStyle::Custom(opener) = *style {
1094        if let Some(stripped) = line.strip_prefix(opener) {
1095            (stripped, true)
1096        } else {
1097            (&line[opener.trim_end().len()..], false)
1098        }
1099    } else if line.starts_with("/* ")
1100        || line.starts_with("// ")
1101        || line.starts_with("//!")
1102        || line.starts_with("///")
1103        || line.starts_with("** ")
1104        || line.starts_with("/*!")
1105        || (line.starts_with("/**") && !line.starts_with("/**/"))
1106    {
1107        (&line[3..], line.chars().nth(2).unwrap() == ' ')
1108    } else if line.starts_with("/*")
1109        || line.starts_with("* ")
1110        || line.starts_with("//")
1111        || line.starts_with("**")
1112    {
1113        (&line[2..], line.chars().nth(1).unwrap() == ' ')
1114    } else if let Some(stripped) = line.strip_prefix('*') {
1115        (stripped, false)
1116    } else {
1117        (line, line.starts_with(' '))
1118    }
1119}
1120
/// Substring search that skips over text located inside comments.
pub(crate) trait FindUncommented {
    /// Searches for `pat` outside of comments and returns the byte offset at which the
    /// match starts, or `None` when there is no such match.
    fn find_uncommented(&self, pat: &str) -> Option<usize>;
    /// Like `find_uncommented`, but returns the byte offset of the last match found.
    fn find_last_uncommented(&self, pat: &str) -> Option<usize>;
}
1125
1126impl FindUncommented for str {
1127    fn find_uncommented(&self, pat: &str) -> Option<usize> {
1128        let mut needle_iter = pat.chars();
1129        for (kind, (i, b)) in CharClasses::new(self.char_indices()) {
1130            match needle_iter.next() {
1131                None => {
1132                    return Some(i - pat.len());
1133                }
1134                Some(c) => match kind {
1135                    FullCodeCharKind::Normal | FullCodeCharKind::InString if b == c => {}
1136                    _ => {
1137                        needle_iter = pat.chars();
1138                    }
1139                },
1140            }
1141        }
1142
1143        // Handle case where the pattern is a suffix of the search string
1144        match needle_iter.next() {
1145            Some(_) => None,
1146            None => Some(self.len() - pat.len()),
1147        }
1148    }
1149
1150    fn find_last_uncommented(&self, pat: &str) -> Option<usize> {
1151        if let Some(left) = self.find_uncommented(pat) {
1152            let mut result = left;
1153            // add 1 to use find_last_uncommented for &str after pat
1154            while let Some(next) = self[(result + 1)..].find_last_uncommented(pat) {
1155                result += next + 1;
1156            }
1157            Some(result)
1158        } else {
1159            None
1160        }
1161    }
1162}
1163
1164// Returns the first byte position after the first comment. The given string
1165// is expected to be prefixed by a comment, including delimiters.
1166// Good: `/* /* inner */ outer */ code();`
1167// Bad:  `code(); // hello\n world!`
1168pub(crate) fn find_comment_end(s: &str) -> Option<usize> {
1169    let mut iter = CharClasses::new(s.char_indices());
1170    for (kind, (i, _c)) in &mut iter {
1171        if kind == FullCodeCharKind::Normal || kind == FullCodeCharKind::InString {
1172            return Some(i);
1173        }
1174    }
1175
1176    // Handle case where the comment ends at the end of `s`.
1177    if iter.status == CharClassesStatus::Normal {
1178        Some(s.len())
1179    } else {
1180        None
1181    }
1182}
1183
1184/// Returns `true` if text contains any comment.
1185pub(crate) fn contains_comment(text: &str) -> bool {
1186    CharClasses::new(text.chars()).any(|(kind, _)| kind.is_comment())
1187}
1188
/// Iterator adaptor that tags every item of a character stream with the kind of code it
/// belongs to (plain code, string or comment).
pub(crate) struct CharClasses<T>
where
    T: Iterator,
    T::Item: RichChar,
{
    /// Underlying character stream, wrapped so that upcoming items can be peeked.
    base: MultiPeek<T>,
    /// State of the classifier after the most recently consumed item.
    status: CharClassesStatus,
}
1197
/// An item that carries a `char`, possibly together with additional data.
pub(crate) trait RichChar {
    /// Returns the character carried by this item.
    fn get_char(&self) -> char;
}

// A plain `char` is its own character.
impl RichChar for char {
    fn get_char(&self) -> char {
        *self
    }
}

// Pairs such as the ones produced by `str::char_indices`: the character is the second field.
impl RichChar for (usize, char) {
    fn get_char(&self) -> char {
        self.1
    }
}
1213
/// Internal state of the `CharClasses` state machine.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
enum CharClassesStatus {
    /// Character is outside of any string, char literal or comment
    Normal,
    /// Character is within a string
    LitString,
    /// Status right after a `\` inside a string; the next character is part of the escape
    LitStringEscape,
    /// Character is within a raw string
    LitRawString(u32),
    /// Status while reading the `#`s between the `r` and the opening `"` of a raw string,
    /// with the integer counting the `#`s consumed so far
    RawStringPrefix(u32),
    /// Status while reading the `#`s that follow the closing `"` of a raw string, with the
    /// integer counting the `#`s still expected
    RawStringSuffix(u32),
    /// Character is within a char literal
    LitChar,
    /// Status right after a `\` inside a char literal
    LitCharEscape,
    /// Character inside a block comment, with the integer indicating the nesting deepness of the
    /// comment
    BlockComment(u32),
    /// Character inside a block-commented string, with the integer indicating the nesting deepness
    /// of the comment
    StringInBlockComment(u32),
    /// Status when the '/' has been consumed, but not yet the '*', deepness is
    /// the new deepness (after the comment opening).
    BlockCommentOpening(u32),
    /// Status when the '*' has been consumed, but not yet the '/', deepness is
    /// the new deepness (after the comment closing).
    BlockCommentClosing(u32),
    /// Character is within a line comment
    LineComment,
}
1241
/// Distinguish between functional part of code and comments
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub(crate) enum CodeCharKind {
    /// Anything that is not part of a comment.
    Normal,
    /// Part of a comment.
    Comment,
}
1248
/// Distinguish between functional part of code and comments,
/// describing opening and closing of comments for ease when chunking
/// code from tagged characters
///
/// The `Start*String*` and `End*String*` variants are assigned by `LineClasses` to whole
/// lines; the per-character classifier `CharClasses` does not produce them.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub(crate) enum FullCodeCharKind {
    /// Functional code, outside of any comment or string
    Normal,
    /// The first character of a comment, there is only one for a comment (always '/')
    StartComment,
    /// Any character inside a comment including the second character of comment
    /// marks ("//", "/*")
    InComment,
    /// Last character of a comment, '\n' for a line comment, '/' for a block comment.
    EndComment,
    /// Start of a multiline string inside a comment
    StartStringCommented,
    /// End of a multiline string inside a comment
    EndStringCommented,
    /// Inside a commented string
    InStringCommented,
    /// Start of a multiline string
    StartString,
    /// End of a multiline string
    EndString,
    /// Inside a string.
    InString,
}
1275
1276impl FullCodeCharKind {
1277    pub(crate) fn is_comment(self) -> bool {
1278        match self {
1279            FullCodeCharKind::StartComment
1280            | FullCodeCharKind::InComment
1281            | FullCodeCharKind::EndComment
1282            | FullCodeCharKind::StartStringCommented
1283            | FullCodeCharKind::InStringCommented
1284            | FullCodeCharKind::EndStringCommented => true,
1285            _ => false,
1286        }
1287    }
1288
1289    /// Returns true if the character is inside a comment
1290    pub(crate) fn inside_comment(self) -> bool {
1291        match self {
1292            FullCodeCharKind::InComment
1293            | FullCodeCharKind::StartStringCommented
1294            | FullCodeCharKind::InStringCommented
1295            | FullCodeCharKind::EndStringCommented => true,
1296            _ => false,
1297        }
1298    }
1299
1300    pub(crate) fn is_string(self) -> bool {
1301        self == FullCodeCharKind::InString || self == FullCodeCharKind::StartString
1302    }
1303
1304    /// Returns true if the character is within a commented string
1305    pub(crate) fn is_commented_string(self) -> bool {
1306        self == FullCodeCharKind::InStringCommented
1307            || self == FullCodeCharKind::StartStringCommented
1308    }
1309
1310    fn to_codecharkind(self) -> CodeCharKind {
1311        if self.is_comment() {
1312            CodeCharKind::Comment
1313        } else {
1314            CodeCharKind::Normal
1315        }
1316    }
1317}
1318
1319impl<T> CharClasses<T>
1320where
1321    T: Iterator,
1322    T::Item: RichChar,
1323{
1324    pub(crate) fn new(base: T) -> CharClasses<T> {
1325        CharClasses {
1326            base: multipeek(base),
1327            status: CharClassesStatus::Normal,
1328        }
1329    }
1330}
1331
1332fn is_raw_string_suffix<T>(iter: &mut MultiPeek<T>, count: u32) -> bool
1333where
1334    T: Iterator,
1335    T::Item: RichChar,
1336{
1337    for _ in 0..count {
1338        match iter.peek() {
1339            Some(c) if c.get_char() == '#' => continue,
1340            _ => return false,
1341        }
1342    }
1343    true
1344}
1345
// Character classification state machine: every call to `next` consumes one item of the
// underlying stream, moves `self.status` to the follow-up state and yields the item together
// with the kind it was classified as.
impl<T> Iterator for CharClasses<T>
where
    T: Iterator,
    T::Item: RichChar,
{
    type Item = (FullCodeCharKind, T::Item);

    /// Consumes the next item and returns it tagged with its `FullCodeCharKind`, or `None`
    /// once the underlying stream is exhausted.
    ///
    /// Most arms evaluate to the new status and let the function return `char_kind` at the
    /// bottom; the comment-related arms set `self.status` themselves and return early.
    fn next(&mut self) -> Option<(FullCodeCharKind, T::Item)> {
        let item = self.base.next()?;
        let chr = item.get_char();
        // Kind reported for the current item unless an arm below overrides it.
        let mut char_kind = FullCodeCharKind::Normal;
        self.status = match self.status {
            CharClassesStatus::LitRawString(sharps) => {
                char_kind = FullCodeCharKind::InString;
                match chr {
                    '"' => {
                        if sharps == 0 {
                            // `r"..."`: this quote closes the string and is reported as
                            // `Normal`.
                            char_kind = FullCodeCharKind::Normal;
                            CharClassesStatus::Normal
                        } else if is_raw_string_suffix(&mut self.base, sharps) {
                            // The quote is followed by enough `#`s to close the string.
                            CharClassesStatus::RawStringSuffix(sharps)
                        } else {
                            // A quote that is part of the raw string's content.
                            CharClassesStatus::LitRawString(sharps)
                        }
                    }
                    _ => CharClassesStatus::LitRawString(sharps),
                }
            }
            CharClassesStatus::RawStringPrefix(sharps) => {
                char_kind = FullCodeCharKind::InString;
                match chr {
                    '#' => CharClassesStatus::RawStringPrefix(sharps + 1),
                    '"' => CharClassesStatus::LitRawString(sharps),
                    // NOTE(review): a raw identifier such as `r#type` looks like it ends up
                    // here (`r`, `#`, then a letter) — confirm whether this is intended.
                    _ => CharClassesStatus::Normal, // Unreachable.
                }
            }
            CharClassesStatus::RawStringSuffix(sharps) => {
                match chr {
                    '#' => {
                        if sharps == 1 {
                            // Last `#` of the closing delimiter, reported as `Normal`.
                            CharClassesStatus::Normal
                        } else {
                            char_kind = FullCodeCharKind::InString;
                            CharClassesStatus::RawStringSuffix(sharps - 1)
                        }
                    }
                    _ => CharClassesStatus::Normal, // Unreachable
                }
            }
            CharClassesStatus::LitString => {
                // Every character of a regular string, closing quote included, is `InString`.
                char_kind = FullCodeCharKind::InString;
                match chr {
                    '"' => CharClassesStatus::Normal,
                    '\\' => CharClassesStatus::LitStringEscape,
                    _ => CharClassesStatus::LitString,
                }
            }
            CharClassesStatus::LitStringEscape => {
                // The escaped character never terminates the string.
                char_kind = FullCodeCharKind::InString;
                CharClassesStatus::LitString
            }
            // Characters of a char literal keep the default `Normal` kind.
            CharClassesStatus::LitChar => match chr {
                '\\' => CharClassesStatus::LitCharEscape,
                '\'' => CharClassesStatus::Normal,
                _ => CharClassesStatus::LitChar,
            },
            CharClassesStatus::LitCharEscape => CharClassesStatus::LitChar,
            CharClassesStatus::Normal => match chr {
                // `r` directly followed by `#` or `"` is taken as the start of a raw string.
                'r' => match self.base.peek().map(RichChar::get_char) {
                    Some('#') | Some('"') => {
                        char_kind = FullCodeCharKind::InString;
                        CharClassesStatus::RawStringPrefix(0)
                    }
                    _ => CharClassesStatus::Normal,
                },
                '"' => {
                    char_kind = FullCodeCharKind::InString;
                    CharClassesStatus::LitString
                }
                '\'' => {
                    // A quote followed by a backslash starts an escaped char literal.
                    // HACK: Work around mut borrow.
                    match self.base.peek() {
                        Some(next) if next.get_char() == '\\' => {
                            self.status = CharClassesStatus::LitChar;
                            return Some((char_kind, item));
                        }
                        _ => (),
                    }

                    // This is the second consecutive `peek`, which presumably inspects the
                    // character after the next one (`MultiPeek` advances its cursor on each
                    // peek): a quote there means a char literal like `'a'`; anything else
                    // (e.g. a lifetime) stays `Normal`.
                    match self.base.peek() {
                        Some(next) if next.get_char() == '\'' => CharClassesStatus::LitChar,
                        _ => CharClassesStatus::Normal,
                    }
                }
                '/' => match self.base.peek() {
                    // `/*` opens a block comment of depth 1.
                    Some(next) if next.get_char() == '*' => {
                        self.status = CharClassesStatus::BlockCommentOpening(1);
                        return Some((FullCodeCharKind::StartComment, item));
                    }
                    // `//` opens a line comment.
                    Some(next) if next.get_char() == '/' => {
                        self.status = CharClassesStatus::LineComment;
                        return Some((FullCodeCharKind::StartComment, item));
                    }
                    _ => CharClassesStatus::Normal,
                },
                _ => CharClassesStatus::Normal,
            },
            CharClassesStatus::StringInBlockComment(deepness) => {
                char_kind = FullCodeCharKind::InStringCommented;
                if chr == '"' {
                    CharClassesStatus::BlockComment(deepness)
                } else if chr == '*' && self.base.peek().map(RichChar::get_char) == Some('/') {
                    // `*/` closes the comment even from within a commented string.
                    char_kind = FullCodeCharKind::InComment;
                    CharClassesStatus::BlockCommentClosing(deepness - 1)
                } else {
                    CharClassesStatus::StringInBlockComment(deepness)
                }
            }
            CharClassesStatus::BlockComment(deepness) => {
                assert_ne!(deepness, 0);
                char_kind = FullCodeCharKind::InComment;
                match self.base.peek() {
                    // `*/`: leave one nesting level.
                    Some(next) if next.get_char() == '/' && chr == '*' => {
                        CharClassesStatus::BlockCommentClosing(deepness - 1)
                    }
                    // `/*`: enter a nested block comment.
                    Some(next) if next.get_char() == '*' && chr == '/' => {
                        CharClassesStatus::BlockCommentOpening(deepness + 1)
                    }
                    _ if chr == '"' => CharClassesStatus::StringInBlockComment(deepness),
                    _ => self.status,
                }
            }
            CharClassesStatus::BlockCommentOpening(deepness) => {
                // The previous state already peeked this `*`.
                assert_eq!(chr, '*');
                self.status = CharClassesStatus::BlockComment(deepness);
                return Some((FullCodeCharKind::InComment, item));
            }
            CharClassesStatus::BlockCommentClosing(deepness) => {
                // The previous state already peeked this `/`.
                assert_eq!(chr, '/');
                if deepness == 0 {
                    // The outermost comment ends here.
                    self.status = CharClassesStatus::Normal;
                    return Some((FullCodeCharKind::EndComment, item));
                } else {
                    self.status = CharClassesStatus::BlockComment(deepness);
                    return Some((FullCodeCharKind::InComment, item));
                }
            }
            CharClassesStatus::LineComment => match chr {
                // The newline terminates the line comment and is its last character.
                '\n' => {
                    self.status = CharClassesStatus::Normal;
                    return Some((FullCodeCharKind::EndComment, item));
                }
                _ => {
                    self.status = CharClassesStatus::LineComment;
                    return Some((FullCodeCharKind::InComment, item));
                }
            },
        };
        Some((char_kind, item))
    }
}
1507
1508/// An iterator over the lines of a string, paired with the char kind at the
1509/// end of the line.
1510pub(crate) struct LineClasses<'a> {
1511    base: iter::Peekable<CharClasses<std::str::Chars<'a>>>,
1512    kind: FullCodeCharKind,
1513}
1514
1515impl<'a> LineClasses<'a> {
1516    pub(crate) fn new(s: &'a str) -> Self {
1517        LineClasses {
1518            base: CharClasses::new(s.chars()).peekable(),
1519            kind: FullCodeCharKind::Normal,
1520        }
1521    }
1522}
1523
1524impl<'a> Iterator for LineClasses<'a> {
1525    type Item = (FullCodeCharKind, String);
1526
1527    fn next(&mut self) -> Option<Self::Item> {
1528        self.base.peek()?;
1529
1530        let mut line = String::new();
1531
1532        let start_kind = match self.base.peek() {
1533            Some((kind, _)) => *kind,
1534            None => unreachable!(),
1535        };
1536
1537        for (kind, c) in self.base.by_ref() {
1538            // needed to set the kind of the ending character on the last line
1539            self.kind = kind;
1540            if c == '\n' {
1541                self.kind = match (start_kind, kind) {
1542                    (FullCodeCharKind::Normal, FullCodeCharKind::InString) => {
1543                        FullCodeCharKind::StartString
1544                    }
1545                    (FullCodeCharKind::InString, FullCodeCharKind::Normal) => {
1546                        FullCodeCharKind::EndString
1547                    }
1548                    (FullCodeCharKind::InComment, FullCodeCharKind::InStringCommented) => {
1549                        FullCodeCharKind::StartStringCommented
1550                    }
1551                    (FullCodeCharKind::InStringCommented, FullCodeCharKind::InComment) => {
1552                        FullCodeCharKind::EndStringCommented
1553                    }
1554                    _ => kind,
1555                };
1556                break;
1557            }
1558            line.push(c);
1559        }
1560
1561        // Workaround for CRLF newline.
1562        if line.ends_with('\r') {
1563            line.pop();
1564        }
1565
1566        Some((self.kind, line))
1567    }
1568}
1569
1570/// Iterator over functional and commented parts of a string. Any part of a string is either
1571/// functional code, either *one* block comment, either *one* line comment. Whitespace between
1572/// comments is functional code. Line comments contain their ending newlines.
1573struct UngroupedCommentCodeSlices<'a> {
1574    slice: &'a str,
1575    iter: iter::Peekable<CharClasses<std::str::CharIndices<'a>>>,
1576}
1577
1578impl<'a> UngroupedCommentCodeSlices<'a> {
1579    fn new(code: &'a str) -> UngroupedCommentCodeSlices<'a> {
1580        UngroupedCommentCodeSlices {
1581            slice: code,
1582            iter: CharClasses::new(code.char_indices()).peekable(),
1583        }
1584    }
1585}
1586
1587impl<'a> Iterator for UngroupedCommentCodeSlices<'a> {
1588    type Item = (CodeCharKind, usize, &'a str);
1589
1590    fn next(&mut self) -> Option<Self::Item> {
1591        let (kind, (start_idx, _)) = self.iter.next()?;
1592        match kind {
1593            FullCodeCharKind::Normal | FullCodeCharKind::InString => {
1594                // Consume all the Normal code
1595                while let Some(&(char_kind, _)) = self.iter.peek() {
1596                    if char_kind.is_comment() {
1597                        break;
1598                    }
1599                    let _ = self.iter.next();
1600                }
1601            }
1602            FullCodeCharKind::StartComment => {
1603                // Consume the whole comment
1604                loop {
1605                    match self.iter.next() {
1606                        Some((kind, ..)) if kind.inside_comment() => continue,
1607                        _ => break,
1608                    }
1609                }
1610            }
1611            _ => panic!(),
1612        }
1613        let slice = match self.iter.peek() {
1614            Some(&(_, (end_idx, _))) => &self.slice[start_idx..end_idx],
1615            None => &self.slice[start_idx..],
1616        };
1617        Some((
1618            if kind.is_comment() {
1619                CodeCharKind::Comment
1620            } else {
1621                CodeCharKind::Normal
1622            },
1623            start_idx,
1624            slice,
1625        ))
1626    }
1627}
1628
1629/// Iterator over an alternating sequence of functional and commented parts of
1630/// a string. The first item is always a, possibly zero length, subslice of
1631/// functional text. Line style comments contain their ending newlines.
1632pub(crate) struct CommentCodeSlices<'a> {
1633    slice: &'a str,
1634    last_slice_kind: CodeCharKind,
1635    last_slice_end: usize,
1636}
1637
1638impl<'a> CommentCodeSlices<'a> {
1639    pub(crate) fn new(slice: &'a str) -> CommentCodeSlices<'a> {
1640        CommentCodeSlices {
1641            slice,
1642            last_slice_kind: CodeCharKind::Comment,
1643            last_slice_end: 0,
1644        }
1645    }
1646}
1647
1648impl<'a> Iterator for CommentCodeSlices<'a> {
1649    type Item = (CodeCharKind, usize, &'a str);
1650
1651    fn next(&mut self) -> Option<Self::Item> {
1652        if self.last_slice_end == self.slice.len() {
1653            return None;
1654        }
1655
1656        let mut sub_slice_end = self.last_slice_end;
1657        let mut first_whitespace = None;
1658        let subslice = &self.slice[self.last_slice_end..];
1659        let mut iter = CharClasses::new(subslice.char_indices());
1660
1661        for (kind, (i, c)) in &mut iter {
1662            let is_comment_connector = self.last_slice_kind == CodeCharKind::Normal
1663                && &subslice[..2] == "//"
1664                && [' ', '\t'].contains(&c);
1665
1666            if is_comment_connector && first_whitespace.is_none() {
1667                first_whitespace = Some(i);
1668            }
1669
1670            if kind.to_codecharkind() == self.last_slice_kind && !is_comment_connector {
1671                let last_index = match first_whitespace {
1672                    Some(j) => j,
1673                    None => i,
1674                };
1675                sub_slice_end = self.last_slice_end + last_index;
1676                break;
1677            }
1678
1679            if !is_comment_connector {
1680                first_whitespace = None;
1681            }
1682        }
1683
1684        if let (None, true) = (iter.next(), sub_slice_end == self.last_slice_end) {
1685            // This was the last subslice.
1686            sub_slice_end = match first_whitespace {
1687                Some(i) => self.last_slice_end + i,
1688                None => self.slice.len(),
1689            };
1690        }
1691
1692        let kind = match self.last_slice_kind {
1693            CodeCharKind::Comment => CodeCharKind::Normal,
1694            CodeCharKind::Normal => CodeCharKind::Comment,
1695        };
1696        let res = (
1697            kind,
1698            self.last_slice_end,
1699            &self.slice[self.last_slice_end..sub_slice_end],
1700        );
1701        self.last_slice_end = sub_slice_end;
1702        self.last_slice_kind = kind;
1703
1704        Some(res)
1705    }
1706}
1707
1708/// Checks is `new` didn't miss any comment from `span`, if it removed any, return previous text
1709pub(crate) fn recover_comment_removed(
1710    new: String,
1711    span: Span,
1712    context: &RewriteContext<'_>,
1713) -> String {
1714    let snippet = context.snippet(span);
1715    if snippet != new && changed_comment_content(snippet, &new) {
1716        // We missed some comments. Warn and keep the original text.
1717        if context.config.error_on_unformatted() {
1718            context.report.append(
1719                context.psess.span_to_filename(span),
1720                vec![FormattingError::from_span(
1721                    span,
1722                    context.psess,
1723                    ErrorKind::LostComment,
1724                )],
1725            );
1726        }
1727        snippet.to_owned()
1728    } else {
1729        new
1730    }
1731}
1732
1733pub(crate) fn filter_normal_code(code: &str) -> String {
1734    let mut buffer = String::with_capacity(code.len());
1735    LineClasses::new(code).for_each(|(kind, line)| match kind {
1736        FullCodeCharKind::Normal
1737        | FullCodeCharKind::StartString
1738        | FullCodeCharKind::InString
1739        | FullCodeCharKind::EndString => {
1740            buffer.push_str(&line);
1741            buffer.push('\n');
1742        }
1743        _ => (),
1744    });
1745    if !code.ends_with('\n') && buffer.ends_with('\n') {
1746        buffer.pop();
1747    }
1748    buffer
1749}
1750
1751/// Returns `true` if the two strings of code have the same payload of comments.
1752/// The payload of comments is everything in the string except:
1753/// - actual code (not comments),
1754/// - comment start/end marks,
1755/// - whitespace,
1756/// - '*' at the beginning of lines in block comments.
1757fn changed_comment_content(orig: &str, new: &str) -> bool {
1758    // Cannot write this as a fn since we cannot return types containing closures.
1759    let code_comment_content = |code| {
1760        let slices = UngroupedCommentCodeSlices::new(code);
1761        slices
1762            .filter(|(kind, _, _)| *kind == CodeCharKind::Comment)
1763            .flat_map(|(_, _, s)| CommentReducer::new(s))
1764    };
1765    let res = code_comment_content(orig).ne(code_comment_content(new));
1766    debug!(
1767        "comment::changed_comment_content: {}\norig: '{}'\nnew: '{}'\nraw_old: {}\nraw_new: {}",
1768        res,
1769        orig,
1770        new,
1771        code_comment_content(orig).collect::<String>(),
1772        code_comment_content(new).collect::<String>()
1773    );
1774    res
1775}
1776
/// Iterator over the 'payload' characters of a comment.
/// It skips whitespace, comment start/end marks, and a single '*' at the beginning of each line
/// of a block comment (after the first line).
/// The comment must be one comment, ie not more than one start mark (no multiple line comments,
/// for example).
struct CommentReducer<'a> {
    /// Whether the comment is a block comment (it starts with `/*`).
    is_block: bool,
    /// Whether the next character read is at the start of a line, before its optional `*`.
    at_start_line: bool,
    /// Characters of the comment with its start/end marks already removed.
    iter: std::str::Chars<'a>,
}

impl<'a> CommentReducer<'a> {
    /// Creates a reducer for `comment`, which must begin with a comment start mark.
    ///
    /// # Panics
    ///
    /// Panics if `comment` starts with neither `//` nor `/*`.
    fn new(comment: &'a str) -> CommentReducer<'a> {
        let is_block = comment.starts_with("/*");
        let comment = remove_comment_header(comment);
        CommentReducer {
            is_block,
            // There are no supplementary '*' on the first line.
            at_start_line: false,
            iter: comment.chars(),
        }
    }
}

impl<'a> Iterator for CommentReducer<'a> {
    type Item = char;

    /// Returns the next non-whitespace payload character, or `None` at the end of the comment.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let mut c = self.iter.next()?;
            if self.is_block && self.at_start_line {
                // Skip the indentation (and any blank lines) in front of the line's content.
                while c.is_whitespace() {
                    c = self.iter.next()?;
                }
                // Ignore leading '*'.
                if c == '*' {
                    c = self.iter.next()?;
                }
                // The line prefix has been consumed, so a later '*' on this line is payload.
                // If the line held nothing but its prefix, `c` is the newline and the next
                // character starts a line again.
                self.at_start_line = c == '\n';
            } else if c == '\n' {
                self.at_start_line = true;
            }
            if !c.is_whitespace() {
                return Some(c);
            }
        }
    }
}

/// Strips the start mark of a comment (`///`, `//!`, `//`, `/**`, `/*!` or `/*`) and, for block
/// comments, the closing `*/` when it is present. An unterminated block comment only loses its
/// start mark.
///
/// # Panics
///
/// Panics if `comment` starts with neither `//` nor `/*`.
fn remove_comment_header(comment: &str) -> &str {
    // Line comments: try the longer doc-comment marks before the plain `//`.
    let line_body = comment
        .strip_prefix("///")
        .or_else(|| comment.strip_prefix("//!"))
        .or_else(|| comment.strip_prefix("//"));
    if let Some(body) = line_body {
        return body;
    }

    // `/**/` is an empty plain block comment, not the opening of a doc block comment.
    let is_doc_block = (comment.starts_with("/**") && !comment.starts_with("/**/"))
        || comment.starts_with("/*!");
    let opener_len = if is_doc_block {
        3
    } else {
        assert!(
            comment.starts_with("/*"),
            "string '{comment}' is not a comment"
        );
        2
    };
    let body = &comment[opener_len..];
    body.strip_suffix("*/").unwrap_or(body)
}
1841
1842#[cfg(test)]
1843mod test {
1844    use super::*;
1845
1846    #[test]
1847    fn char_classes() {
1848        let mut iter = CharClasses::new("//\n\n".chars());
1849
1850        assert_eq!((FullCodeCharKind::StartComment, '/'), iter.next().unwrap());
1851        assert_eq!((FullCodeCharKind::InComment, '/'), iter.next().unwrap());
1852        assert_eq!((FullCodeCharKind::EndComment, '\n'), iter.next().unwrap());
1853        assert_eq!((FullCodeCharKind::Normal, '\n'), iter.next().unwrap());
1854        assert_eq!(None, iter.next());
1855    }
1856
1857    #[test]
1858    fn comment_code_slices() {
1859        let input = "code(); /* test */ 1 + 1";
1860        let mut iter = CommentCodeSlices::new(input);
1861
1862        assert_eq!((CodeCharKind::Normal, 0, "code(); "), iter.next().unwrap());
1863        assert_eq!(
1864            (CodeCharKind::Comment, 8, "/* test */"),
1865            iter.next().unwrap()
1866        );
1867        assert_eq!((CodeCharKind::Normal, 18, " 1 + 1"), iter.next().unwrap());
1868        assert_eq!(None, iter.next());
1869    }
1870
1871    #[test]
1872    fn comment_code_slices_two() {
1873        let input = "// comment\n    test();";
1874        let mut iter = CommentCodeSlices::new(input);
1875
1876        assert_eq!((CodeCharKind::Normal, 0, ""), iter.next().unwrap());
1877        assert_eq!(
1878            (CodeCharKind::Comment, 0, "// comment\n"),
1879            iter.next().unwrap()
1880        );
1881        assert_eq!(
1882            (CodeCharKind::Normal, 11, "    test();"),
1883            iter.next().unwrap()
1884        );
1885        assert_eq!(None, iter.next());
1886    }
1887
1888    #[test]
1889    fn comment_code_slices_three() {
1890        let input = "1 // comment\n    // comment2\n\n";
1891        let mut iter = CommentCodeSlices::new(input);
1892
1893        assert_eq!((CodeCharKind::Normal, 0, "1 "), iter.next().unwrap());
1894        assert_eq!(
1895            (CodeCharKind::Comment, 2, "// comment\n    // comment2\n"),
1896            iter.next().unwrap()
1897        );
1898        assert_eq!((CodeCharKind::Normal, 29, "\n"), iter.next().unwrap());
1899        assert_eq!(None, iter.next());
1900    }
1901
1902    #[test]
1903    #[rustfmt::skip]
1904    fn format_doc_comments() {
1905        let mut wrap_normalize_config: crate::config::Config = Default::default();
1906        wrap_normalize_config.set().wrap_comments(true);
1907        wrap_normalize_config.set().normalize_comments(true);
1908
1909        let mut wrap_config: crate::config::Config = Default::default();
1910        wrap_config.set().wrap_comments(true);
1911
1912        let comment = rewrite_comment(" //test",
1913                                      true,
1914                                      Shape::legacy(100, Indent::new(0, 100)),
1915                                      &wrap_normalize_config).unwrap();
1916        assert_eq!("/* test */", comment);
1917
1918        let comment = rewrite_comment("// comment on a",
1919                                      false,
1920                                      Shape::legacy(10, Indent::empty()),
1921                                      &wrap_normalize_config).unwrap();
1922        assert_eq!("// comment\n// on a", comment);
1923
1924        let comment = rewrite_comment("//  A multi line comment\n             // between args.",
1925                                      false,
1926                                      Shape::legacy(60, Indent::new(0, 12)),
1927                                      &wrap_normalize_config).unwrap();
1928        assert_eq!("//  A multi line comment\n            // between args.", comment);
1929
1930        let input = "// comment";
1931        let expected =
1932            "/* comment */";
1933        let comment = rewrite_comment(input,
1934                                      true,
1935                                      Shape::legacy(9, Indent::new(0, 69)),
1936                                      &wrap_normalize_config).unwrap();
1937        assert_eq!(expected, comment);
1938
1939        let comment = rewrite_comment("/*   trimmed    */",
1940                                      true,
1941                                      Shape::legacy(100, Indent::new(0, 100)),
1942                                      &wrap_normalize_config).unwrap();
1943        assert_eq!("/* trimmed */", comment);
1944
1945        // Check that different comment style are properly recognised.
1946        let comment = rewrite_comment(r#"/// test1
1947                                         /// test2
1948                                         /*
1949                                          * test3
1950                                          */"#,
1951                                      false,
1952                                      Shape::legacy(100, Indent::new(0, 0)),
1953                                      &wrap_normalize_config).unwrap();
1954        assert_eq!("/// test1\n/// test2\n// test3", comment);
1955
1956        // Check that the blank line marks the end of a commented paragraph.
1957        let comment = rewrite_comment(r#"// test1
1958
1959                                         // test2"#,
1960                                      false,
1961                                      Shape::legacy(100, Indent::new(0, 0)),
1962                                      &wrap_normalize_config).unwrap();
1963        assert_eq!("// test1\n\n// test2", comment);
1964
1965        // Check that the blank line marks the end of a custom-commented paragraph.
1966        let comment = rewrite_comment(r#"//@ test1
1967
1968                                         //@ test2"#,
1969                                      false,
1970                                      Shape::legacy(100, Indent::new(0, 0)),
1971                                      &wrap_normalize_config).unwrap();
1972        assert_eq!("//@ test1\n\n//@ test2", comment);
1973
1974        // Check that bare lines are just indented but otherwise left unchanged.
1975        let comment = rewrite_comment(r#"// test1
1976                                         /*
1977                                           a bare line!
1978
1979                                                another bare line!
1980                                          */"#,
1981                                      false,
1982                                      Shape::legacy(100, Indent::new(0, 0)),
1983                                      &wrap_config).unwrap();
1984        assert_eq!("// test1\n/*\n a bare line!\n\n      another bare line!\n*/", comment);
1985    }
1986
1987    // This is probably intended to be a non-test fn, but it is not used.
1988    // We should keep this around unless it helps us test stuff to remove it.
1989    fn uncommented(text: &str) -> String {
1990        CharClasses::new(text.chars())
1991            .filter_map(|(s, c)| match s {
1992                FullCodeCharKind::Normal | FullCodeCharKind::InString => Some(c),
1993                _ => None,
1994            })
1995            .collect()
1996    }
1997
1998    #[test]
1999    fn test_uncommented() {
2000        assert_eq!(&uncommented("abc/*...*/"), "abc");
2001        assert_eq!(
2002            &uncommented("// .... /* \n../* /* *** / */ */a/* // */c\n"),
2003            "..ac\n"
2004        );
2005        assert_eq!(&uncommented("abc \" /* */\" qsdf"), "abc \" /* */\" qsdf");
2006    }
2007
2008    #[test]
2009    fn test_contains_comment() {
2010        assert_eq!(contains_comment("abc"), false);
2011        assert_eq!(contains_comment("abc // qsdf"), true);
2012        assert_eq!(contains_comment("abc /* kqsdf"), true);
2013        assert_eq!(contains_comment("abc \" /* */\" qsdf"), false);
2014    }
2015
2016    #[test]
2017    fn test_find_uncommented() {
2018        fn check(haystack: &str, needle: &str, expected: Option<usize>) {
2019            assert_eq!(expected, haystack.find_uncommented(needle));
2020        }
2021
2022        check("/*/ */test", "test", Some(6));
2023        check("//test\ntest", "test", Some(7));
2024        check("/* comment only */", "whatever", None);
2025        check(
2026            "/* comment */ some text /* more commentary */ result",
2027            "result",
2028            Some(46),
2029        );
2030        check("sup // sup", "p", Some(2));
2031        check("sup", "x", None);
2032        check(r#"π? /**/ π is nice!"#, r#"π is nice"#, Some(9));
2033        check("/*sup yo? \n sup*/ sup", "p", Some(20));
2034        check("hel/*lohello*/lo", "hello", None);
2035        check("acb", "ab", None);
2036        check(",/*A*/ ", ",", Some(0));
2037        check("abc", "abc", Some(0));
2038        check("/* abc */", "abc", None);
2039        check("/**/abc/* */", "abc", Some(4));
2040        check("\"/* abc */\"", "abc", Some(4));
2041        check("\"/* abc", "abc", Some(4));
2042    }
2043
2044    #[test]
2045    fn test_filter_normal_code() {
2046        let s = r#"
2047fn main() {
2048    println!("hello, world");
2049}
2050"#;
2051        assert_eq!(s, filter_normal_code(s));
2052        let s_with_comment = r#"
2053fn main() {
2054    // hello, world
2055    println!("hello, world");
2056}
2057"#;
2058        assert_eq!(s, filter_normal_code(s_with_comment));
2059    }
2060
2061    #[test]
2062    fn test_itemized_block_first_line_handling() {
2063        fn run_test(
2064            test_input: &str,
2065            expected_line: &str,
2066            expected_indent: usize,
2067            expected_opener: &str,
2068            expected_line_start: &str,
2069        ) {
2070            let block = ItemizedBlock::new(test_input).unwrap();
2071            assert_eq!(1, block.lines.len(), "test_input: {test_input:?}");
2072            assert_eq!(expected_line, &block.lines[0], "test_input: {test_input:?}");
2073            assert_eq!(expected_indent, block.indent, "test_input: {test_input:?}");
2074            assert_eq!(expected_opener, &block.opener, "test_input: {test_input:?}");
2075            assert_eq!(
2076                expected_line_start, &block.line_start,
2077                "test_input: {test_input:?}"
2078            );
2079        }
2080
2081        run_test("- foo", "foo", 2, "- ", "  ");
2082        run_test("* foo", "foo", 2, "* ", "  ");
2083        run_test("> foo", "foo", 2, "> ", "> ");
2084
2085        run_test("1. foo", "foo", 3, "1. ", "   ");
2086        run_test("12. foo", "foo", 4, "12. ", "    ");
2087        run_test("1) foo", "foo", 3, "1) ", "   ");
2088        run_test("12) foo", "foo", 4, "12) ", "    ");
2089
2090        run_test("    - foo", "foo", 6, "    - ", "      ");
2091
2092        // https://spec.commonmark.org/0.30 says: "A start number may begin with 0s":
2093        run_test("0. foo", "foo", 3, "0. ", "   ");
2094        run_test("01. foo", "foo", 4, "01. ", "    ");
2095    }
2096
2097    #[test]
2098    fn test_itemized_block_nonobvious_markers_are_rejected() {
2099        let test_inputs = vec![
2100            // Non-numeric item markers (e.g. `a.` or `iv.`) are not allowed by
2101            // https://spec.commonmark.org/0.30/#ordered-list-marker. We also note that allowing
2102            // them would risk misidentifying regular words as item markers. See also the
2103            // discussion in https://talk.commonmark.org/t/blank-lines-before-lists-revisited/1990
2104            "word.  rest of the paragraph.",
2105            "a.  maybe this is a list item?  maybe not?",
2106            "iv.  maybe this is a list item?  maybe not?",
2107            // Numbers with 3 or more digits are not recognized as item markers, to avoid
2108            // formatting the following example as a list:
2109            //
2110            // ```
2111            // The Captain died in
2112            // 1868. He was buried in...
2113            // ```
2114            "123.  only 2-digit numbers are recognized as item markers.",
2115            // Parens:
2116            "123)  giving some coverage to parens as well.",
2117            "a)  giving some coverage to parens as well.",
2118            // https://spec.commonmark.org/0.30 says that "at least one space or tab is needed
2119            // between the list marker and any following content":
2120            "1.Not a list item.",
2121            "1.2.3. Not a list item.",
2122            "1)Not a list item.",
2123            "-Not a list item.",
2124            "+Not a list item.",
2125            "+1 not a list item.",
2126            // https://spec.commonmark.org/0.30 says: "A start number may not be negative":
2127            "-1. Not a list item.",
2128            "-1 Not a list item.",
2129            // Marker without prefix are not recognized as item markers:
2130            ".   Not a list item.",
2131            ")   Not a list item.",
2132        ];
2133        for line in test_inputs.iter() {
2134            let maybe_block = ItemizedBlock::new(line);
2135            assert!(
2136                maybe_block.is_none(),
2137                "The following line shouldn't be classified as a list item: {line}"
2138            );
2139        }
2140    }
2141}
rustfmt_nightly/comment.rs

rustfmt_nightly/
comment.rs