rustfmt_nightly/
comment.rs

1// Formatting and tools for comments.
2
3use std::{borrow::Cow, iter};
4
5use itertools::{Itertools as _, MultiPeek, multipeek};
6use rustc_span::Span;
7use tracing::{debug, trace};
8
9use crate::config::Config;
10use crate::rewrite::{RewriteContext, RewriteErrorExt, RewriteResult};
11use crate::shape::{Indent, Shape};
12use crate::string::{StringFormat, rewrite_string};
13use crate::utils::{
14    count_newlines, first_line_width, last_line_width, trim_left_preserve_layout,
15    trimmed_last_line_width, unicode_str_width,
16};
17use crate::{ErrorKind, FormattingError};
18
/// Returns `true` if `comment` starts with `//` and the character immediately after the two
/// slashes is neither alphanumeric nor whitespace (for example `//@` or `//#`).
///
/// Note that `///` and `//!` satisfy this test as well; a bare `//` does not.
fn is_custom_comment(comment: &str) -> bool {
    comment
        .strip_prefix("//")
        .and_then(|after_slashes| after_slashes.chars().next())
        .map_or(false, |marker| !(marker.is_alphanumeric() || marker.is_whitespace()))
}
28
/// The syntactic flavour of a comment, identified by the characters that open it.
#[derive(Copy, Clone, PartialEq, Eq)]
pub(crate) enum CommentStyle<'a> {
    /// Line comment opened with `//`.
    DoubleSlash,
    /// Line comment opened with `///`.
    TripleSlash,
    /// Line comment opened with `//!`.
    Doc,
    /// Block comment opened with `/*`.
    SingleBullet,
    /// Block comment opened with `/**`.
    DoubleBullet,
    /// Block comment opened with `/*!`.
    Exclamation,
    /// Line comment with a custom opener (e.g. `//@ `); the payload is that opener, which is
    /// also used as the start of every continuation line.
    Custom(&'a str),
}
39
/// Extracts the opener of a custom comment: the first line of `s` up to and including its
/// first space, or the whole first line when it contains no space.
///
/// Returns `""` when `s` has no lines at all.
fn custom_opener(s: &str) -> &str {
    match s.lines().next() {
        None => "",
        Some(head) => match head.find(' ') {
            Some(space_at) => &head[..=space_at],
            None => head,
        },
    }
}
47
48impl<'a> CommentStyle<'a> {
49    /// Returns `true` if the commenting style cannot span multiple lines.
50    pub(crate) fn is_line_comment(&self) -> bool {
51        matches!(
52            self,
53            CommentStyle::DoubleSlash
54                | CommentStyle::TripleSlash
55                | CommentStyle::Doc
56                | CommentStyle::Custom(_)
57        )
58    }
59
60    /// Returns `true` if the commenting style can span multiple lines.
61    pub(crate) fn is_block_comment(&self) -> bool {
62        matches!(
63            self,
64            CommentStyle::SingleBullet | CommentStyle::DoubleBullet | CommentStyle::Exclamation
65        )
66    }
67
68    /// Returns `true` if the commenting style is for documentation.
69    pub(crate) fn is_doc_comment(&self) -> bool {
70        matches!(*self, CommentStyle::TripleSlash | CommentStyle::Doc)
71    }
72
73    pub(crate) fn opener(&self) -> &'a str {
74        match *self {
75            CommentStyle::DoubleSlash => "// ",
76            CommentStyle::TripleSlash => "/// ",
77            CommentStyle::Doc => "//! ",
78            CommentStyle::SingleBullet => "/* ",
79            CommentStyle::DoubleBullet => "/** ",
80            CommentStyle::Exclamation => "/*! ",
81            CommentStyle::Custom(opener) => opener,
82        }
83    }
84
85    pub(crate) fn closer(&self) -> &'a str {
86        match *self {
87            CommentStyle::DoubleSlash
88            | CommentStyle::TripleSlash
89            | CommentStyle::Custom(..)
90            | CommentStyle::Doc => "",
91            CommentStyle::SingleBullet | CommentStyle::DoubleBullet | CommentStyle::Exclamation => {
92                " */"
93            }
94        }
95    }
96
97    pub(crate) fn line_start(&self) -> &'a str {
98        match *self {
99            CommentStyle::DoubleSlash => "// ",
100            CommentStyle::TripleSlash => "/// ",
101            CommentStyle::Doc => "//! ",
102            CommentStyle::SingleBullet | CommentStyle::DoubleBullet | CommentStyle::Exclamation => {
103                " * "
104            }
105            CommentStyle::Custom(opener) => opener,
106        }
107    }
108
109    pub(crate) fn to_str_tuplet(&self) -> (&'a str, &'a str, &'a str) {
110        (self.opener(), self.closer(), self.line_start())
111    }
112}
113
114pub(crate) fn comment_style(orig: &str, normalize_comments: bool) -> CommentStyle<'_> {
115    if !normalize_comments {
116        if orig.starts_with("/**") && !orig.starts_with("/**/") {
117            CommentStyle::DoubleBullet
118        } else if orig.starts_with("/*!") {
119            CommentStyle::Exclamation
120        } else if orig.starts_with("/*") {
121            CommentStyle::SingleBullet
122        } else if orig.starts_with("///") && orig.chars().nth(3).map_or(true, |c| c != '/') {
123            CommentStyle::TripleSlash
124        } else if orig.starts_with("//!") {
125            CommentStyle::Doc
126        } else if is_custom_comment(orig) {
127            CommentStyle::Custom(custom_opener(orig))
128        } else {
129            CommentStyle::DoubleSlash
130        }
131    } else if (orig.starts_with("///") && orig.chars().nth(3).map_or(true, |c| c != '/'))
132        || (orig.starts_with("/**") && !orig.starts_with("/**/"))
133    {
134        CommentStyle::TripleSlash
135    } else if orig.starts_with("//!") || orig.starts_with("/*!") {
136        CommentStyle::Doc
137    } else if is_custom_comment(orig) {
138        CommentStyle::Custom(custom_opener(orig))
139    } else {
140        CommentStyle::DoubleSlash
141    }
142}
143
/// Returns `true` if `s`, ignoring trailing whitespace, ends with the block-comment closer
/// `*/`.
pub(crate) fn is_last_comment_block(s: &str) -> bool {
    let without_trailing_ws = s.trim_end();
    without_trailing_ws.strip_suffix("*/").is_some()
}
148
149/// Combine `prev_str` and `next_str` into a single `String`. `span` may contain
150/// comments between two strings. If there are such comments, then that will be
151/// recovered. If `allow_extend` is true and there is no comment between the two
152/// strings, then they will be put on a single line as long as doing so does not
153/// exceed max width.
154pub(crate) fn combine_strs_with_missing_comments(
155    context: &RewriteContext<'_>,
156    prev_str: &str,
157    next_str: &str,
158    span: Span,
159    shape: Shape,
160    allow_extend: bool,
161) -> RewriteResult {
162    trace!(
163        "combine_strs_with_missing_comments `{}` `{}` {:?} {:?}",
164        prev_str, next_str, span, shape
165    );
166
167    let mut result =
168        String::with_capacity(prev_str.len() + next_str.len() + shape.indent.width() + 128);
169    result.push_str(prev_str);
170    let mut allow_one_line = !prev_str.contains('\n') && !next_str.contains('\n');
171    let first_sep =
172        if prev_str.is_empty() || next_str.is_empty() || trimmed_last_line_width(prev_str) == 0 {
173            ""
174        } else {
175            " "
176        };
177    let mut one_line_width =
178        last_line_width(prev_str) + first_line_width(next_str) + first_sep.len();
179
180    let config = context.config;
181    let indent = shape.indent;
182    let missing_comment = rewrite_missing_comment(span, shape, context)?;
183
184    if missing_comment.is_empty() {
185        if allow_extend && one_line_width <= shape.width {
186            result.push_str(first_sep);
187        } else if !prev_str.is_empty() {
188            result.push_str(&indent.to_string_with_newline(config))
189        }
190        result.push_str(next_str);
191        return Ok(result);
192    }
193
194    // We have a missing comment between the first expression and the second expression.
195
196    // Peek the original source code and find out whether there is a newline between the first
197    // expression and the second expression or the missing comment. We will preserve the original
198    // layout whenever possible.
199    let original_snippet = context.snippet(span);
200    let prefer_same_line = if let Some(pos) = original_snippet.find('/') {
201        !original_snippet[..pos].contains('\n')
202    } else {
203        !original_snippet.contains('\n')
204    };
205
206    one_line_width -= first_sep.len();
207    let first_sep = if prev_str.is_empty() || missing_comment.is_empty() {
208        Cow::from("")
209    } else {
210        let one_line_width = last_line_width(prev_str) + first_line_width(&missing_comment) + 1;
211        if prefer_same_line && one_line_width <= shape.width {
212            Cow::from(" ")
213        } else {
214            indent.to_string_with_newline(config)
215        }
216    };
217    result.push_str(&first_sep);
218    result.push_str(&missing_comment);
219
220    let second_sep = if missing_comment.is_empty() || next_str.is_empty() {
221        Cow::from("")
222    } else if missing_comment.starts_with("//") {
223        indent.to_string_with_newline(config)
224    } else {
225        one_line_width += missing_comment.len() + first_sep.len() + 1;
226        allow_one_line &= !missing_comment.starts_with("//") && !missing_comment.contains('\n');
227        if prefer_same_line && allow_one_line && one_line_width <= shape.width {
228            Cow::from(" ")
229        } else {
230            indent.to_string_with_newline(config)
231        }
232    };
233    result.push_str(&second_sep);
234    result.push_str(next_str);
235
236    Ok(result)
237}
238
239pub(crate) fn rewrite_doc_comment(orig: &str, shape: Shape, config: &Config) -> RewriteResult {
240    identify_comment(orig, false, shape, config, true)
241}
242
243pub(crate) fn rewrite_comment(
244    orig: &str,
245    block_style: bool,
246    shape: Shape,
247    config: &Config,
248) -> RewriteResult {
249    identify_comment(orig, block_style, shape, config, false)
250}
251
/// Rewrites all the comments contained in `orig`.
///
/// The style of the leading comment is detected, the first group of comments sharing that
/// style is split off and rewritten, and the function then recurses on the remainder. The
/// rewritten pieces are joined with a newline followed by the indentation of `shape`.
///
/// - `block_style` is forwarded to `rewrite_comment_inner`.
/// - `is_doc_comment` tells whether the caller considers `orig` a documentation comment; it
///   gates `format_code_in_doc_comments` and is forwarded to the rewriting helpers.
///
/// Returns an error if rewriting any group fails.
fn identify_comment(
    orig: &str,
    block_style: bool,
    shape: Shape,
    config: &Config,
    is_doc_comment: bool,
) -> RewriteResult {
    // The style is always detected without normalization, so the original flavour of the
    // comment is what drives the grouping below.
    let style = comment_style(orig, false);

    // Computes the byte length of line taking into account a newline if the line is part of a
    // paragraph.
    //
    // `orig` must start with `line`. If more text follows, the line terminator is counted as
    // well: two bytes when the byte right after `line` is `\r`, one byte otherwise.
    fn compute_len(orig: &str, line: &str) -> usize {
        if orig.len() > line.len() {
            if orig.as_bytes()[line.len()] == b'\r' {
                line.len() + 2
            } else {
                line.len() + 1
            }
        } else {
            line.len()
        }
    }

    // Get the first group of line comments having the same commenting style.
    //
    // Returns a tuple with:
    // - a boolean indicating if there is a blank line
    // - a number indicating the size of the first group of comments
    fn consume_same_line_comments(
        style: CommentStyle<'_>,
        orig: &str,
        line_start: &str,
    ) -> (bool, usize) {
        // Byte offset in `orig` just past the last line accepted into the group.
        let mut first_group_ending = 0;
        // "has blank line": set when the group was ended by an empty line.
        let mut hbl = false;

        for line in orig.lines() {
            let trimmed_line = line.trim_start();
            if trimmed_line.is_empty() {
                hbl = true;
                break;
            } else if trimmed_line.starts_with(line_start)
                || comment_style(trimmed_line, false) == style
            {
                first_group_ending += compute_len(&orig[first_group_ending..], line);
            } else {
                // A comment of a different style starts here; it belongs to the next group.
                break;
            }
        }
        (hbl, first_group_ending)
    }

    let (has_bare_lines, first_group_ending) = match style {
        CommentStyle::DoubleSlash | CommentStyle::TripleSlash | CommentStyle::Doc => {
            let line_start = style.line_start().trim_start();
            consume_same_line_comments(style, orig, line_start)
        }
        CommentStyle::Custom(opener) => {
            let trimmed_opener = opener.trim_end();
            consume_same_line_comments(style, orig, trimmed_opener)
        }
        // for a block comment, search for the closing symbol
        CommentStyle::DoubleBullet | CommentStyle::SingleBullet | CommentStyle::Exclamation => {
            let closer = style.closer().trim_start();
            // Number of closers in the whole of `orig`; decremented for every line that ends
            // with one, and the scan stops when it reaches zero.
            let mut count = orig.matches(closer).count();
            // Byte offset in `orig` just past the last line scanned so far.
            let mut closing_symbol_offset = 0;
            // Set when some line starts with none of `*`, `//` or `/*`.
            let mut hbl = false;
            let mut first = true;
            for line in orig.lines() {
                closing_symbol_offset += compute_len(&orig[closing_symbol_offset..], line);
                let mut trimmed_line = line.trim_start();
                if !trimmed_line.starts_with('*')
                    && !trimmed_line.starts_with("//")
                    && !trimmed_line.starts_with("/*")
                {
                    hbl = true;
                }

                // Remove opener from consideration when searching for closer
                if first {
                    let opener = style.opener().trim_end();
                    trimmed_line = &trimmed_line[opener.len()..];
                    first = false;
                }
                if trimmed_line.ends_with(closer) {
                    count -= 1;
                    if count == 0 {
                        break;
                    }
                }
            }
            (hbl, closing_symbol_offset)
        }
    };

    let (first_group, rest) = orig.split_at(first_group_ending);
    // Pick the rewriting strategy for the first group:
    // - a block comment with bare lines that is not being normalized only gets re-indented,
    //   keeping its inner layout;
    // - when nothing asks for the content to be reformatted, a light rewrite is enough;
    // - otherwise the comment goes through the full rewrite.
    let rewritten_first_group =
        if !config.normalize_comments() && has_bare_lines && style.is_block_comment() {
            trim_left_preserve_layout(first_group, shape.indent, config).unknown_error()?
        } else if !config.normalize_comments()
            && !config.wrap_comments()
            && !(
                // `format_code_in_doc_comments` should only take effect on doc comments,
                // so we only consider it when this comment block is a doc comment block.
                is_doc_comment && config.format_code_in_doc_comments()
            )
        {
            light_rewrite_comment(first_group, shape.indent, config, is_doc_comment)
        } else {
            rewrite_comment_inner(
                first_group,
                block_style,
                style,
                shape,
                config,
                is_doc_comment || style.is_doc_comment(),
            )?
        };
    if rest.is_empty() {
        Ok(rewritten_first_group)
    } else {
        // Rewrite what follows the first group and stitch both parts back together.
        identify_comment(
            rest.trim_start(),
            block_style,
            shape,
            config,
            is_doc_comment,
        )
        .map(|rest_str| {
            format!(
                "{}\n{}{}{}",
                rewritten_first_group,
                // insert back the blank line
                if has_bare_lines && style.is_line_comment() {
                    "\n"
                } else {
                    ""
                },
                shape.indent.to_string(config),
                rest_str
            )
        })
    }
}
396
/// Enum indicating if the code block contains rust based on attributes
enum CodeBlockAttribute {
    /// Every attribute of the code block is one that denotes Rust code.
    Rust,
    /// At least one attribute is not recognized as denoting Rust code.
    NotRust,
}

impl CodeBlockAttribute {
    /// Parses a comma separated attribute list (the text following the opening "```").
    ///
    /// Returns [`CodeBlockAttribute::Rust`] only if every attribute, after trimming
    /// whitespace, is empty or one of the recognized Rust attributes (`rust`,
    /// `should_panic`, `no_run` and the `edition20xx` markers). Anything else, including
    /// `ignore`, `compile_fail`, `text` and unknown attributes, yields
    /// [`CodeBlockAttribute::NotRust`].
    ///
    /// See <https://doc.rust-lang.org/rustdoc/print.html#attributes>
    fn new(attributes: &str) -> CodeBlockAttribute {
        let all_rust = attributes.split(',').all(|attribute| {
            matches!(
                attribute.trim(),
                "" | "rust"
                    | "should_panic"
                    | "no_run"
                    | "edition2015"
                    | "edition2018"
                    | "edition2021"
                    | "edition2024"
            )
        });

        if all_rust {
            CodeBlockAttribute::Rust
        } else {
            CodeBlockAttribute::NotRust
        }
    }
}
419
/// Block that is formatted as an item.
///
/// An item starts with either a star `*`, a dash `-`, a greater-than `>`, a plus '+', or a number
/// `12.` or `34)` (with at most 2 digits). An item represents CommonMark's ["list
/// items"](https://spec.commonmark.org/0.30/#list-items) and/or ["block
/// quotes"](https://spec.commonmark.org/0.30/#block-quotes), but note that only a subset of
/// CommonMark is recognized - see the doc comment of [`ItemizedBlock::get_marker_length`] for more
/// details.
///
/// Different level of indentation are handled by shrinking the shape accordingly.
struct ItemizedBlock {
    /// the lines that are identified as part of an itemized block; the first entry is the
    /// content of the first line with the item's opener removed
    lines: Vec<String>,
    /// the width of the item's prefix: the leading whitespace plus the item marker (for block
    /// quotes, the length of `line_start`); this is subtracted from the available width when
    /// the item's content is formatted
    indent: usize,
    /// the string that marks the start of an item: the first line's prefix up to the content
    opener: String,
    /// sequence of characters (typically whitespaces) to prefix new lines that are part of the item
    line_start: String,
}
440
441impl ItemizedBlock {
442    /// Checks whether the `trimmed` line includes an item marker. Returns `None` if there is no
443    /// marker. Returns the length of the marker (in bytes) if one is present. Note that the length
444    /// includes the whitespace that follows the marker, for example the marker in `"* list item"`
445    /// has the length of 2.
446    ///
447    /// This function recognizes item markers that correspond to CommonMark's
448    /// ["bullet list marker"](https://spec.commonmark.org/0.30/#bullet-list-marker),
449    /// ["block quote marker"](https://spec.commonmark.org/0.30/#block-quote-marker), and/or
450    /// ["ordered list marker"](https://spec.commonmark.org/0.30/#ordered-list-marker).
451    ///
452    /// Compared to CommonMark specification, the number of digits that are allowed in an ["ordered
453    /// list marker"](https://spec.commonmark.org/0.30/#ordered-list-marker) is more limited (to at
454    /// most 2 digits). Limiting the length of the marker helps reduce the risk of recognizing
455    /// arbitrary numbers as markers. See also
456    /// <https://talk.commonmark.org/t/blank-lines-before-lists-revisited/1990> which gives the
457    /// following example where a number (i.e. "1868") doesn't signify an ordered list:
458    /// ```md
459    /// The Captain died in
460    /// 1868. He wes buried in...
461    /// ```
462    fn get_marker_length(trimmed: &str) -> Option<usize> {
463        // https://spec.commonmark.org/0.30/#bullet-list-marker or
464        // https://spec.commonmark.org/0.30/#block-quote-marker
465        let itemized_start = ["* ", "- ", "> ", "+ "];
466        if itemized_start.iter().any(|s| trimmed.starts_with(s)) {
467            return Some(2); // All items in `itemized_start` have length 2.
468        }
469
470        // https://spec.commonmark.org/0.30/#ordered-list-marker, where at most 2 digits are
471        // allowed.
472        for suffix in [". ", ") "] {
473            if let Some((prefix, _)) = trimmed.split_once(suffix) {
474                let has_leading_digits = (1..=2).contains(&prefix.len())
475                    && prefix.chars().all(|c| char::is_ascii_digit(&c));
476                if has_leading_digits {
477                    return Some(prefix.len() + suffix.len());
478                }
479            }
480        }
481
482        None // No markers found.
483    }
484
485    /// Creates a new `ItemizedBlock` described with the given `line`.
486    /// Returns `None` if `line` doesn't start an item.
487    fn new(line: &str) -> Option<ItemizedBlock> {
488        let marker_length = ItemizedBlock::get_marker_length(line.trim_start())?;
489        let space_to_marker = line.chars().take_while(|c| c.is_whitespace()).count();
490        let mut indent = space_to_marker + marker_length;
491        let mut line_start = " ".repeat(indent);
492
493        // Markdown blockquote start with a "> "
494        if line.trim_start().starts_with('>') {
495            // remove the original +2 indent because there might be multiple nested block quotes
496            // and it's easier to reason about the final indent by just taking the length
497            // of the new line_start. We update the indent because it effects the max width
498            // of each formatted line.
499            line_start = itemized_block_quote_start(line, line_start, 2);
500            indent = line_start.len();
501        }
502        Some(ItemizedBlock {
503            lines: vec![line[indent..].to_string()],
504            indent,
505            opener: line[..indent].to_string(),
506            line_start,
507        })
508    }
509
510    /// Returns a `StringFormat` used for formatting the content of an item.
511    fn create_string_format<'a>(&'a self, fmt: &'a StringFormat<'_>) -> StringFormat<'a> {
512        StringFormat {
513            opener: "",
514            closer: "",
515            line_start: "",
516            line_end: "",
517            shape: Shape::legacy(fmt.shape.width.saturating_sub(self.indent), Indent::empty()),
518            trim_end: true,
519            config: fmt.config,
520        }
521    }
522
523    /// Returns `true` if the line is part of the current itemized block.
524    /// If it is, then it is added to the internal lines list.
525    fn add_line(&mut self, line: &str) -> bool {
526        if ItemizedBlock::get_marker_length(line.trim_start()).is_none()
527            && self.indent <= line.chars().take_while(|c| c.is_whitespace()).count()
528        {
529            self.lines.push(line.to_string());
530            return true;
531        }
532        false
533    }
534
535    /// Returns the block as a string, with each line trimmed at the start.
536    fn trimmed_block_as_string(&self) -> String {
537        self.lines.iter().fold(String::new(), |mut acc, line| {
538            acc.push_str(line.trim_start());
539            acc.push(' ');
540            acc
541        })
542    }
543
544    /// Returns the block as a string under its original form.
545    fn original_block_as_string(&self) -> String {
546        self.lines.join("\n")
547    }
548}
549
/// Determine the line_start when formatting markdown block quotes.
///
/// The incoming `line_start` likely consists of indentation (whitespaces). Its last
/// `remove_indent` characters are dropped and one `"> "` is appended for every `>` found in
/// `line` before its first alphanumeric character.
fn itemized_block_quote_start(line: &str, mut line_start: String, remove_indent: usize) -> String {
    // Nesting depth: every `>` in the non-alphanumeric prefix of the line.
    let quote_level = line
        .chars()
        .take_while(|c| !c.is_alphanumeric())
        .filter(|&c| c == '>')
        .count();

    // Drop whole characters (not bytes) from the end; popping an empty string is a no-op.
    (0..remove_indent).for_each(|_| {
        line_start.pop();
    });

    line_start.push_str(&"> ".repeat(quote_level));
    line_start
}
568
/// State carried while a single comment is rewritten line by line.
struct CommentRewrite<'a> {
    /// Output buffer; seeded with the comment opener when the rewrite is created.
    result: String,
    /// Lines of the fenced code block currently being read, each terminated by `\n`.
    code_block_buffer: String,
    /// Whether the previously rewritten line was wrapped onto several lines.
    is_prev_line_multi_line: bool,
    /// `Some` while inside a fenced code block; holds the classification of its attributes.
    code_block_attr: Option<CodeBlockAttribute>,
    /// The itemized block (list item or block quote) currently being accumulated, if any.
    item_block: Option<ItemizedBlock>,
    /// `indent_str` followed by `line_start`; pushed to begin each new comment line.
    comment_line_separator: String,
    /// Result of `shape.indent.to_string_with_newline(config)` for the comment's shape.
    indent_str: String,
    /// Width of the shape minus the opener and closer lengths (1 if that would underflow).
    max_width: usize,
    /// Indentation of the shape the comment is rewritten into.
    fmt_indent: Indent,
    /// Format handed to `rewrite_string` when comment lines are wrapped.
    fmt: StringFormat<'a>,

    /// Text that opens the comment.
    opener: String,
    /// Text that closes the comment (empty for line comments).
    closer: String,
    /// Prefix of each comment line after the first.
    line_start: String,
    /// Style the comment is rewritten in.
    style: CommentStyle<'a>,
}
586
587impl<'a> CommentRewrite<'a> {
588    fn new(
589        orig: &'a str,
590        block_style: bool,
591        shape: Shape,
592        config: &'a Config,
593    ) -> CommentRewrite<'a> {
594        let ((opener, closer, line_start), style) = if block_style {
595            (
596                CommentStyle::SingleBullet.to_str_tuplet(),
597                CommentStyle::SingleBullet,
598            )
599        } else {
600            let style = comment_style(orig, config.normalize_comments());
601            (style.to_str_tuplet(), style)
602        };
603
604        let max_width = shape
605            .width
606            .checked_sub(closer.len() + opener.len())
607            .unwrap_or(1);
608        let indent_str = shape.indent.to_string_with_newline(config).to_string();
609
610        let mut cr = CommentRewrite {
611            result: String::with_capacity(orig.len() * 2),
612            code_block_buffer: String::with_capacity(128),
613            is_prev_line_multi_line: false,
614            code_block_attr: None,
615            item_block: None,
616            comment_line_separator: format!("{indent_str}{line_start}"),
617            max_width,
618            indent_str,
619            fmt_indent: shape.indent,
620
621            fmt: StringFormat {
622                opener: "",
623                closer: "",
624                line_start,
625                line_end: "",
626                shape: Shape::legacy(max_width, shape.indent),
627                trim_end: true,
628                config,
629            },
630
631            opener: opener.to_owned(),
632            closer: closer.to_owned(),
633            line_start: line_start.to_owned(),
634            style,
635        };
636        cr.result.push_str(opener);
637        cr
638    }
639
640    fn join_block(s: &str, sep: &str) -> String {
641        let mut result = String::with_capacity(s.len() + 128);
642        let mut iter = s.lines().peekable();
643        while let Some(line) = iter.next() {
644            result.push_str(line);
645            result.push_str(match iter.peek() {
646                Some(&"") => sep.trim_end(),
647                Some(..) => sep,
648                None => "",
649            });
650        }
651        result
652    }
653
654    /// Check if any characters were written to the result buffer after the start of the comment.
655    /// when calling [`CommentRewrite::new()`] the result buffer is initialized with the opening
656    /// characters for the comment.
657    fn buffer_contains_comment(&self) -> bool {
658        // if self.result.len() < self.opener.len() then an empty comment is in the buffer
659        // if self.result.len() > self.opener.len() then a non empty comment is in the buffer
660        self.result.len() != self.opener.len()
661    }
662
663    fn finish(mut self) -> String {
664        if !self.code_block_buffer.is_empty() {
665            // There is a code block that is not properly enclosed by backticks.
666            // We will leave them untouched.
667            self.result.push_str(&self.comment_line_separator);
668            self.result.push_str(&Self::join_block(
669                &trim_custom_comment_prefix(&self.code_block_buffer),
670                &self.comment_line_separator,
671            ));
672        }
673
674        if let Some(ref ib) = self.item_block {
675            // the last few lines are part of an itemized block
676            self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
677            let item_fmt = ib.create_string_format(&self.fmt);
678
679            // only push a comment_line_separator for ItemizedBlocks if the comment is not empty
680            if self.buffer_contains_comment() {
681                self.result.push_str(&self.comment_line_separator);
682            }
683
684            self.result.push_str(&ib.opener);
685            match rewrite_string(
686                &ib.trimmed_block_as_string(),
687                &item_fmt,
688                self.max_width.saturating_sub(ib.indent),
689            ) {
690                Some(s) => self.result.push_str(&Self::join_block(
691                    &s,
692                    &format!("{}{}", self.comment_line_separator, ib.line_start),
693                )),
694                None => self.result.push_str(&Self::join_block(
695                    &ib.original_block_as_string(),
696                    &self.comment_line_separator,
697                )),
698            };
699        }
700
701        self.result.push_str(&self.closer);
702        if self.result.ends_with(&self.opener) && self.opener.ends_with(' ') {
703            // Trailing space.
704            self.result.pop();
705        }
706
707        self.result
708    }
709
710    fn handle_line(
711        &mut self,
712        orig: &'a str,
713        i: usize,
714        line: &'a str,
715        has_leading_whitespace: bool,
716        is_doc_comment: bool,
717    ) -> bool {
718        let num_newlines = count_newlines(orig);
719        let is_last = i == num_newlines;
720        let needs_new_comment_line = if self.style.is_block_comment() {
721            num_newlines > 0 || self.buffer_contains_comment()
722        } else {
723            self.buffer_contains_comment()
724        };
725
726        if let Some(ref mut ib) = self.item_block {
727            if ib.add_line(line) {
728                return false;
729            }
730            self.is_prev_line_multi_line = false;
731            self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
732            let item_fmt = ib.create_string_format(&self.fmt);
733
734            // only push a comment_line_separator if we need to start a new comment line
735            if needs_new_comment_line {
736                self.result.push_str(&self.comment_line_separator);
737            }
738
739            self.result.push_str(&ib.opener);
740            match rewrite_string(
741                &ib.trimmed_block_as_string(),
742                &item_fmt,
743                self.max_width.saturating_sub(ib.indent),
744            ) {
745                Some(s) => self.result.push_str(&Self::join_block(
746                    &s,
747                    &format!("{}{}", self.comment_line_separator, ib.line_start),
748                )),
749                None => self.result.push_str(&Self::join_block(
750                    &ib.original_block_as_string(),
751                    &self.comment_line_separator,
752                )),
753            };
754        } else if self.code_block_attr.is_some() {
755            if line.starts_with("```") {
756                let code_block = match self.code_block_attr.as_ref().unwrap() {
757                    CodeBlockAttribute::Rust
758                        if self.fmt.config.format_code_in_doc_comments()
759                            && !self.code_block_buffer.trim().is_empty() =>
760                    {
761                        let mut config = self.fmt.config.clone();
762                        config.set().wrap_comments(false);
763                        let comment_max_width = config
764                            .doc_comment_code_block_width()
765                            .min(config.max_width());
766                        config.set().max_width(comment_max_width);
767                        if let Some(s) =
768                            crate::format_code_block(&self.code_block_buffer, &config, false)
769                        {
770                            trim_custom_comment_prefix(&s.snippet)
771                        } else {
772                            trim_custom_comment_prefix(&self.code_block_buffer)
773                        }
774                    }
775                    _ => trim_custom_comment_prefix(&self.code_block_buffer),
776                };
777                if !code_block.is_empty() {
778                    self.result.push_str(&self.comment_line_separator);
779                    self.result
780                        .push_str(&Self::join_block(&code_block, &self.comment_line_separator));
781                }
782                self.code_block_buffer.clear();
783                self.result.push_str(&self.comment_line_separator);
784                self.result.push_str(line);
785                self.code_block_attr = None;
786            } else {
787                self.code_block_buffer
788                    .push_str(&hide_sharp_behind_comment(line));
789                self.code_block_buffer.push('\n');
790            }
791            return false;
792        }
793
794        self.code_block_attr = None;
795        self.item_block = None;
796        if let Some(stripped) = line.strip_prefix("```") {
797            self.code_block_attr = Some(CodeBlockAttribute::new(stripped))
798        } else if self.fmt.config.wrap_comments() {
799            if let Some(ib) = ItemizedBlock::new(line) {
800                self.item_block = Some(ib);
801                return false;
802            }
803        }
804
805        if self.result == self.opener {
806            let force_leading_whitespace = &self.opener == "/* " && count_newlines(orig) == 0;
807            if !has_leading_whitespace && !force_leading_whitespace && self.result.ends_with(' ') {
808                self.result.pop();
809            }
810            if line.is_empty() {
811                return false;
812            }
813        } else if self.is_prev_line_multi_line && !line.is_empty() {
814            self.result.push(' ')
815        } else if is_last && line.is_empty() {
816            // trailing blank lines are unwanted
817            if !self.closer.is_empty() {
818                self.result.push_str(&self.indent_str);
819            }
820            return true;
821        } else {
822            self.result.push_str(&self.comment_line_separator);
823            if !has_leading_whitespace && self.result.ends_with(' ') {
824                self.result.pop();
825            }
826        }
827
828        let is_markdown_header_doc_comment = is_doc_comment && line.starts_with('#');
829
830        // We only want to wrap the comment if:
831        // 1) wrap_comments = true is configured
832        // 2) The comment is not the start of a markdown header doc comment
833        // 3) The comment width exceeds the shape's width
834        // 4) No URLS were found in the comment
835        // If this changes, the documentation in ../Configurations.md#wrap_comments
836        // should be changed accordingly.
837        let should_wrap_comment = self.fmt.config.wrap_comments()
838            && !is_markdown_header_doc_comment
839            && unicode_str_width(line) > self.fmt.shape.width
840            && !has_url(line)
841            && !is_table_item(line);
842
843        if should_wrap_comment {
844            match rewrite_string(line, &self.fmt, self.max_width) {
845                Some(ref s) => {
846                    self.is_prev_line_multi_line = s.contains('\n');
847                    self.result.push_str(s);
848                }
849                None if self.is_prev_line_multi_line => {
850                    // We failed to put the current `line` next to the previous `line`.
851                    // Remove the trailing space, then start rewrite on the next line.
852                    self.result.pop();
853                    self.result.push_str(&self.comment_line_separator);
854                    self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
855                    match rewrite_string(line, &self.fmt, self.max_width) {
856                        Some(ref s) => {
857                            self.is_prev_line_multi_line = s.contains('\n');
858                            self.result.push_str(s);
859                        }
860                        None => {
861                            self.is_prev_line_multi_line = false;
862                            self.result.push_str(line);
863                        }
864                    }
865                }
866                None => {
867                    self.is_prev_line_multi_line = false;
868                    self.result.push_str(line);
869                }
870            }
871
872            self.fmt.shape = if self.is_prev_line_multi_line {
873                // 1 = " "
874                let offset = 1 + last_line_width(&self.result) - self.line_start.len();
875                Shape {
876                    width: self.max_width.saturating_sub(offset),
877                    indent: self.fmt_indent,
878                    offset: self.fmt.shape.offset + offset,
879                }
880            } else {
881                Shape::legacy(self.max_width, self.fmt_indent)
882            };
883        } else {
884            if line.is_empty() && self.result.ends_with(' ') && !is_last {
885                // Remove space if this is an empty comment or a doc comment.
886                self.result.pop();
887            }
888            self.result.push_str(line);
889            self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
890            self.is_prev_line_multi_line = false;
891        }
892
893        false
894    }
895}
896
897fn rewrite_comment_inner(
898    orig: &str,
899    block_style: bool,
900    style: CommentStyle<'_>,
901    shape: Shape,
902    config: &Config,
903    is_doc_comment: bool,
904) -> RewriteResult {
905    let mut rewriter = CommentRewrite::new(orig, block_style, shape, config);
906
907    let line_breaks = count_newlines(orig.trim_end());
908    let lines = orig
909        .lines()
910        .enumerate()
911        .map(|(i, mut line)| {
912            line = trim_end_unless_two_whitespaces(line.trim_start(), is_doc_comment);
913            // Drop old closer.
914            if i == line_breaks && line.ends_with("*/") && !line.starts_with("//") {
915                line = line[..(line.len() - 2)].trim_end();
916            }
917
918            line
919        })
920        .map(|s| left_trim_comment_line(s, &style))
921        .map(|(line, has_leading_whitespace)| {
922            if orig.starts_with("/*") && line_breaks == 0 {
923                (
924                    line.trim_start(),
925                    has_leading_whitespace || config.normalize_comments(),
926                )
927            } else {
928                (line, has_leading_whitespace || config.normalize_comments())
929            }
930        });
931
932    for (i, (line, has_leading_whitespace)) in lines.enumerate() {
933        if rewriter.handle_line(orig, i, line, has_leading_whitespace, is_doc_comment) {
934            break;
935        }
936    }
937
938    Ok(rewriter.finish())
939}
940
/// Marker put in front of `#`-prefixed lines of a code block so that they look
/// like line comments; removed again by `trim_custom_comment_prefix`.
const RUSTFMT_CUSTOM_COMMENT_PREFIX: &str = "//#### ";

/// Prefixes `s` with `RUSTFMT_CUSTOM_COMMENT_PREFIX` when its trimmed form is
/// exactly `#` or starts with `# `; any other line is returned borrowed and
/// untouched. The prefix is added in front of the *untrimmed* line.
fn hide_sharp_behind_comment(s: &str) -> Cow<'_, str> {
    let is_sharp_line = match s.trim().strip_prefix('#') {
        Some(after_sharp) => after_sharp.is_empty() || after_sharp.starts_with(' '),
        None => false,
    };

    if is_sharp_line {
        Cow::Owned([RUSTFMT_CUSTOM_COMMENT_PREFIX, s].concat())
    } else {
        Cow::Borrowed(s)
    }
}
951
952fn trim_custom_comment_prefix(s: &str) -> String {
953    s.lines()
954        .map(|line| {
955            let left_trimmed = line.trim_start();
956            if left_trimmed.starts_with(RUSTFMT_CUSTOM_COMMENT_PREFIX) {
957                left_trimmed.trim_start_matches(RUSTFMT_CUSTOM_COMMENT_PREFIX)
958            } else {
959                line
960            }
961        })
962        .collect::<Vec<_>>()
963        .join("\n")
964}
965
966/// Returns `true` if the given string MAY include URLs or alike.
967fn has_url(s: &str) -> bool {
968    // A regex matching reference doc links.
969    //
970    // ```markdown
971    // /// An [example].
972    // ///
973    // /// [example]: this::is::a::link
974    // ```
975    let reference_link_url = static_regex!(r"^\[.+\]\s?:");
976
977    // This function may return false positive, but should get its job done in most cases.
978    s.contains("https://")
979        || s.contains("http://")
980        || s.contains("ftp://")
981        || s.contains("file://")
982        || reference_link_url.is_match(s)
983}
984
/// Returns true if the given string may be part of a Markdown table.
///
/// After leading whitespace is skipped, the string must start with `|` and
/// contain at least one more `|` further to the right.
fn is_table_item(s: &str) -> bool {
    // This function may return false positive, but should get its job done in most cases (i.e.
    // markdown tables with two column delimiters).
    let row = s.trim_start();
    row.starts_with('|') && matches!(row.rfind('|'), Some(last_pipe) if last_pipe > 0)
}
996
997/// Given the span, rewrite the missing comment inside it if available.
998/// Note that the given span must only include comments (or leading/trailing whitespaces).
999pub(crate) fn rewrite_missing_comment(
1000    span: Span,
1001    shape: Shape,
1002    context: &RewriteContext<'_>,
1003) -> RewriteResult {
1004    let missing_snippet = context.snippet(span);
1005    let trimmed_snippet = missing_snippet.trim();
1006    // check the span starts with a comment
1007    let pos = trimmed_snippet.find('/');
1008    if !trimmed_snippet.is_empty() && pos.is_some() {
1009        rewrite_comment(trimmed_snippet, false, shape, context.config)
1010    } else {
1011        Ok(String::new())
1012    }
1013}
1014
1015/// Recover the missing comments in the specified span, if available.
1016/// The layout of the comments will be preserved as long as it does not break the code
1017/// and its total width does not exceed the max width.
1018pub(crate) fn recover_missing_comment_in_span(
1019    span: Span,
1020    shape: Shape,
1021    context: &RewriteContext<'_>,
1022    used_width: usize,
1023) -> RewriteResult {
1024    let missing_comment = rewrite_missing_comment(span, shape, context)?;
1025    if missing_comment.is_empty() {
1026        Ok(String::new())
1027    } else {
1028        let missing_snippet = context.snippet(span);
1029        let pos = missing_snippet.find('/').unknown_error()?;
1030        // 1 = ` `
1031        let total_width = missing_comment.len() + used_width + 1;
1032        let force_new_line_before_comment =
1033            missing_snippet[..pos].contains('\n') || total_width > context.config.max_width();
1034        let sep = if force_new_line_before_comment {
1035            shape.indent.to_string_with_newline(context.config)
1036        } else {
1037            Cow::from(" ")
1038        };
1039        Ok(format!("{sep}{missing_comment}"))
1040    }
1041}
1042
/// Trims trailing whitespace from `s`, except when `is_doc_comment` is set and
/// `s` ends with (at least) two spaces, in which case `s` is returned as is.
fn trim_end_unless_two_whitespaces(s: &str, is_doc_comment: bool) -> &str {
    let keep_trailing_spaces = is_doc_comment && s.ends_with("  ");
    match keep_trailing_spaces {
        true => s,
        false => s.trim_end(),
    }
}
1051
1052/// Trims whitespace and aligns to indent, but otherwise does not change comments.
1053fn light_rewrite_comment(
1054    orig: &str,
1055    offset: Indent,
1056    config: &Config,
1057    is_doc_comment: bool,
1058) -> String {
1059    orig.lines()
1060        .map(|l| {
1061            // This is basically just l.trim(), but in the case that a line starts
1062            // with `*` we want to leave one space before it, so it aligns with the
1063            // `*` in `/*`.
1064            let first_non_whitespace = l.find(|c| !char::is_whitespace(c));
1065            let left_trimmed = if let Some(fnw) = first_non_whitespace {
1066                if l.as_bytes()[fnw] == b'*' && fnw > 0 {
1067                    &l[fnw - 1..]
1068                } else {
1069                    &l[fnw..]
1070                }
1071            } else {
1072                ""
1073            };
1074            // Preserve markdown's double-space line break syntax in doc comment.
1075            trim_end_unless_two_whitespaces(left_trimmed, is_doc_comment)
1076        })
1077        .join(&format!("\n{}", offset.to_string(config)))
1078}
1079
1080/// Trims comment characters and possibly a single space from the left of a string.
1081/// Does not trim all whitespace. If a single space is trimmed from the left of the string,
1082/// this function returns true.
1083fn left_trim_comment_line<'a>(line: &'a str, style: &CommentStyle<'_>) -> (&'a str, bool) {
1084    if line.starts_with("//! ")
1085        || line.starts_with("/// ")
1086        || line.starts_with("/*! ")
1087        || line.starts_with("/** ")
1088    {
1089        (&line[4..], true)
1090    } else if let CommentStyle::Custom(opener) = *style {
1091        if let Some(stripped) = line.strip_prefix(opener) {
1092            (stripped, true)
1093        } else {
1094            (&line[opener.trim_end().len()..], false)
1095        }
1096    } else if line.starts_with("/* ")
1097        || line.starts_with("// ")
1098        || line.starts_with("//!")
1099        || line.starts_with("///")
1100        || line.starts_with("** ")
1101        || line.starts_with("/*!")
1102        || (line.starts_with("/**") && !line.starts_with("/**/"))
1103    {
1104        (&line[3..], line.chars().nth(2).unwrap() == ' ')
1105    } else if line.starts_with("/*")
1106        || line.starts_with("* ")
1107        || line.starts_with("//")
1108        || line.starts_with("**")
1109    {
1110        (&line[2..], line.chars().nth(1).unwrap() == ' ')
1111    } else if let Some(stripped) = line.strip_prefix('*') {
1112        (stripped, false)
1113    } else {
1114        (line, line.starts_with(' '))
1115    }
1116}
1117
/// Substring search that ignores text located inside comments.
pub(crate) trait FindUncommented {
    /// Returns the byte offset of the first occurrence of `pat` that lies
    /// outside of comments, or `None` if there is none.
    fn find_uncommented(&self, pat: &str) -> Option<usize>;
    /// Returns the byte offset of the last occurrence of `pat` that lies
    /// outside of comments, or `None` if there is none.
    fn find_last_uncommented(&self, pat: &str) -> Option<usize>;
}
1122
1123impl FindUncommented for str {
1124    fn find_uncommented(&self, pat: &str) -> Option<usize> {
1125        let mut needle_iter = pat.chars();
1126        for (kind, (i, b)) in CharClasses::new(self.char_indices()) {
1127            match needle_iter.next() {
1128                None => {
1129                    return Some(i - pat.len());
1130                }
1131                Some(c) => match kind {
1132                    FullCodeCharKind::Normal | FullCodeCharKind::InString if b == c => {}
1133                    _ => {
1134                        needle_iter = pat.chars();
1135                    }
1136                },
1137            }
1138        }
1139
1140        // Handle case where the pattern is a suffix of the search string
1141        match needle_iter.next() {
1142            Some(_) => None,
1143            None => Some(self.len() - pat.len()),
1144        }
1145    }
1146
1147    fn find_last_uncommented(&self, pat: &str) -> Option<usize> {
1148        if let Some(left) = self.find_uncommented(pat) {
1149            let mut result = left;
1150            // add 1 to use find_last_uncommented for &str after pat
1151            while let Some(next) = self[(result + 1)..].find_last_uncommented(pat) {
1152                result += next + 1;
1153            }
1154            Some(result)
1155        } else {
1156            None
1157        }
1158    }
1159}
1160
1161// Returns the first byte position after the first comment. The given string
1162// is expected to be prefixed by a comment, including delimiters.
1163// Good: `/* /* inner */ outer */ code();`
1164// Bad:  `code(); // hello\n world!`
1165pub(crate) fn find_comment_end(s: &str) -> Option<usize> {
1166    let mut iter = CharClasses::new(s.char_indices());
1167    for (kind, (i, _c)) in &mut iter {
1168        if kind == FullCodeCharKind::Normal || kind == FullCodeCharKind::InString {
1169            return Some(i);
1170        }
1171    }
1172
1173    // Handle case where the comment ends at the end of `s`.
1174    if iter.status == CharClassesStatus::Normal {
1175        Some(s.len())
1176    } else {
1177        None
1178    }
1179}
1180
1181/// Returns `true` if text contains any comment.
1182pub(crate) fn contains_comment(text: &str) -> bool {
1183    CharClasses::new(text.chars()).any(|(kind, _)| kind.is_comment())
1184}
1185
/// Iterator adaptor that tags every item of the wrapped character iterator
/// with a `FullCodeCharKind` (see its `Iterator` implementation).
pub(crate) struct CharClasses<T>
where
    T: Iterator,
    T::Item: RichChar,
{
    /// The wrapped iterator; `MultiPeek` allows looking at upcoming characters.
    base: MultiPeek<T>,
    /// Classifier state carried over from one call of `next` to the following one.
    status: CharClassesStatus,
}
1194
/// An item that carries a character, possibly together with extra data.
pub(crate) trait RichChar {
    /// Returns the character carried by this item.
    fn get_char(&self) -> char;
}
1198
/// A plain `char` is its own rich character.
impl RichChar for char {
    fn get_char(&self) -> char {
        *self
    }
}
1204
/// For `(usize, char)` pairs, as yielded by `str::char_indices`, the character
/// is the second element.
impl RichChar for (usize, char) {
    fn get_char(&self) -> char {
        self.1
    }
}
1210
/// Internal state of the `CharClasses` classifier.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
enum CharClassesStatus {
    /// Character is outside of any string literal, char literal or comment
    Normal,
    /// Character is within a string
    LitString,
    /// A `\` has just been consumed inside a string; the next character is
    /// consumed as part of the string whatever it is
    LitStringEscape,
    /// Character is within a raw string
    LitRawString(u32),
    /// Reading the opener of a raw string (after the `r`), with the integer
    /// counting the `#` consumed so far
    RawStringPrefix(u32),
    /// Reading the `#`s that close a raw string, with the integer indicating
    /// how many of them are still to be consumed
    RawStringSuffix(u32),
    /// Character is within a char literal
    LitChar,
    /// A `\` has just been consumed inside a char literal
    LitCharEscape,
    /// Character inside a block comment, with the integer indicating the nesting deepness of the
    /// comment
    BlockComment(u32),
    /// Character inside a block-commented string, with the integer indicating the nesting deepness
    /// of the comment
    StringInBlockComment(u32),
    /// Status when the '/' has been consumed, but not yet the '*', deepness is
    /// the new deepness (after the comment opening).
    BlockCommentOpening(u32),
    /// Status when the '*' has been consumed, but not yet the '/', deepness is
    /// the new deepness (after the comment closing).
    BlockCommentClosing(u32),
    /// Character is within a line comment
    LineComment,
}
1238
/// Distinguish between functional part of code and comments
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub(crate) enum CodeCharKind {
    /// Anything that is not part of a comment
    Normal,
    /// Part of a comment
    Comment,
}
1245
/// Distinguish between functional part of code and comments,
/// describing opening and closing of comments for ease when chunking
/// code from tagged characters
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub(crate) enum FullCodeCharKind {
    /// Neither part of a comment nor of a string
    Normal,
    /// The first character of a comment, there is only one for a comment (always '/')
    StartComment,
    /// Any character inside a comment including the second character of comment
    /// marks ("//", "/*")
    InComment,
    /// Last character of a comment, '\n' for a line comment, '/' for a block comment.
    EndComment,
    /// Start of a multiline string inside a comment
    StartStringCommented,
    /// End of a multiline string inside a comment
    EndStringCommented,
    /// Inside a commented string
    InStringCommented,
    /// Start of a multiline string
    StartString,
    /// End of a multiline string
    EndString,
    /// Inside a string.
    InString,
}
1272
1273impl FullCodeCharKind {
1274    pub(crate) fn is_comment(self) -> bool {
1275        match self {
1276            FullCodeCharKind::StartComment
1277            | FullCodeCharKind::InComment
1278            | FullCodeCharKind::EndComment
1279            | FullCodeCharKind::StartStringCommented
1280            | FullCodeCharKind::InStringCommented
1281            | FullCodeCharKind::EndStringCommented => true,
1282            _ => false,
1283        }
1284    }
1285
1286    /// Returns true if the character is inside a comment
1287    pub(crate) fn inside_comment(self) -> bool {
1288        match self {
1289            FullCodeCharKind::InComment
1290            | FullCodeCharKind::StartStringCommented
1291            | FullCodeCharKind::InStringCommented
1292            | FullCodeCharKind::EndStringCommented => true,
1293            _ => false,
1294        }
1295    }
1296
1297    pub(crate) fn is_string(self) -> bool {
1298        self == FullCodeCharKind::InString || self == FullCodeCharKind::StartString
1299    }
1300
1301    /// Returns true if the character is within a commented string
1302    pub(crate) fn is_commented_string(self) -> bool {
1303        self == FullCodeCharKind::InStringCommented
1304            || self == FullCodeCharKind::StartStringCommented
1305    }
1306
1307    fn to_codecharkind(self) -> CodeCharKind {
1308        if self.is_comment() {
1309            CodeCharKind::Comment
1310        } else {
1311            CodeCharKind::Normal
1312        }
1313    }
1314}
1315
1316impl<T> CharClasses<T>
1317where
1318    T: Iterator,
1319    T::Item: RichChar,
1320{
1321    pub(crate) fn new(base: T) -> CharClasses<T> {
1322        CharClasses {
1323            base: multipeek(base),
1324            status: CharClassesStatus::Normal,
1325        }
1326    }
1327}
1328
1329fn is_raw_string_suffix<T>(iter: &mut MultiPeek<T>, count: u32) -> bool
1330where
1331    T: Iterator,
1332    T::Item: RichChar,
1333{
1334    for _ in 0..count {
1335        match iter.peek() {
1336            Some(c) if c.get_char() == '#' => continue,
1337            _ => return false,
1338        }
1339    }
1340    true
1341}
1342
/// Yields every item of the wrapped iterator together with its
/// `FullCodeCharKind`, advancing the `CharClassesStatus` state machine by one
/// character per call.
impl<T> Iterator for CharClasses<T>
where
    T: Iterator,
    T::Item: RichChar,
{
    type Item = (FullCodeCharKind, T::Item);

    fn next(&mut self) -> Option<(FullCodeCharKind, T::Item)> {
        let item = self.base.next()?;
        let chr = item.get_char();
        // Kind reported for `item` unless a branch below overrides it or
        // returns early with its own kind.
        let mut char_kind = FullCodeCharKind::Normal;
        // Each arm evaluates to the state to use for the *next* character.
        // Arms that `return` early assign `self.status` themselves.
        self.status = match self.status {
            CharClassesStatus::LitRawString(sharps) => {
                char_kind = FullCodeCharKind::InString;
                match chr {
                    '"' => {
                        if sharps == 0 {
                            // `r"..."`: the quote alone closes the string.
                            char_kind = FullCodeCharKind::Normal;
                            CharClassesStatus::Normal
                        } else if is_raw_string_suffix(&mut self.base, sharps) {
                            CharClassesStatus::RawStringSuffix(sharps)
                        } else {
                            // A quote not followed by enough `#` is string content.
                            CharClassesStatus::LitRawString(sharps)
                        }
                    }
                    _ => CharClassesStatus::LitRawString(sharps),
                }
            }
            CharClassesStatus::RawStringPrefix(sharps) => {
                char_kind = FullCodeCharKind::InString;
                match chr {
                    '#' => CharClassesStatus::RawStringPrefix(sharps + 1),
                    '"' => CharClassesStatus::LitRawString(sharps),
                    _ => CharClassesStatus::Normal, // Unreachable.
                }
            }
            CharClassesStatus::RawStringSuffix(sharps) => {
                match chr {
                    '#' => {
                        if sharps == 1 {
                            // Last `#` of the closer: reported as `Normal`.
                            CharClassesStatus::Normal
                        } else {
                            char_kind = FullCodeCharKind::InString;
                            CharClassesStatus::RawStringSuffix(sharps - 1)
                        }
                    }
                    _ => CharClassesStatus::Normal, // Unreachable
                }
            }
            CharClassesStatus::LitString => {
                char_kind = FullCodeCharKind::InString;
                match chr {
                    '"' => CharClassesStatus::Normal,
                    '\\' => CharClassesStatus::LitStringEscape,
                    _ => CharClassesStatus::LitString,
                }
            }
            CharClassesStatus::LitStringEscape => {
                // The escaped character never terminates the string.
                char_kind = FullCodeCharKind::InString;
                CharClassesStatus::LitString
            }
            CharClassesStatus::LitChar => match chr {
                '\\' => CharClassesStatus::LitCharEscape,
                '\'' => CharClassesStatus::Normal,
                _ => CharClassesStatus::LitChar,
            },
            CharClassesStatus::LitCharEscape => CharClassesStatus::LitChar,
            CharClassesStatus::Normal => match chr {
                // `r` directly followed by `#` or `"` is taken as a raw string opener.
                'r' => match self.base.peek().map(RichChar::get_char) {
                    Some('#') | Some('"') => {
                        char_kind = FullCodeCharKind::InString;
                        CharClassesStatus::RawStringPrefix(0)
                    }
                    _ => CharClassesStatus::Normal,
                },
                '"' => {
                    char_kind = FullCodeCharKind::InString;
                    CharClassesStatus::LitString
                }
                '\'' => {
                    // HACK: Work around mut borrow.
                    // First peek: a `\` right after the quote starts an escaped char literal.
                    match self.base.peek() {
                        Some(next) if next.get_char() == '\\' => {
                            self.status = CharClassesStatus::LitChar;
                            return Some((char_kind, item));
                        }
                        _ => (),
                    }

                    // Second peek (one character further): a closing quote there
                    // means this is a one-character char literal.
                    match self.base.peek() {
                        Some(next) if next.get_char() == '\'' => CharClassesStatus::LitChar,
                        _ => CharClassesStatus::Normal,
                    }
                }
                '/' => match self.base.peek() {
                    Some(next) if next.get_char() == '*' => {
                        self.status = CharClassesStatus::BlockCommentOpening(1);
                        return Some((FullCodeCharKind::StartComment, item));
                    }
                    Some(next) if next.get_char() == '/' => {
                        self.status = CharClassesStatus::LineComment;
                        return Some((FullCodeCharKind::StartComment, item));
                    }
                    _ => CharClassesStatus::Normal,
                },
                _ => CharClassesStatus::Normal,
            },
            CharClassesStatus::StringInBlockComment(deepness) => {
                char_kind = FullCodeCharKind::InStringCommented;
                if chr == '"' {
                    CharClassesStatus::BlockComment(deepness)
                } else if chr == '*' && self.base.peek().map(RichChar::get_char) == Some('/') {
                    // `*/` closes the comment even inside a commented string.
                    char_kind = FullCodeCharKind::InComment;
                    CharClassesStatus::BlockCommentClosing(deepness - 1)
                } else {
                    CharClassesStatus::StringInBlockComment(deepness)
                }
            }
            CharClassesStatus::BlockComment(deepness) => {
                assert_ne!(deepness, 0);
                char_kind = FullCodeCharKind::InComment;
                match self.base.peek() {
                    // `*/`: leave one nesting level.
                    Some(next) if next.get_char() == '/' && chr == '*' => {
                        CharClassesStatus::BlockCommentClosing(deepness - 1)
                    }
                    // `/*`: enter a nested block comment.
                    Some(next) if next.get_char() == '*' && chr == '/' => {
                        CharClassesStatus::BlockCommentOpening(deepness + 1)
                    }
                    _ if chr == '"' => CharClassesStatus::StringInBlockComment(deepness),
                    _ => self.status,
                }
            }
            CharClassesStatus::BlockCommentOpening(deepness) => {
                assert_eq!(chr, '*');
                self.status = CharClassesStatus::BlockComment(deepness);
                return Some((FullCodeCharKind::InComment, item));
            }
            CharClassesStatus::BlockCommentClosing(deepness) => {
                assert_eq!(chr, '/');
                if deepness == 0 {
                    // Outermost comment closed.
                    self.status = CharClassesStatus::Normal;
                    return Some((FullCodeCharKind::EndComment, item));
                } else {
                    self.status = CharClassesStatus::BlockComment(deepness);
                    return Some((FullCodeCharKind::InComment, item));
                }
            }
            CharClassesStatus::LineComment => match chr {
                '\n' => {
                    self.status = CharClassesStatus::Normal;
                    return Some((FullCodeCharKind::EndComment, item));
                }
                _ => {
                    self.status = CharClassesStatus::LineComment;
                    return Some((FullCodeCharKind::InComment, item));
                }
            },
        };
        Some((char_kind, item))
    }
}
1504
/// An iterator over the lines of a string, paired with the char kind at the
/// end of the line.
pub(crate) struct LineClasses<'a> {
    /// Classified characters of the input that have not been consumed yet.
    base: iter::Peekable<CharClasses<std::str::Chars<'a>>>,
    /// Kind computed for the line produced most recently.
    kind: FullCodeCharKind,
}
1511
1512impl<'a> LineClasses<'a> {
1513    pub(crate) fn new(s: &'a str) -> Self {
1514        LineClasses {
1515            base: CharClasses::new(s.chars()).peekable(),
1516            kind: FullCodeCharKind::Normal,
1517        }
1518    }
1519}
1520
1521impl<'a> Iterator for LineClasses<'a> {
1522    type Item = (FullCodeCharKind, String);
1523
1524    fn next(&mut self) -> Option<Self::Item> {
1525        self.base.peek()?;
1526
1527        let mut line = String::new();
1528
1529        let start_kind = match self.base.peek() {
1530            Some((kind, _)) => *kind,
1531            None => unreachable!(),
1532        };
1533
1534        for (kind, c) in self.base.by_ref() {
1535            // needed to set the kind of the ending character on the last line
1536            self.kind = kind;
1537            if c == '\n' {
1538                self.kind = match (start_kind, kind) {
1539                    (FullCodeCharKind::Normal, FullCodeCharKind::InString) => {
1540                        FullCodeCharKind::StartString
1541                    }
1542                    (FullCodeCharKind::InString, FullCodeCharKind::Normal) => {
1543                        FullCodeCharKind::EndString
1544                    }
1545                    (FullCodeCharKind::InComment, FullCodeCharKind::InStringCommented) => {
1546                        FullCodeCharKind::StartStringCommented
1547                    }
1548                    (FullCodeCharKind::InStringCommented, FullCodeCharKind::InComment) => {
1549                        FullCodeCharKind::EndStringCommented
1550                    }
1551                    _ => kind,
1552                };
1553                break;
1554            }
1555            line.push(c);
1556        }
1557
1558        // Workaround for CRLF newline.
1559        if line.ends_with('\r') {
1560            line.pop();
1561        }
1562
1563        Some((self.kind, line))
1564    }
1565}
1566
/// Iterator over functional and commented parts of a string. Any part of a string is either
/// functional code, *one* block comment, or *one* line comment. Whitespace between
/// comments is functional code. Line comments contain their ending newlines.
struct UngroupedCommentCodeSlices<'a> {
    /// The string being split; yielded slices borrow from it.
    slice: &'a str,
    /// Classified `(byte index, char)` pairs of `slice` that have not been consumed yet.
    iter: iter::Peekable<CharClasses<std::str::CharIndices<'a>>>,
}
1574
1575impl<'a> UngroupedCommentCodeSlices<'a> {
1576    fn new(code: &'a str) -> UngroupedCommentCodeSlices<'a> {
1577        UngroupedCommentCodeSlices {
1578            slice: code,
1579            iter: CharClasses::new(code.char_indices()).peekable(),
1580        }
1581    }
1582}
1583
1584impl<'a> Iterator for UngroupedCommentCodeSlices<'a> {
1585    type Item = (CodeCharKind, usize, &'a str);
1586
1587    fn next(&mut self) -> Option<Self::Item> {
1588        let (kind, (start_idx, _)) = self.iter.next()?;
1589        match kind {
1590            FullCodeCharKind::Normal | FullCodeCharKind::InString => {
1591                // Consume all the Normal code
1592                while let Some(&(char_kind, _)) = self.iter.peek() {
1593                    if char_kind.is_comment() {
1594                        break;
1595                    }
1596                    let _ = self.iter.next();
1597                }
1598            }
1599            FullCodeCharKind::StartComment => {
1600                // Consume the whole comment
1601                loop {
1602                    match self.iter.next() {
1603                        Some((kind, ..)) if kind.inside_comment() => continue,
1604                        _ => break,
1605                    }
1606                }
1607            }
1608            _ => panic!(),
1609        }
1610        let slice = match self.iter.peek() {
1611            Some(&(_, (end_idx, _))) => &self.slice[start_idx..end_idx],
1612            None => &self.slice[start_idx..],
1613        };
1614        Some((
1615            if kind.is_comment() {
1616                CodeCharKind::Comment
1617            } else {
1618                CodeCharKind::Normal
1619            },
1620            start_idx,
1621            slice,
1622        ))
1623    }
1624}
1625
1626/// Iterator over an alternating sequence of functional and commented parts of
1627/// a string. The first item is always a, possibly zero length, subslice of
1628/// functional text. Line style comments contain their ending newlines.
1629pub(crate) struct CommentCodeSlices<'a> {
1630    slice: &'a str,
1631    last_slice_kind: CodeCharKind,
1632    last_slice_end: usize,
1633}
1634
1635impl<'a> CommentCodeSlices<'a> {
1636    pub(crate) fn new(slice: &'a str) -> CommentCodeSlices<'a> {
1637        CommentCodeSlices {
1638            slice,
1639            last_slice_kind: CodeCharKind::Comment,
1640            last_slice_end: 0,
1641        }
1642    }
1643}
1644
1645impl<'a> Iterator for CommentCodeSlices<'a> {
1646    type Item = (CodeCharKind, usize, &'a str);
1647
1648    fn next(&mut self) -> Option<Self::Item> {
1649        if self.last_slice_end == self.slice.len() {
1650            return None;
1651        }
1652
1653        let mut sub_slice_end = self.last_slice_end;
1654        let mut first_whitespace = None;
1655        let subslice = &self.slice[self.last_slice_end..];
1656        let mut iter = CharClasses::new(subslice.char_indices());
1657
1658        for (kind, (i, c)) in &mut iter {
1659            let is_comment_connector = self.last_slice_kind == CodeCharKind::Normal
1660                && &subslice[..2] == "//"
1661                && [' ', '\t'].contains(&c);
1662
1663            if is_comment_connector && first_whitespace.is_none() {
1664                first_whitespace = Some(i);
1665            }
1666
1667            if kind.to_codecharkind() == self.last_slice_kind && !is_comment_connector {
1668                let last_index = match first_whitespace {
1669                    Some(j) => j,
1670                    None => i,
1671                };
1672                sub_slice_end = self.last_slice_end + last_index;
1673                break;
1674            }
1675
1676            if !is_comment_connector {
1677                first_whitespace = None;
1678            }
1679        }
1680
1681        if let (None, true) = (iter.next(), sub_slice_end == self.last_slice_end) {
1682            // This was the last subslice.
1683            sub_slice_end = match first_whitespace {
1684                Some(i) => self.last_slice_end + i,
1685                None => self.slice.len(),
1686            };
1687        }
1688
1689        let kind = match self.last_slice_kind {
1690            CodeCharKind::Comment => CodeCharKind::Normal,
1691            CodeCharKind::Normal => CodeCharKind::Comment,
1692        };
1693        let res = (
1694            kind,
1695            self.last_slice_end,
1696            &self.slice[self.last_slice_end..sub_slice_end],
1697        );
1698        self.last_slice_end = sub_slice_end;
1699        self.last_slice_kind = kind;
1700
1701        Some(res)
1702    }
1703}
1704
1705/// Checks is `new` didn't miss any comment from `span`, if it removed any, return previous text
1706pub(crate) fn recover_comment_removed(
1707    new: String,
1708    span: Span,
1709    context: &RewriteContext<'_>,
1710) -> String {
1711    let snippet = context.snippet(span);
1712    if snippet != new && changed_comment_content(snippet, &new) {
1713        // We missed some comments. Warn and keep the original text.
1714        if context.config.error_on_unformatted() {
1715            context.report.append(
1716                context.psess.span_to_filename(span),
1717                vec![FormattingError::from_span(
1718                    span,
1719                    context.psess,
1720                    ErrorKind::LostComment,
1721                )],
1722            );
1723        }
1724        snippet.to_owned()
1725    } else {
1726        new
1727    }
1728}
1729
1730pub(crate) fn filter_normal_code(code: &str) -> String {
1731    let mut buffer = String::with_capacity(code.len());
1732    LineClasses::new(code).for_each(|(kind, line)| match kind {
1733        FullCodeCharKind::Normal
1734        | FullCodeCharKind::StartString
1735        | FullCodeCharKind::InString
1736        | FullCodeCharKind::EndString => {
1737            buffer.push_str(&line);
1738            buffer.push('\n');
1739        }
1740        _ => (),
1741    });
1742    if !code.ends_with('\n') && buffer.ends_with('\n') {
1743        buffer.pop();
1744    }
1745    buffer
1746}
1747
1748/// Returns `true` if the two strings of code have the same payload of comments.
1749/// The payload of comments is everything in the string except:
1750/// - actual code (not comments),
1751/// - comment start/end marks,
1752/// - whitespace,
1753/// - '*' at the beginning of lines in block comments.
1754fn changed_comment_content(orig: &str, new: &str) -> bool {
1755    // Cannot write this as a fn since we cannot return types containing closures.
1756    let code_comment_content = |code| {
1757        let slices = UngroupedCommentCodeSlices::new(code);
1758        slices
1759            .filter(|(kind, _, _)| *kind == CodeCharKind::Comment)
1760            .flat_map(|(_, _, s)| CommentReducer::new(s))
1761    };
1762    let res = code_comment_content(orig).ne(code_comment_content(new));
1763    debug!(
1764        "comment::changed_comment_content: {}\norig: '{}'\nnew: '{}'\nraw_old: {}\nraw_new: {}",
1765        res,
1766        orig,
1767        new,
1768        code_comment_content(orig).collect::<String>(),
1769        code_comment_content(new).collect::<String>()
1770    );
1771    res
1772}
1773
/// Iterator over the 'payload' characters of a comment.
/// It skips whitespace, comment start/end marks, and '*' at the beginning of lines.
/// The comment must be one comment, ie not more than one start mark (no multiple line comments,
/// for example).
struct CommentReducer<'a> {
    /// Whether the comment is a block comment (starts with `/*`).
    is_block: bool,
    /// Whether the next character read is the first one after a line break.
    at_start_line: bool,
    /// Characters of the comment with its start/end marks already removed.
    iter: std::str::Chars<'a>,
}

impl<'a> CommentReducer<'a> {
    /// Creates a reducer for `comment`, which must start with `//` or `/*`.
    fn new(comment: &'a str) -> CommentReducer<'a> {
        let is_block = comment.starts_with("/*");
        let comment = remove_comment_header(comment);
        CommentReducer {
            is_block,
            // There are no supplementary '*' on the first line.
            at_start_line: false,
            iter: comment.chars(),
        }
    }
}

impl<'a> Iterator for CommentReducer<'a> {
    type Item = char;

    /// Returns the next non-whitespace payload character, or `None` when the comment is
    /// exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let mut c = self.iter.next()?;
            if self.is_block && self.at_start_line {
                // Skip the indentation (and any blank lines) in front of the line content.
                while c.is_whitespace() {
                    c = self.iter.next()?;
                }
                // Ignore leading '*'.
                if c == '*' {
                    c = self.iter.next()?;
                }
                // The decorative line prefix is behind us: from here on a '*' is payload.
                // Without this reset every later '*' of the comment would be dropped too.
                self.at_start_line = false;
            }
            // Checked after the prefix handling as well, because the character following a
            // lone leading '*' may itself be the line break.
            if c == '\n' {
                self.at_start_line = true;
            }
            if !c.is_whitespace() {
                return Some(c);
            }
        }
    }
}

/// Strips the opening mark (`///`, `//!`, `//`, `/**`, `/*!` or `/*`) from `comment` and, for
/// block comments, the closing `*/` when it is present.
///
/// A block comment without a closing mark is returned with only its opener removed instead
/// of panicking on an out-of-range slice.
///
/// # Panics
///
/// Panics if `comment` does not start with `//` or `/*`.
fn remove_comment_header(comment: &str) -> &str {
    if comment.starts_with("///") || comment.starts_with("//!") {
        return &comment[3..];
    }
    if let Some(stripped) = comment.strip_prefix("//") {
        return stripped;
    }

    // `/**/` is an empty plain block comment, not a doc comment opener.
    let is_doc_block = (comment.starts_with("/**") && !comment.starts_with("/**/"))
        || comment.starts_with("/*!");
    let opener_len = if is_doc_block {
        3
    } else {
        assert!(
            comment.starts_with("/*"),
            "string '{comment}' is not a comment"
        );
        2
    };

    let body = &comment[opener_len..];
    body.strip_suffix("*/").unwrap_or(body)
}
1838
1839#[cfg(test)]
1840mod test {
1841    use super::*;
1842
1843    #[test]
1844    fn char_classes() {
1845        let mut iter = CharClasses::new("//\n\n".chars());
1846
1847        assert_eq!((FullCodeCharKind::StartComment, '/'), iter.next().unwrap());
1848        assert_eq!((FullCodeCharKind::InComment, '/'), iter.next().unwrap());
1849        assert_eq!((FullCodeCharKind::EndComment, '\n'), iter.next().unwrap());
1850        assert_eq!((FullCodeCharKind::Normal, '\n'), iter.next().unwrap());
1851        assert_eq!(None, iter.next());
1852    }
1853
1854    #[test]
1855    fn comment_code_slices() {
1856        let input = "code(); /* test */ 1 + 1";
1857        let mut iter = CommentCodeSlices::new(input);
1858
1859        assert_eq!((CodeCharKind::Normal, 0, "code(); "), iter.next().unwrap());
1860        assert_eq!(
1861            (CodeCharKind::Comment, 8, "/* test */"),
1862            iter.next().unwrap()
1863        );
1864        assert_eq!((CodeCharKind::Normal, 18, " 1 + 1"), iter.next().unwrap());
1865        assert_eq!(None, iter.next());
1866    }
1867
1868    #[test]
1869    fn comment_code_slices_two() {
1870        let input = "// comment\n    test();";
1871        let mut iter = CommentCodeSlices::new(input);
1872
1873        assert_eq!((CodeCharKind::Normal, 0, ""), iter.next().unwrap());
1874        assert_eq!(
1875            (CodeCharKind::Comment, 0, "// comment\n"),
1876            iter.next().unwrap()
1877        );
1878        assert_eq!(
1879            (CodeCharKind::Normal, 11, "    test();"),
1880            iter.next().unwrap()
1881        );
1882        assert_eq!(None, iter.next());
1883    }
1884
1885    #[test]
1886    fn comment_code_slices_three() {
1887        let input = "1 // comment\n    // comment2\n\n";
1888        let mut iter = CommentCodeSlices::new(input);
1889
1890        assert_eq!((CodeCharKind::Normal, 0, "1 "), iter.next().unwrap());
1891        assert_eq!(
1892            (CodeCharKind::Comment, 2, "// comment\n    // comment2\n"),
1893            iter.next().unwrap()
1894        );
1895        assert_eq!((CodeCharKind::Normal, 29, "\n"), iter.next().unwrap());
1896        assert_eq!(None, iter.next());
1897    }
1898
    // Exercises `rewrite_comment` on line, block and custom comments, using one config with
    // both `wrap_comments` and `normalize_comments` enabled and one with only `wrap_comments`.
    // The function is marked `#[rustfmt::skip]`, so the hand-aligned argument lists and the
    // multi-line raw-string inputs below stay exactly as written.
    #[test]
    #[rustfmt::skip]
    fn format_doc_comments() {
        // Config with both comment wrapping and comment normalization turned on.
        let mut wrap_normalize_config: crate::config::Config = Default::default();
        wrap_normalize_config.set().wrap_comments(true);
        wrap_normalize_config.set().normalize_comments(true);

        // Config with only comment wrapping turned on.
        let mut wrap_config: crate::config::Config = Default::default();
        wrap_config.set().wrap_comments(true);

        // A line comment rewritten with `true` as second argument comes out as a block comment.
        let comment = rewrite_comment(" //test",
                                      true,
                                      Shape::legacy(100, Indent::new(0, 100)),
                                      &wrap_normalize_config).unwrap();
        assert_eq!("/* test */", comment);

        // A comment wider than the shape (10) is wrapped onto a second line.
        let comment = rewrite_comment("// comment on a",
                                      false,
                                      Shape::legacy(10, Indent::empty()),
                                      &wrap_normalize_config).unwrap();
        assert_eq!("// comment\n// on a", comment);

        // The continuation line is re-indented (13 spaces in, 12 spaces out).
        let comment = rewrite_comment("//  A multi line comment\n             // between args.",
                                      false,
                                      Shape::legacy(60, Indent::new(0, 12)),
                                      &wrap_normalize_config).unwrap();
        assert_eq!("//  A multi line comment\n            // between args.", comment);

        // The expected block comment is 13 characters wide although the shape width is 9.
        let input = "// comment";
        let expected =
            "/* comment */";
        let comment = rewrite_comment(input,
                                      true,
                                      Shape::legacy(9, Indent::new(0, 69)),
                                      &wrap_normalize_config).unwrap();
        assert_eq!(expected, comment);

        // Extra whitespace inside a block comment is trimmed.
        let comment = rewrite_comment("/*   trimmed    */",
                                      true,
                                      Shape::legacy(100, Indent::new(0, 100)),
                                      &wrap_normalize_config).unwrap();
        assert_eq!("/* trimmed */", comment);

        // Check that different comment style are properly recognised.
        let comment = rewrite_comment(r#"/// test1
                                         /// test2
                                         /*
                                          * test3
                                          */"#,
                                      false,
                                      Shape::legacy(100, Indent::new(0, 0)),
                                      &wrap_normalize_config).unwrap();
        assert_eq!("/// test1\n/// test2\n// test3", comment);

        // Check that the blank line marks the end of a commented paragraph.
        let comment = rewrite_comment(r#"// test1

                                         // test2"#,
                                      false,
                                      Shape::legacy(100, Indent::new(0, 0)),
                                      &wrap_normalize_config).unwrap();
        assert_eq!("// test1\n\n// test2", comment);

        // Check that the blank line marks the end of a custom-commented paragraph.
        let comment = rewrite_comment(r#"//@ test1

                                         //@ test2"#,
                                      false,
                                      Shape::legacy(100, Indent::new(0, 0)),
                                      &wrap_normalize_config).unwrap();
        assert_eq!("//@ test1\n\n//@ test2", comment);

        // Check that bare lines are just indented but otherwise left unchanged.
        let comment = rewrite_comment(r#"// test1
                                         /*
                                           a bare line!

                                                another bare line!
                                          */"#,
                                      false,
                                      Shape::legacy(100, Indent::new(0, 0)),
                                      &wrap_config).unwrap();
        assert_eq!("// test1\n/*\n a bare line!\n\n      another bare line!\n*/", comment);
    }
1983
1984    // This is probably intended to be a non-test fn, but it is not used.
1985    // We should keep this around unless it helps us test stuff to remove it.
1986    fn uncommented(text: &str) -> String {
1987        CharClasses::new(text.chars())
1988            .filter_map(|(s, c)| match s {
1989                FullCodeCharKind::Normal | FullCodeCharKind::InString => Some(c),
1990                _ => None,
1991            })
1992            .collect()
1993    }
1994
1995    #[test]
1996    fn test_uncommented() {
1997        assert_eq!(&uncommented("abc/*...*/"), "abc");
1998        assert_eq!(
1999            &uncommented("// .... /* \n../* /* *** / */ */a/* // */c\n"),
2000            "..ac\n"
2001        );
2002        assert_eq!(&uncommented("abc \" /* */\" qsdf"), "abc \" /* */\" qsdf");
2003    }
2004
2005    #[test]
2006    fn test_contains_comment() {
2007        assert_eq!(contains_comment("abc"), false);
2008        assert_eq!(contains_comment("abc // qsdf"), true);
2009        assert_eq!(contains_comment("abc /* kqsdf"), true);
2010        assert_eq!(contains_comment("abc \" /* */\" qsdf"), false);
2011    }
2012
2013    #[test]
2014    fn test_find_uncommented() {
2015        fn check(haystack: &str, needle: &str, expected: Option<usize>) {
2016            assert_eq!(expected, haystack.find_uncommented(needle));
2017        }
2018
2019        check("/*/ */test", "test", Some(6));
2020        check("//test\ntest", "test", Some(7));
2021        check("/* comment only */", "whatever", None);
2022        check(
2023            "/* comment */ some text /* more commentary */ result",
2024            "result",
2025            Some(46),
2026        );
2027        check("sup // sup", "p", Some(2));
2028        check("sup", "x", None);
2029        check(r#"π? /**/ π is nice!"#, r#"π is nice"#, Some(9));
2030        check("/*sup yo? \n sup*/ sup", "p", Some(20));
2031        check("hel/*lohello*/lo", "hello", None);
2032        check("acb", "ab", None);
2033        check(",/*A*/ ", ",", Some(0));
2034        check("abc", "abc", Some(0));
2035        check("/* abc */", "abc", None);
2036        check("/**/abc/* */", "abc", Some(4));
2037        check("\"/* abc */\"", "abc", Some(4));
2038        check("\"/* abc", "abc", Some(4));
2039    }
2040
2041    #[test]
2042    fn test_filter_normal_code() {
2043        let s = r#"
2044fn main() {
2045    println!("hello, world");
2046}
2047"#;
2048        assert_eq!(s, filter_normal_code(s));
2049        let s_with_comment = r#"
2050fn main() {
2051    // hello, world
2052    println!("hello, world");
2053}
2054"#;
2055        assert_eq!(s, filter_normal_code(s_with_comment));
2056    }
2057
2058    #[test]
2059    fn test_itemized_block_first_line_handling() {
2060        fn run_test(
2061            test_input: &str,
2062            expected_line: &str,
2063            expected_indent: usize,
2064            expected_opener: &str,
2065            expected_line_start: &str,
2066        ) {
2067            let block = ItemizedBlock::new(test_input).unwrap();
2068            assert_eq!(1, block.lines.len(), "test_input: {test_input:?}");
2069            assert_eq!(expected_line, &block.lines[0], "test_input: {test_input:?}");
2070            assert_eq!(expected_indent, block.indent, "test_input: {test_input:?}");
2071            assert_eq!(expected_opener, &block.opener, "test_input: {test_input:?}");
2072            assert_eq!(
2073                expected_line_start, &block.line_start,
2074                "test_input: {test_input:?}"
2075            );
2076        }
2077
2078        run_test("- foo", "foo", 2, "- ", "  ");
2079        run_test("* foo", "foo", 2, "* ", "  ");
2080        run_test("> foo", "foo", 2, "> ", "> ");
2081
2082        run_test("1. foo", "foo", 3, "1. ", "   ");
2083        run_test("12. foo", "foo", 4, "12. ", "    ");
2084        run_test("1) foo", "foo", 3, "1) ", "   ");
2085        run_test("12) foo", "foo", 4, "12) ", "    ");
2086
2087        run_test("    - foo", "foo", 6, "    - ", "      ");
2088
2089        // https://spec.commonmark.org/0.30 says: "A start number may begin with 0s":
2090        run_test("0. foo", "foo", 3, "0. ", "   ");
2091        run_test("01. foo", "foo", 4, "01. ", "    ");
2092    }
2093
2094    #[test]
2095    fn test_itemized_block_nonobvious_markers_are_rejected() {
2096        let test_inputs = vec![
2097            // Non-numeric item markers (e.g. `a.` or `iv.`) are not allowed by
2098            // https://spec.commonmark.org/0.30/#ordered-list-marker. We also note that allowing
2099            // them would risk misidentifying regular words as item markers. See also the
2100            // discussion in https://talk.commonmark.org/t/blank-lines-before-lists-revisited/1990
2101            "word.  rest of the paragraph.",
2102            "a.  maybe this is a list item?  maybe not?",
2103            "iv.  maybe this is a list item?  maybe not?",
2104            // Numbers with 3 or more digits are not recognized as item markers, to avoid
2105            // formatting the following example as a list:
2106            //
2107            // ```
2108            // The Captain died in
2109            // 1868. He was buried in...
2110            // ```
2111            "123.  only 2-digit numbers are recognized as item markers.",
2112            // Parens:
2113            "123)  giving some coverage to parens as well.",
2114            "a)  giving some coverage to parens as well.",
2115            // https://spec.commonmark.org/0.30 says that "at least one space or tab is needed
2116            // between the list marker and any following content":
2117            "1.Not a list item.",
2118            "1.2.3. Not a list item.",
2119            "1)Not a list item.",
2120            "-Not a list item.",
2121            "+Not a list item.",
2122            "+1 not a list item.",
2123            // https://spec.commonmark.org/0.30 says: "A start number may not be negative":
2124            "-1. Not a list item.",
2125            "-1 Not a list item.",
2126            // Marker without prefix are not recognized as item markers:
2127            ".   Not a list item.",
2128            ")   Not a list item.",
2129        ];
2130        for line in test_inputs.iter() {
2131            let maybe_block = ItemizedBlock::new(line);
2132            assert!(
2133                maybe_block.is_none(),
2134                "The following line shouldn't be classified as a list item: {line}"
2135            );
2136        }
2137    }
2138}