rustfmt_nightly/
comment.rs

1// Formatting and tools for comments.
2
3use std::{borrow::Cow, iter};
4
5use itertools::{Itertools as _, MultiPeek, multipeek};
6use rustc_span::Span;
7use tracing::{debug, trace};
8
9use crate::config::Config;
10use crate::rewrite::{RewriteContext, RewriteErrorExt, RewriteResult};
11use crate::shape::{Indent, Shape};
12use crate::string::{StringFormat, rewrite_string};
13use crate::utils::{
14    count_newlines, first_line_width, last_line_width, trim_left_preserve_layout,
15    trimmed_last_line_width, unicode_str_width,
16};
17use crate::{ErrorKind, FormattingError};
18
/// Returns `true` if `comment` is a line comment with a custom opener, i.e. `//` directly
/// followed by a character that is neither alphanumeric nor whitespace (e.g. `//@` or `//~`).
///
/// A bare `//` and anything that does not start with `//` yield `false`.
fn is_custom_comment(comment: &str) -> bool {
    // The character right after the two slashes decides; no such character means "not custom".
    match comment.strip_prefix("//").and_then(|rest| rest.chars().next()) {
        Some(marker) => !(marker.is_alphanumeric() || marker.is_whitespace()),
        None => false,
    }
}
28
/// The syntactic flavor of a comment, as classified by `comment_style`.
///
/// The lifetime `'a` is the lifetime of the comment text that a `Custom` opener borrows from.
#[derive(Copy, Clone, PartialEq, Eq)]
pub(crate) enum CommentStyle<'a> {
    /// Line comment opened with `// `.
    DoubleSlash,
    /// Line doc comment opened with `/// `.
    TripleSlash,
    /// Line doc comment opened with `//! `.
    Doc,
    /// Block comment opened with `/* ` and closed with ` */`.
    SingleBullet,
    /// Block comment opened with `/** ` and closed with ` */`.
    DoubleBullet,
    /// Block comment opened with `/*! ` and closed with ` */`.
    Exclamation,
    /// Line comment with a custom opener (see `is_custom_comment`); the payload is the opener
    /// text, which is returned as-is by both `opener()` and `line_start()`.
    Custom(&'a str),
}
39
/// Extracts the opener of a custom comment from `s`.
///
/// Only the first line of `s` is inspected. The opener is everything up to and including the
/// first space on that line, or the whole line when it contains no space. An input without
/// any line yields the empty string.
fn custom_opener(s: &str) -> &str {
    let first_line = match s.lines().next() {
        Some(line) => line,
        None => return "",
    };
    match first_line.find(' ') {
        // Keep the space itself as part of the opener.
        Some(space_index) => &first_line[..=space_index],
        None => first_line,
    }
}
47
48impl<'a> CommentStyle<'a> {
49    /// Returns `true` if the commenting style cannot span multiple lines.
50    pub(crate) fn is_line_comment(&self) -> bool {
51        matches!(
52            self,
53            CommentStyle::DoubleSlash
54                | CommentStyle::TripleSlash
55                | CommentStyle::Doc
56                | CommentStyle::Custom(_)
57        )
58    }
59
60    /// Returns `true` if the commenting style can span multiple lines.
61    pub(crate) fn is_block_comment(&self) -> bool {
62        matches!(
63            self,
64            CommentStyle::SingleBullet | CommentStyle::DoubleBullet | CommentStyle::Exclamation
65        )
66    }
67
68    /// Returns `true` if the commenting style is for documentation.
69    pub(crate) fn is_doc_comment(&self) -> bool {
70        matches!(*self, CommentStyle::TripleSlash | CommentStyle::Doc)
71    }
72
73    pub(crate) fn opener(&self) -> &'a str {
74        match *self {
75            CommentStyle::DoubleSlash => "// ",
76            CommentStyle::TripleSlash => "/// ",
77            CommentStyle::Doc => "//! ",
78            CommentStyle::SingleBullet => "/* ",
79            CommentStyle::DoubleBullet => "/** ",
80            CommentStyle::Exclamation => "/*! ",
81            CommentStyle::Custom(opener) => opener,
82        }
83    }
84
85    pub(crate) fn closer(&self) -> &'a str {
86        match *self {
87            CommentStyle::DoubleSlash
88            | CommentStyle::TripleSlash
89            | CommentStyle::Custom(..)
90            | CommentStyle::Doc => "",
91            CommentStyle::SingleBullet | CommentStyle::DoubleBullet | CommentStyle::Exclamation => {
92                " */"
93            }
94        }
95    }
96
97    pub(crate) fn line_start(&self) -> &'a str {
98        match *self {
99            CommentStyle::DoubleSlash => "// ",
100            CommentStyle::TripleSlash => "/// ",
101            CommentStyle::Doc => "//! ",
102            CommentStyle::SingleBullet | CommentStyle::DoubleBullet | CommentStyle::Exclamation => {
103                " * "
104            }
105            CommentStyle::Custom(opener) => opener,
106        }
107    }
108
109    pub(crate) fn to_str_tuplet(&self) -> (&'a str, &'a str, &'a str) {
110        (self.opener(), self.closer(), self.line_start())
111    }
112}
113
114pub(crate) fn comment_style(orig: &str, normalize_comments: bool) -> CommentStyle<'_> {
115    if !normalize_comments {
116        if orig.starts_with("/**") && !orig.starts_with("/**/") {
117            CommentStyle::DoubleBullet
118        } else if orig.starts_with("/*!") {
119            CommentStyle::Exclamation
120        } else if orig.starts_with("/*") {
121            CommentStyle::SingleBullet
122        } else if orig.starts_with("///") && orig.chars().nth(3).map_or(true, |c| c != '/') {
123            CommentStyle::TripleSlash
124        } else if orig.starts_with("//!") {
125            CommentStyle::Doc
126        } else if is_custom_comment(orig) {
127            CommentStyle::Custom(custom_opener(orig))
128        } else {
129            CommentStyle::DoubleSlash
130        }
131    } else if (orig.starts_with("///") && orig.chars().nth(3).map_or(true, |c| c != '/'))
132        || (orig.starts_with("/**") && !orig.starts_with("/**/"))
133    {
134        CommentStyle::TripleSlash
135    } else if orig.starts_with("//!") || orig.starts_with("/*!") {
136        CommentStyle::Doc
137    } else if is_custom_comment(orig) {
138        CommentStyle::Custom(custom_opener(orig))
139    } else {
140        CommentStyle::DoubleSlash
141    }
142}
143
/// Returns `true` if `s`, ignoring trailing whitespace, ends with the block-comment
/// terminator `*/`.
pub(crate) fn is_last_comment_block(s: &str) -> bool {
    let without_trailing_ws = s.trim_end();
    without_trailing_ws.strip_suffix("*/").is_some()
}
148
/// Combine `prev_str` and `next_str` into a single `String`. `span` may contain
/// comments between two strings. If there are such comments, then that will be
/// recovered. If `allow_extend` is true and there is no comment between the two
/// strings, then they will be put on a single line as long as doing so does not
/// exceed `shape.width`.
///
/// When a comment is recovered, the original layout of `span` is consulted: the comment (and
/// `next_str` after it) stays on the same line only if the source had no newline before the
/// comment and the combined text fits in `shape.width`.
///
/// # Errors
///
/// Propagates the error returned by `rewrite_missing_comment` for `span`.
pub(crate) fn combine_strs_with_missing_comments(
    context: &RewriteContext<'_>,
    prev_str: &str,
    next_str: &str,
    span: Span,
    shape: Shape,
    allow_extend: bool,
) -> RewriteResult {
    trace!(
        "combine_strs_with_missing_comments `{}` `{}` {:?} {:?}",
        prev_str, next_str, span, shape
    );

    let mut result =
        String::with_capacity(prev_str.len() + next_str.len() + shape.indent.width() + 128);
    result.push_str(prev_str);
    // A single-line result is only possible when neither side already spans several lines.
    let mut allow_one_line = !prev_str.contains('\n') && !next_str.contains('\n');
    // Separator used when there is no comment in between: nothing if either side is empty or
    // `trimmed_last_line_width(prev_str)` is zero, a single space otherwise.
    let first_sep =
        if prev_str.is_empty() || next_str.is_empty() || trimmed_last_line_width(prev_str) == 0 {
            ""
        } else {
            " "
        };
    let mut one_line_width =
        last_line_width(prev_str) + first_line_width(next_str) + first_sep.len();

    let config = context.config;
    let indent = shape.indent;
    let missing_comment = rewrite_missing_comment(span, shape, context)?;

    // No comment to recover: join the two strings directly, either on one line (when allowed
    // and it fits) or with a newline plus indentation.
    if missing_comment.is_empty() {
        if allow_extend && one_line_width <= shape.width {
            result.push_str(first_sep);
        } else if !prev_str.is_empty() {
            result.push_str(&indent.to_string_with_newline(config))
        }
        result.push_str(next_str);
        return Ok(result);
    }

    // We have a missing comment between the first expression and the second expression.

    // Peek the original source code and find out whether there is a newline between the first
    // expression and the second expression or the missing comment. We will preserve the original
    // layout whenever possible.
    let original_snippet = context.snippet(span);
    let prefer_same_line = if let Some(pos) = original_snippet.find('/') {
        !original_snippet[..pos].contains('\n')
    } else {
        !original_snippet.contains('\n')
    };

    // Undo the separator accounted for above; the separators around the comment are added
    // back below once they are known.
    one_line_width -= first_sep.len();
    // Separator between `prev_str` and the comment. `missing_comment` is known to be non-empty
    // here (see the early return above), so only `prev_str.is_empty()` can select the empty
    // separator.
    let first_sep = if prev_str.is_empty() || missing_comment.is_empty() {
        Cow::from("")
    } else {
        let one_line_width = last_line_width(prev_str) + first_line_width(&missing_comment) + 1;
        if prefer_same_line && one_line_width <= shape.width {
            Cow::from(" ")
        } else {
            indent.to_string_with_newline(config)
        }
    };
    result.push_str(&first_sep);
    result.push_str(&missing_comment);

    // Separator between the comment and `next_str`. A `//` comment runs to the end of the line,
    // so whatever follows it must start on a new line.
    let second_sep = if missing_comment.is_empty() || next_str.is_empty() {
        Cow::from("")
    } else if missing_comment.starts_with("//") {
        indent.to_string_with_newline(config)
    } else {
        one_line_width += missing_comment.len() + first_sep.len() + 1;
        // The `starts_with("//")` test is already excluded by the branch above; what matters
        // here is that the comment itself is a single line.
        allow_one_line &= !missing_comment.starts_with("//") && !missing_comment.contains('\n');
        if prefer_same_line && allow_one_line && one_line_width <= shape.width {
            Cow::from(" ")
        } else {
            indent.to_string_with_newline(config)
        }
    };
    result.push_str(&second_sep);
    result.push_str(next_str);

    Ok(result)
}
238
/// Rewrites `orig` as a doc comment.
///
/// Delegates to `identify_comment` with `block_style` set to `false` and `is_doc_comment`
/// set to `true`.
pub(crate) fn rewrite_doc_comment(orig: &str, shape: Shape, config: &Config) -> RewriteResult {
    identify_comment(orig, false, shape, config, true)
}
242
/// Rewrites `orig` as a regular (non-doc) comment.
///
/// Delegates to `identify_comment`, forwarding `block_style` and passing `false` for
/// `is_doc_comment`.
pub(crate) fn rewrite_comment(
    orig: &str,
    block_style: bool,
    shape: Shape,
    config: &Config,
) -> RewriteResult {
    identify_comment(orig, block_style, shape, config, false)
}
251
/// Rewrites the comment text in `orig` one group at a time.
///
/// The first group (a run of line comments sharing one style, or a single block comment up to
/// its closer) is split off and rewritten; the function then recurses on the remaining text
/// and joins the pieces with a newline and the indentation of `shape`.
///
/// `is_doc_comment` is forwarded to the rewriting helpers and decides whether
/// `format_code_in_doc_comments` is taken into account for this comment.
fn identify_comment(
    orig: &str,
    block_style: bool,
    shape: Shape,
    config: &Config,
    is_doc_comment: bool,
) -> RewriteResult {
    // The style is detected without normalization so that grouping follows the source text.
    let style = comment_style(orig, false);

    // Computes the byte length of line taking into account a newline if the line is part of a
    // paragraph.
    fn compute_len(orig: &str, line: &str) -> usize {
        if orig.len() > line.len() {
            // `line` is followed by a line terminator: `\r\n` takes two bytes, `\n` one.
            if orig.as_bytes()[line.len()] == b'\r' {
                line.len() + 2
            } else {
                line.len() + 1
            }
        } else {
            line.len()
        }
    }

    // Get the first group of line comments having the same commenting style.
    //
    // Returns a tuple with:
    // - a boolean indicating if there is a blank line
    // - a number indicating the size of the first group of comments
    fn consume_same_line_comments(
        style: CommentStyle<'_>,
        orig: &str,
        line_start: &str,
    ) -> (bool, usize) {
        let mut first_group_ending = 0;
        let mut hbl = false;

        for line in orig.lines() {
            let trimmed_line = line.trim_start();
            if trimmed_line.is_empty() {
                // A blank line terminates the group and is reported to the caller.
                hbl = true;
                break;
            } else if trimmed_line.starts_with(line_start)
                || comment_style(trimmed_line, false) == style
            {
                first_group_ending += compute_len(&orig[first_group_ending..], line);
            } else {
                // A line with a different comment style starts the next group.
                break;
            }
        }
        (hbl, first_group_ending)
    }

    // `has_bare_lines` means "a blank line ended the group" for line comments, and "some line
    // does not start with `*`, `//` or `/*`" for block comments.
    let (has_bare_lines, first_group_ending) = match style {
        CommentStyle::DoubleSlash | CommentStyle::TripleSlash | CommentStyle::Doc => {
            let line_start = style.line_start().trim_start();
            consume_same_line_comments(style, orig, line_start)
        }
        CommentStyle::Custom(opener) => {
            let trimmed_opener = opener.trim_end();
            consume_same_line_comments(style, orig, trimmed_opener)
        }
        // for a block comment, search for the closing symbol
        CommentStyle::DoubleBullet | CommentStyle::SingleBullet | CommentStyle::Exclamation => {
            let closer = style.closer().trim_start();
            // Number of closers in the whole text; decremented for each line ending with one.
            let mut count = orig.matches(closer).count();
            let mut closing_symbol_offset = 0;
            let mut hbl = false;
            let mut first = true;
            for line in orig.lines() {
                closing_symbol_offset += compute_len(&orig[closing_symbol_offset..], line);
                let mut trimmed_line = line.trim_start();
                if !trimmed_line.starts_with('*')
                    && !trimmed_line.starts_with("//")
                    && !trimmed_line.starts_with("/*")
                {
                    hbl = true;
                }

                // Remove opener from consideration when searching for closer
                if first {
                    let opener = style.opener().trim_end();
                    trimmed_line = &trimmed_line[opener.len()..];
                    first = false;
                }
                if trimmed_line.ends_with(closer) {
                    count -= 1;
                    if count == 0 {
                        break;
                    }
                }
            }
            (hbl, closing_symbol_offset)
        }
    };

    let (first_group, rest) = orig.split_at(first_group_ending);
    // Pick the rewriting strategy for the first group:
    // - block comments with bare lines keep their layout and are only re-indented,
    // - if neither normalization, wrapping nor doc code formatting applies, a light rewrite
    //   is used,
    // - everything else goes through the full rewrite.
    let rewritten_first_group =
        if !config.normalize_comments() && has_bare_lines && style.is_block_comment() {
            trim_left_preserve_layout(first_group, shape.indent, config).unknown_error()?
        } else if !config.normalize_comments()
            && !config.wrap_comments()
            && !(
                // `format_code_in_doc_comments` should only take effect on doc comments,
                // so we only consider it when this comment block is a doc comment block.
                is_doc_comment && config.format_code_in_doc_comments()
            )
        {
            light_rewrite_comment(first_group, shape.indent, config, is_doc_comment)
        } else {
            rewrite_comment_inner(
                first_group,
                block_style,
                style,
                shape,
                config,
                is_doc_comment || style.is_doc_comment(),
            )?
        };
    if rest.is_empty() {
        Ok(rewritten_first_group)
    } else {
        // Rewrite the remaining groups recursively and append them after the first group.
        identify_comment(
            rest.trim_start(),
            block_style,
            shape,
            config,
            is_doc_comment,
        )
        .map(|rest_str| {
            format!(
                "{}\n{}{}{}",
                rewritten_first_group,
                // insert back the blank line
                if has_bare_lines && style.is_line_comment() {
                    "\n"
                } else {
                    ""
                },
                shape.indent.to_string(config),
                rest_str
            )
        })
    }
}
396
/// Enum indicating if the code block contains rust based on attributes
enum CodeBlockAttribute {
    /// Every attribute of the code block is compatible with the block being Rust code.
    Rust,
    /// At least one attribute marks the block as something that must not be treated as Rust.
    NotRust,
}

impl CodeBlockAttribute {
    /// Parse comma separated attributes list. Return rust only if all
    /// attributes are valid rust attributes
    /// See <https://doc.rust-lang.org/rustdoc/print.html#attributes>
    ///
    /// Empty attributes (including an entirely empty list) are accepted. Anything that is not
    /// explicitly recognized, such as `ignore`, `compile_fail`, `text` or another language
    /// name, makes the whole block `NotRust`.
    fn new(attributes: &str) -> CodeBlockAttribute {
        let all_rust = attributes.split(',').all(|attribute| {
            matches!(
                attribute.trim(),
                "" | "rust"
                    | "should_panic"
                    | "no_run"
                    | "edition2015"
                    | "edition2018"
                    | "edition2021"
                    | "edition2024"
            )
        });

        if all_rust {
            CodeBlockAttribute::Rust
        } else {
            CodeBlockAttribute::NotRust
        }
    }
}
419
/// Block that is formatted as an item.
///
/// An item starts with either a star `*`, a dash `-`, a greater-than `>`, a plus `+`, or a number
/// `12.` or `34)` (with at most 2 digits). An item represents CommonMark's ["list
/// items"](https://spec.commonmark.org/0.30/#list-items) and/or ["block
/// quotes"](https://spec.commonmark.org/0.30/#block-quotes), but note that only a subset of
/// CommonMark is recognized - see the doc comment of [`ItemizedBlock::get_marker_length`] for more
/// details.
///
/// Different levels of indentation are handled by shrinking the shape accordingly.
struct ItemizedBlock {
    /// the lines that are identified as part of an itemized block
    lines: Vec<String>,
    /// the number of characters (typically whitespaces) up to and including the item marker,
    /// i.e. the leading whitespace plus the marker length; for block quotes this is the length
    /// of `line_start` instead
    indent: usize,
    /// the string that marks the start of an item
    opener: String,
    /// sequence of characters (typically whitespaces) to prefix new lines that are part of the item
    line_start: String,
}
440
441impl ItemizedBlock {
442    /// Checks whether the `trimmed` line includes an item marker. Returns `None` if there is no
443    /// marker. Returns the length of the marker (in bytes) if one is present. Note that the length
444    /// includes the whitespace that follows the marker, for example the marker in `"* list item"`
445    /// has the length of 2.
446    ///
447    /// This function recognizes item markers that correspond to CommonMark's
448    /// ["bullet list marker"](https://spec.commonmark.org/0.30/#bullet-list-marker),
449    /// ["block quote marker"](https://spec.commonmark.org/0.30/#block-quote-marker), and/or
450    /// ["ordered list marker"](https://spec.commonmark.org/0.30/#ordered-list-marker).
451    ///
452    /// Compared to CommonMark specification, the number of digits that are allowed in an ["ordered
453    /// list marker"](https://spec.commonmark.org/0.30/#ordered-list-marker) is more limited (to at
454    /// most 2 digits). Limiting the length of the marker helps reduce the risk of recognizing
455    /// arbitrary numbers as markers. See also
456    /// <https://talk.commonmark.org/t/blank-lines-before-lists-revisited/1990> which gives the
457    /// following example where a number (i.e. "1868") doesn't signify an ordered list:
458    /// ```md
459    /// The Captain died in
460    /// 1868. He wes buried in...
461    /// ```
462    fn get_marker_length(trimmed: &str) -> Option<usize> {
463        // https://spec.commonmark.org/0.30/#bullet-list-marker or
464        // https://spec.commonmark.org/0.30/#block-quote-marker
465        let itemized_start = ["* ", "- ", "> ", "+ "];
466        if itemized_start.iter().any(|s| trimmed.starts_with(s)) {
467            return Some(2); // All items in `itemized_start` have length 2.
468        }
469
470        // https://spec.commonmark.org/0.30/#ordered-list-marker, where at most 2 digits are
471        // allowed.
472        for suffix in [". ", ") "] {
473            if let Some((prefix, _)) = trimmed.split_once(suffix) {
474                let has_leading_digits = (1..=2).contains(&prefix.len())
475                    && prefix.chars().all(|c| char::is_ascii_digit(&c));
476                if has_leading_digits {
477                    return Some(prefix.len() + suffix.len());
478                }
479            }
480        }
481
482        None // No markers found.
483    }
484
485    /// Creates a new `ItemizedBlock` described with the given `line`.
486    /// Returns `None` if `line` doesn't start an item.
487    fn new(line: &str) -> Option<ItemizedBlock> {
488        let marker_length = ItemizedBlock::get_marker_length(line.trim_start())?;
489        let space_to_marker = line.chars().take_while(|c| c.is_whitespace()).count();
490        let mut indent = space_to_marker + marker_length;
491        let mut line_start = " ".repeat(indent);
492
493        // Markdown blockquote start with a "> "
494        if line.trim_start().starts_with('>') {
495            // remove the original +2 indent because there might be multiple nested block quotes
496            // and it's easier to reason about the final indent by just taking the length
497            // of the new line_start. We update the indent because it effects the max width
498            // of each formatted line.
499            line_start = itemized_block_quote_start(line, line_start, 2);
500            indent = line_start.len();
501        }
502        Some(ItemizedBlock {
503            lines: vec![line[indent..].to_string()],
504            indent,
505            opener: line[..indent].to_string(),
506            line_start,
507        })
508    }
509
510    /// Returns a `StringFormat` used for formatting the content of an item.
511    fn create_string_format<'a>(&'a self, fmt: &'a StringFormat<'_>) -> StringFormat<'a> {
512        StringFormat {
513            opener: "",
514            closer: "",
515            line_start: "",
516            line_end: "",
517            shape: Shape::legacy(fmt.shape.width.saturating_sub(self.indent), Indent::empty()),
518            trim_end: true,
519            config: fmt.config,
520        }
521    }
522
523    /// Returns `true` if the line is part of the current itemized block.
524    /// If it is, then it is added to the internal lines list.
525    fn add_line(&mut self, line: &str) -> bool {
526        if ItemizedBlock::get_marker_length(line.trim_start()).is_none()
527            && self.indent <= line.chars().take_while(|c| c.is_whitespace()).count()
528        {
529            self.lines.push(line.to_string());
530            return true;
531        }
532        false
533    }
534
535    /// Returns the block as a string, with each line trimmed at the start.
536    fn trimmed_block_as_string(&self) -> String {
537        self.lines.iter().fold(String::new(), |mut acc, line| {
538            acc.push_str(line.trim_start());
539            acc.push(' ');
540            acc
541        })
542    }
543
544    /// Returns the block as a string under its original form.
545    fn original_block_as_string(&self) -> String {
546        self.lines.join("\n")
547    }
548}
549
/// Determine the line_start when formatting markdown block quotes.
///
/// The incoming `line_start` likely contains indentation (whitespaces). Its last
/// `remove_indent` characters are dropped (fewer if it is shorter than that), and one `"> "`
/// is appended for every `>` found among the leading non-alphanumeric characters of `line`.
fn itemized_block_quote_start(line: &str, mut line_start: String, remove_indent: usize) -> String {
    // Nesting depth: `>` characters seen before the first alphanumeric character.
    let quote_level = line
        .chars()
        .take_while(|c| !c.is_alphanumeric())
        .filter(|&c| c == '>')
        .count();

    // Cut off the trailing `remove_indent` characters in one go.
    let kept_chars = line_start.chars().count().saturating_sub(remove_indent);
    let cut_at = line_start
        .char_indices()
        .nth(kept_chars)
        .map_or(line_start.len(), |(byte_index, _)| byte_index);
    line_start.truncate(cut_at);

    line_start.push_str(&"> ".repeat(quote_level));
    line_start
}
568
/// State carried while rewriting a single comment line by line.
struct CommentRewrite<'a> {
    /// output buffer; initialized with the comment opener by `CommentRewrite::new`
    result: String,
    /// lines of the fenced code block currently being read, each terminated by `\n`
    code_block_buffer: String,
    /// when set, the next non-empty line is appended after a single space instead of a
    /// comment line separator
    // NOTE(review): the place where this flag is set to `true` is outside this view.
    is_prev_line_multi_line: bool,
    /// `Some` while inside a fenced code block (between an opening and a closing "```")
    code_block_attr: Option<CodeBlockAttribute>,
    /// the itemized block currently being collected, if any
    item_block: Option<ItemizedBlock>,
    /// `indent_str` followed by `line_start`; written before each new comment line
    comment_line_separator: String,
    /// the indentation of the comment's shape, as produced by `to_string_with_newline`
    indent_str: String,
    /// the shape's width minus the opener and closer lengths, or 1 if that would underflow
    max_width: usize,
    /// the indentation of the shape the comment is rewritten into
    fmt_indent: Indent,
    /// string formatting settings; its `shape` is reset from `max_width` and `fmt_indent`
    /// before an itemized block is formatted
    fmt: StringFormat<'a>,

    /// the opener of the comment style in use
    opener: String,
    /// the closer of the comment style in use (empty for line comments)
    closer: String,
    /// the prefix of continuation lines for the comment style in use
    line_start: String,
    /// the comment style in use
    style: CommentStyle<'a>,
}
586
587impl<'a> CommentRewrite<'a> {
588    fn new(
589        orig: &'a str,
590        block_style: bool,
591        shape: Shape,
592        config: &'a Config,
593    ) -> CommentRewrite<'a> {
594        let ((opener, closer, line_start), style) = if block_style {
595            (
596                CommentStyle::SingleBullet.to_str_tuplet(),
597                CommentStyle::SingleBullet,
598            )
599        } else {
600            let style = comment_style(orig, config.normalize_comments());
601            (style.to_str_tuplet(), style)
602        };
603
604        let max_width = shape
605            .width
606            .checked_sub(closer.len() + opener.len())
607            .unwrap_or(1);
608        let indent_str = shape.indent.to_string_with_newline(config).to_string();
609
610        let mut cr = CommentRewrite {
611            result: String::with_capacity(orig.len() * 2),
612            code_block_buffer: String::with_capacity(128),
613            is_prev_line_multi_line: false,
614            code_block_attr: None,
615            item_block: None,
616            comment_line_separator: format!("{indent_str}{line_start}"),
617            max_width,
618            indent_str,
619            fmt_indent: shape.indent,
620
621            fmt: StringFormat {
622                opener: "",
623                closer: "",
624                line_start,
625                line_end: "",
626                shape: Shape::legacy(max_width, shape.indent),
627                trim_end: true,
628                config,
629            },
630
631            opener: opener.to_owned(),
632            closer: closer.to_owned(),
633            line_start: line_start.to_owned(),
634            style,
635        };
636        cr.result.push_str(opener);
637        cr
638    }
639
640    fn join_block(s: &str, sep: &str) -> String {
641        let mut result = String::with_capacity(s.len() + 128);
642        let mut iter = s.lines().peekable();
643        while let Some(line) = iter.next() {
644            result.push_str(line);
645            result.push_str(match iter.peek() {
646                Some(&"") => sep.trim_end(),
647                Some(..) => sep,
648                None => "",
649            });
650        }
651        result
652    }
653
    /// Check if any characters were written to the result buffer after the start of the comment.
    /// When calling [`CommentRewrite::new()`] the result buffer is initialized with the opening
    /// characters for the comment.
    ///
    /// Returns `true` whenever the length of the result buffer differs from the length of the
    /// opener, in either direction.
    fn buffer_contains_comment(&self) -> bool {
        // if self.result.len() < self.opener.len() then an empty comment is in the buffer
        // if self.result.len() > self.opener.len() then a non empty comment is in the buffer
        self.result.len() != self.opener.len()
    }
662
    /// Flushes the pending state and returns the rewritten comment.
    ///
    /// An unterminated code block is emitted as collected, a pending itemized block is
    /// formatted and emitted, and finally the closer of the comment style is appended.
    fn finish(mut self) -> String {
        if !self.code_block_buffer.is_empty() {
            // There is a code block that is not properly enclosed by backticks.
            // We will leave them untouched.
            self.result.push_str(&self.comment_line_separator);
            self.result.push_str(&Self::join_block(
                &trim_custom_comment_prefix(&self.code_block_buffer),
                &self.comment_line_separator,
            ));
        }

        if let Some(ref ib) = self.item_block {
            // the last few lines are part of an itemized block
            self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
            let item_fmt = ib.create_string_format(&self.fmt);

            // only push a comment_line_separator for ItemizedBlocks if the comment is not empty
            if self.buffer_contains_comment() {
                self.result.push_str(&self.comment_line_separator);
            }

            self.result.push_str(&ib.opener);
            // Re-wrap the item's text; if that fails, fall back to the lines as collected.
            match rewrite_string(
                &ib.trimmed_block_as_string(),
                &item_fmt,
                self.max_width.saturating_sub(ib.indent),
            ) {
                Some(s) => self.result.push_str(&Self::join_block(
                    &s,
                    &format!("{}{}", self.comment_line_separator, ib.line_start),
                )),
                None => self.result.push_str(&Self::join_block(
                    &ib.original_block_as_string(),
                    &self.comment_line_separator,
                )),
            };
        }

        self.result.push_str(&self.closer);
        // If the output ends with the opener (e.g. nothing was written after it and the closer
        // is empty), drop the space that terminates the opener.
        if self.result.ends_with(&self.opener) && self.opener.ends_with(' ') {
            // Trailing space.
            self.result.pop();
        }

        self.result
    }
709
710    fn handle_line(
711        &mut self,
712        orig: &'a str,
713        i: usize,
714        line: &'a str,
715        has_leading_whitespace: bool,
716        is_doc_comment: bool,
717    ) -> bool {
718        let num_newlines = count_newlines(orig);
719        let is_last = i == num_newlines;
720        let needs_new_comment_line = if self.style.is_block_comment() {
721            num_newlines > 0 || self.buffer_contains_comment()
722        } else {
723            self.buffer_contains_comment()
724        };
725
726        if let Some(ref mut ib) = self.item_block {
727            if ib.add_line(line) {
728                return false;
729            }
730            self.is_prev_line_multi_line = false;
731            self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
732            let item_fmt = ib.create_string_format(&self.fmt);
733
734            // only push a comment_line_separator if we need to start a new comment line
735            if needs_new_comment_line {
736                self.result.push_str(&self.comment_line_separator);
737            }
738
739            self.result.push_str(&ib.opener);
740            match rewrite_string(
741                &ib.trimmed_block_as_string(),
742                &item_fmt,
743                self.max_width.saturating_sub(ib.indent),
744            ) {
745                Some(s) => self.result.push_str(&Self::join_block(
746                    &s,
747                    &format!("{}{}", self.comment_line_separator, ib.line_start),
748                )),
749                None => self.result.push_str(&Self::join_block(
750                    &ib.original_block_as_string(),
751                    &self.comment_line_separator,
752                )),
753            };
754        } else if self.code_block_attr.is_some() {
755            if line.starts_with("```") {
756                let code_block = match self.code_block_attr.as_ref().unwrap() {
757                    CodeBlockAttribute::Rust
758                        if self.fmt.config.format_code_in_doc_comments()
759                            && !self.code_block_buffer.trim().is_empty() =>
760                    {
761                        let mut config = self.fmt.config.clone();
762                        config.set().wrap_comments(false);
763                        let comment_max_width = config
764                            .doc_comment_code_block_width()
765                            .min(config.max_width());
766                        config.set().max_width(comment_max_width);
767                        if let Some(comment_use_small_heuristics) = config
768                            .doc_comment_code_block_small_heuristics()
769                            .to_heuristics()
770                        {
771                            config
772                                .set()
773                                .use_small_heuristics(comment_use_small_heuristics);
774                        }
775                        if let Some(s) =
776                            crate::format_code_block(&self.code_block_buffer, &config, false)
777                        {
778                            trim_custom_comment_prefix(&s.snippet)
779                        } else {
780                            trim_custom_comment_prefix(&self.code_block_buffer)
781                        }
782                    }
783                    _ => trim_custom_comment_prefix(&self.code_block_buffer),
784                };
785                if !code_block.is_empty() {
786                    self.result.push_str(&self.comment_line_separator);
787                    self.result
788                        .push_str(&Self::join_block(&code_block, &self.comment_line_separator));
789                }
790                self.code_block_buffer.clear();
791                self.result.push_str(&self.comment_line_separator);
792                self.result.push_str(line);
793                self.code_block_attr = None;
794            } else {
795                self.code_block_buffer
796                    .push_str(&hide_sharp_behind_comment(line));
797                self.code_block_buffer.push('\n');
798            }
799            return false;
800        }
801
802        self.code_block_attr = None;
803        self.item_block = None;
804        if let Some(stripped) = line.strip_prefix("```") {
805            self.code_block_attr = Some(CodeBlockAttribute::new(stripped))
806        } else if self.fmt.config.wrap_comments() {
807            if let Some(ib) = ItemizedBlock::new(line) {
808                self.item_block = Some(ib);
809                return false;
810            }
811        }
812
813        if self.result == self.opener {
814            let force_leading_whitespace = &self.opener == "/* " && count_newlines(orig) == 0;
815            if !has_leading_whitespace && !force_leading_whitespace && self.result.ends_with(' ') {
816                self.result.pop();
817            }
818            if line.is_empty() {
819                return false;
820            }
821        } else if self.is_prev_line_multi_line && !line.is_empty() {
822            self.result.push(' ')
823        } else if is_last && line.is_empty() {
824            // trailing blank lines are unwanted
825            if !self.closer.is_empty() {
826                self.result.push_str(&self.indent_str);
827            }
828            return true;
829        } else {
830            self.result.push_str(&self.comment_line_separator);
831            if !has_leading_whitespace && self.result.ends_with(' ') {
832                self.result.pop();
833            }
834        }
835
836        let is_markdown_header_doc_comment = is_doc_comment && line.starts_with('#');
837
838        // We only want to wrap the comment if:
839        // 1) wrap_comments = true is configured
840        // 2) The comment is not the start of a markdown header doc comment
841        // 3) The comment width exceeds the shape's width
842        // 4) No URLS were found in the comment
843        // If this changes, the documentation in ../Configurations.md#wrap_comments
844        // should be changed accordingly.
845        let should_wrap_comment = self.fmt.config.wrap_comments()
846            && !is_markdown_header_doc_comment
847            && unicode_str_width(line) > self.fmt.shape.width
848            && !has_url(line)
849            && !is_table_item(line);
850
851        if should_wrap_comment {
852            match rewrite_string(line, &self.fmt, self.max_width) {
853                Some(ref s) => {
854                    self.is_prev_line_multi_line = s.contains('\n');
855                    self.result.push_str(s);
856                }
857                None if self.is_prev_line_multi_line => {
858                    // We failed to put the current `line` next to the previous `line`.
859                    // Remove the trailing space, then start rewrite on the next line.
860                    self.result.pop();
861                    self.result.push_str(&self.comment_line_separator);
862                    self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
863                    match rewrite_string(line, &self.fmt, self.max_width) {
864                        Some(ref s) => {
865                            self.is_prev_line_multi_line = s.contains('\n');
866                            self.result.push_str(s);
867                        }
868                        None => {
869                            self.is_prev_line_multi_line = false;
870                            self.result.push_str(line);
871                        }
872                    }
873                }
874                None => {
875                    self.is_prev_line_multi_line = false;
876                    self.result.push_str(line);
877                }
878            }
879
880            self.fmt.shape = if self.is_prev_line_multi_line {
881                // 1 = " "
882                let offset = 1 + last_line_width(&self.result) - self.line_start.len();
883                Shape {
884                    width: self.max_width.saturating_sub(offset),
885                    indent: self.fmt_indent,
886                    offset: self.fmt.shape.offset + offset,
887                }
888            } else {
889                Shape::legacy(self.max_width, self.fmt_indent)
890            };
891        } else {
892            if line.is_empty() && self.result.ends_with(' ') && !is_last {
893                // Remove space if this is an empty comment or a doc comment.
894                self.result.pop();
895            }
896            if self.code_block_attr.is_some() && self.is_prev_line_multi_line {
897                self.result.push_str(&self.comment_line_separator);
898            }
899            self.result.push_str(line);
900            self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
901            self.is_prev_line_multi_line = false;
902        }
903
904        false
905    }
906}
907
908fn rewrite_comment_inner(
909    orig: &str,
910    block_style: bool,
911    style: CommentStyle<'_>,
912    shape: Shape,
913    config: &Config,
914    is_doc_comment: bool,
915) -> RewriteResult {
916    let mut rewriter = CommentRewrite::new(orig, block_style, shape, config);
917
918    let line_breaks = count_newlines(orig.trim_end());
919    let lines = orig
920        .lines()
921        .enumerate()
922        .map(|(i, mut line)| {
923            line = trim_end_unless_two_whitespaces(line.trim_start(), is_doc_comment);
924            // Drop old closer.
925            if i == line_breaks && line.ends_with("*/") && !line.starts_with("//") {
926                line = line[..(line.len() - 2)].trim_end();
927            }
928
929            line
930        })
931        .map(|s| left_trim_comment_line(s, &style))
932        .map(|(line, has_leading_whitespace)| {
933            if orig.starts_with("/*") && line_breaks == 0 {
934                (
935                    line.trim_start(),
936                    has_leading_whitespace || config.normalize_comments(),
937                )
938            } else {
939                (line, has_leading_whitespace || config.normalize_comments())
940            }
941        });
942
943    for (i, (line, has_leading_whitespace)) in lines.enumerate() {
944        if rewriter.handle_line(orig, i, line, has_leading_whitespace, is_doc_comment) {
945            break;
946        }
947    }
948
949    Ok(rewriter.finish())
950}
951
/// Marker put in front of hidden (`# ...`) lines of a doc-comment code block so that they
/// look like line comments; `trim_custom_comment_prefix` removes it again.
const RUSTFMT_CUSTOM_COMMENT_PREFIX: &str = "//#### ";

/// Prefixes `s` with [`RUSTFMT_CUSTOM_COMMENT_PREFIX`] when, ignoring surrounding
/// whitespace, it is exactly `#` or starts with `# `. Any other line is returned
/// borrowed and unchanged.
fn hide_sharp_behind_comment(s: &str) -> Cow<'_, str> {
    let is_hidden_line = match s.trim() {
        "#" => true,
        trimmed => trimmed.starts_with("# "),
    };
    if !is_hidden_line {
        return Cow::Borrowed(s);
    }

    // The untrimmed line is appended so its original indentation survives.
    let mut hidden = String::with_capacity(RUSTFMT_CUSTOM_COMMENT_PREFIX.len() + s.len());
    hidden.push_str(RUSTFMT_CUSTOM_COMMENT_PREFIX);
    hidden.push_str(s);
    Cow::Owned(hidden)
}
962
963fn trim_custom_comment_prefix(s: &str) -> String {
964    s.lines()
965        .map(|line| {
966            let left_trimmed = line.trim_start();
967            if left_trimmed.starts_with(RUSTFMT_CUSTOM_COMMENT_PREFIX) {
968                left_trimmed.trim_start_matches(RUSTFMT_CUSTOM_COMMENT_PREFIX)
969            } else {
970                line
971            }
972        })
973        .collect::<Vec<_>>()
974        .join("\n")
975}
976
977/// Returns `true` if the given string MAY include URLs or alike.
978fn has_url(s: &str) -> bool {
979    // A regex matching reference doc links.
980    //
981    // ```markdown
982    // /// An [example].
983    // ///
984    // /// [example]: this::is::a::link
985    // ```
986    let reference_link_url = static_regex!(r"^\[.+\]\s?:");
987
988    // This function may return false positive, but should get its job done in most cases.
989    s.contains("https://")
990        || s.contains("http://")
991        || s.contains("ftp://")
992        || s.contains("file://")
993        || reference_link_url.is_match(s)
994}
995
/// Returns true if the given string may be part of a Markdown table.
///
/// A line qualifies when, after leading whitespace, it starts with `|` and contains at
/// least one more `|` further to the right. False positives are possible; the check is
/// aimed at markdown tables with two column delimiters.
fn is_table_item(mut s: &str) -> bool {
    s = s.trim_start();
    if !s.starts_with('|') {
        return false;
    }
    // The last `|` must not be the leading one.
    matches!(s.rfind('|'), Some(last_delimiter) if last_delimiter != 0)
}
1007
1008/// Given the span, rewrite the missing comment inside it if available.
1009/// Note that the given span must only include comments (or leading/trailing whitespaces).
1010pub(crate) fn rewrite_missing_comment(
1011    span: Span,
1012    shape: Shape,
1013    context: &RewriteContext<'_>,
1014) -> RewriteResult {
1015    let missing_snippet = context.snippet(span);
1016    let trimmed_snippet = missing_snippet.trim();
1017    // check the span starts with a comment
1018    let pos = trimmed_snippet.find('/');
1019    if !trimmed_snippet.is_empty() && pos.is_some() {
1020        rewrite_comment(trimmed_snippet, false, shape, context.config)
1021    } else {
1022        Ok(String::new())
1023    }
1024}
1025
1026/// Recover the missing comments in the specified span, if available.
1027/// The layout of the comments will be preserved as long as it does not break the code
1028/// and its total width does not exceed the max width.
1029pub(crate) fn recover_missing_comment_in_span(
1030    span: Span,
1031    shape: Shape,
1032    context: &RewriteContext<'_>,
1033    used_width: usize,
1034) -> RewriteResult {
1035    let missing_comment = rewrite_missing_comment(span, shape, context)?;
1036    if missing_comment.is_empty() {
1037        Ok(String::new())
1038    } else {
1039        let missing_snippet = context.snippet(span);
1040        let pos = missing_snippet.find('/').unknown_error()?;
1041        // 1 = ` `
1042        let total_width = missing_comment.len() + used_width + 1;
1043        let force_new_line_before_comment =
1044            missing_snippet[..pos].contains('\n') || total_width > context.config.max_width();
1045        let sep = if force_new_line_before_comment {
1046            shape.indent.to_string_with_newline(context.config)
1047        } else {
1048            Cow::from(" ")
1049        };
1050        Ok(format!("{sep}{missing_comment}"))
1051    }
1052}
1053
/// Trims trailing whitespace, except that a doc comment line ending in two spaces
/// (markdown's hard line break) is returned untouched.
fn trim_end_unless_two_whitespaces(s: &str, is_doc_comment: bool) -> &str {
    let keeps_markdown_line_break = is_doc_comment && s.ends_with("  ");
    match keeps_markdown_line_break {
        true => s,
        false => s.trim_end(),
    }
}
1062
1063/// Trims whitespace and aligns to indent, but otherwise does not change comments.
1064fn light_rewrite_comment(
1065    orig: &str,
1066    offset: Indent,
1067    config: &Config,
1068    is_doc_comment: bool,
1069) -> String {
1070    orig.lines()
1071        .map(|l| {
1072            // This is basically just l.trim(), but in the case that a line starts
1073            // with `*` we want to leave one space before it, so it aligns with the
1074            // `*` in `/*`.
1075            let first_non_whitespace = l.find(|c| !char::is_whitespace(c));
1076            let left_trimmed = if let Some(fnw) = first_non_whitespace {
1077                if l.as_bytes()[fnw] == b'*' && fnw > 0 {
1078                    &l[fnw - 1..]
1079                } else {
1080                    &l[fnw..]
1081                }
1082            } else {
1083                ""
1084            };
1085            // Preserve markdown's double-space line break syntax in doc comment.
1086            trim_end_unless_two_whitespaces(left_trimmed, is_doc_comment)
1087        })
1088        .join(&format!("\n{}", offset.to_string(config)))
1089}
1090
1091/// Trims comment characters and possibly a single space from the left of a string.
1092/// Does not trim all whitespace. If a single space is trimmed from the left of the string,
1093/// this function returns true.
1094fn left_trim_comment_line<'a>(line: &'a str, style: &CommentStyle<'_>) -> (&'a str, bool) {
1095    if line.starts_with("//! ")
1096        || line.starts_with("/// ")
1097        || line.starts_with("/*! ")
1098        || line.starts_with("/** ")
1099    {
1100        (&line[4..], true)
1101    } else if let CommentStyle::Custom(opener) = *style {
1102        if let Some(stripped) = line.strip_prefix(opener) {
1103            (stripped, true)
1104        } else {
1105            (&line[opener.trim_end().len()..], false)
1106        }
1107    } else if line.starts_with("/* ")
1108        || line.starts_with("// ")
1109        || line.starts_with("//!")
1110        || line.starts_with("///")
1111        || line.starts_with("** ")
1112        || line.starts_with("/*!")
1113        || (line.starts_with("/**") && !line.starts_with("/**/"))
1114    {
1115        (&line[3..], line.chars().nth(2).unwrap() == ' ')
1116    } else if line.starts_with("/*")
1117        || line.starts_with("* ")
1118        || line.starts_with("//")
1119        || line.starts_with("**")
1120    {
1121        (&line[2..], line.chars().nth(1).unwrap() == ' ')
1122    } else if let Some(stripped) = line.strip_prefix('*') {
1123        (stripped, false)
1124    } else {
1125        (line, line.starts_with(' '))
1126    }
1127}
1128
/// Pattern search that skips over text located inside comments.
pub(crate) trait FindUncommented {
    /// Searches from the start for `pat` outside of comments and returns the byte offset
    /// of the match, or `None` if there is none.
    fn find_uncommented(&self, pat: &str) -> Option<usize>;
    /// Like `find_uncommented`, but keeps searching after each match and returns the byte
    /// offset of the last one found.
    fn find_last_uncommented(&self, pat: &str) -> Option<usize>;
}
1133
1134impl FindUncommented for str {
1135    fn find_uncommented(&self, pat: &str) -> Option<usize> {
1136        let mut needle_iter = pat.chars();
1137        for (kind, (i, b)) in CharClasses::new(self.char_indices()) {
1138            match needle_iter.next() {
1139                None => {
1140                    return Some(i - pat.len());
1141                }
1142                Some(c) => match kind {
1143                    FullCodeCharKind::Normal | FullCodeCharKind::InString if b == c => {}
1144                    _ => {
1145                        needle_iter = pat.chars();
1146                    }
1147                },
1148            }
1149        }
1150
1151        // Handle case where the pattern is a suffix of the search string
1152        match needle_iter.next() {
1153            Some(_) => None,
1154            None => Some(self.len() - pat.len()),
1155        }
1156    }
1157
1158    fn find_last_uncommented(&self, pat: &str) -> Option<usize> {
1159        if let Some(left) = self.find_uncommented(pat) {
1160            let mut result = left;
1161            // add 1 to use find_last_uncommented for &str after pat
1162            while let Some(next) = self[(result + 1)..].find_last_uncommented(pat) {
1163                result += next + 1;
1164            }
1165            Some(result)
1166        } else {
1167            None
1168        }
1169    }
1170}
1171
1172// Returns the first byte position after the first comment. The given string
1173// is expected to be prefixed by a comment, including delimiters.
1174// Good: `/* /* inner */ outer */ code();`
1175// Bad:  `code(); // hello\n world!`
1176pub(crate) fn find_comment_end(s: &str) -> Option<usize> {
1177    let mut iter = CharClasses::new(s.char_indices());
1178    for (kind, (i, _c)) in &mut iter {
1179        if kind == FullCodeCharKind::Normal || kind == FullCodeCharKind::InString {
1180            return Some(i);
1181        }
1182    }
1183
1184    // Handle case where the comment ends at the end of `s`.
1185    if iter.status == CharClassesStatus::Normal {
1186        Some(s.len())
1187    } else {
1188        None
1189    }
1190}
1191
1192/// Returns `true` if text contains any comment.
1193pub(crate) fn contains_comment(text: &str) -> bool {
1194    CharClasses::new(text.chars()).any(|(kind, _)| kind.is_comment())
1195}
1196
/// Iterator adaptor that pairs every item of the wrapped character iterator with the
/// `FullCodeCharKind` it was classified as.
pub(crate) struct CharClasses<T>
where
    T: Iterator,
    T::Item: RichChar,
{
    /// The wrapped characters, with multi-item lookahead.
    base: MultiPeek<T>,
    /// Scanner state to apply to the next character.
    status: CharClassesStatus,
}
1205
/// An item that carries a `char`, possibly together with extra data such as an index.
pub(crate) trait RichChar {
    /// Returns the character carried by this item.
    fn get_char(&self) -> char;
}

/// A plain `char` is its own character.
impl RichChar for char {
    fn get_char(&self) -> char {
        let chr = *self;
        chr
    }
}

/// An `(index, char)` pair, as produced by `str::char_indices`.
impl RichChar for (usize, char) {
    fn get_char(&self) -> char {
        let &(_, chr) = self;
        chr
    }
}
1221
/// Internal state of the `CharClasses` scanner.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
enum CharClassesStatus {
    /// Initial state: not inside a string literal, char literal or comment.
    Normal,
    /// Character is within a string
    LitString,
    /// A `\` was just read inside a string; the next character belongs to the escape.
    LitStringEscape,
    /// Character is within a raw string, with the integer being the number of `#` counted
    /// in its prefix
    LitRawString(u32),
    /// Inside the prefix of a raw string (after the `r`), with the integer being the
    /// number of `#` read so far
    RawStringPrefix(u32),
    /// Inside the run of `#` closing a raw string, with the integer being the number of
    /// `#` still expected
    RawStringSuffix(u32),
    /// Character is within a char literal
    LitChar,
    /// A `\` was just read inside a char literal; the next character belongs to the escape.
    LitCharEscape,
    /// Character inside a block comment, with the integer indicating the nesting deepness of the
    /// comment
    BlockComment(u32),
    /// Character inside a block-commented string, with the integer indicating the nesting deepness
    /// of the comment
    StringInBlockComment(u32),
    /// Status when the '/' has been consumed, but not yet the '*', deepness is
    /// the new deepness (after the comment opening).
    BlockCommentOpening(u32),
    /// Status when the '*' has been consumed, but not yet the '/', deepness is
    /// the new deepness (after the comment closing).
    BlockCommentClosing(u32),
    /// Character is within a line comment
    LineComment,
}
1249
/// Coarse classification of a character: functional code or comment.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub(crate) enum CodeCharKind {
    Normal,
    Comment,
}

/// Fine-grained classification of a character (or of a line, when produced by
/// `LineClasses`): besides telling code from comments it marks where comments and
/// multiline strings open and close, which eases chunking code from tagged characters.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub(crate) enum FullCodeCharKind {
    /// Neither part of a comment nor of a string.
    Normal,
    /// The first character of a comment, there is only one for a comment (always '/')
    StartComment,
    /// Any character inside a comment including the second character of comment
    /// marks ("//", "/*")
    InComment,
    /// Last character of a comment, '\n' for a line comment, '/' for a block comment.
    EndComment,
    /// Start of a multiline string inside a comment
    StartStringCommented,
    /// End of a multiline string inside a comment
    EndStringCommented,
    /// Inside a commented string
    InStringCommented,
    /// Start of a multiline string
    StartString,
    /// End of a multiline string
    EndString,
    /// Inside a string.
    InString,
}

impl FullCodeCharKind {
    /// Returns true for every kind that belongs to a comment, delimiters and commented
    /// strings included.
    pub(crate) fn is_comment(self) -> bool {
        matches!(
            self,
            FullCodeCharKind::StartComment
                | FullCodeCharKind::InComment
                | FullCodeCharKind::EndComment
                | FullCodeCharKind::StartStringCommented
                | FullCodeCharKind::InStringCommented
                | FullCodeCharKind::EndStringCommented
        )
    }

    /// Returns true if the character is inside a comment, i.e. it is a comment kind other
    /// than the opening and closing markers.
    pub(crate) fn inside_comment(self) -> bool {
        let is_delimiter = matches!(
            self,
            FullCodeCharKind::StartComment | FullCodeCharKind::EndComment
        );
        self.is_comment() && !is_delimiter
    }

    /// Returns true for the start or the inside of a (non-commented) string.
    pub(crate) fn is_string(self) -> bool {
        matches!(
            self,
            FullCodeCharKind::InString | FullCodeCharKind::StartString
        )
    }

    /// Returns true if the character is within a commented string (its start included)
    pub(crate) fn is_commented_string(self) -> bool {
        matches!(
            self,
            FullCodeCharKind::InStringCommented | FullCodeCharKind::StartStringCommented
        )
    }

    /// Collapses this kind into the coarse `CodeCharKind`.
    fn to_codecharkind(self) -> CodeCharKind {
        match self.is_comment() {
            true => CodeCharKind::Comment,
            false => CodeCharKind::Normal,
        }
    }
}
1326
1327impl<T> CharClasses<T>
1328where
1329    T: Iterator,
1330    T::Item: RichChar,
1331{
1332    pub(crate) fn new(base: T) -> CharClasses<T> {
1333        CharClasses {
1334            base: multipeek(base),
1335            status: CharClassesStatus::Normal,
1336        }
1337    }
1338}
1339
1340fn is_raw_string_suffix<T>(iter: &mut MultiPeek<T>, count: u32) -> bool
1341where
1342    T: Iterator,
1343    T::Item: RichChar,
1344{
1345    for _ in 0..count {
1346        match iter.peek() {
1347            Some(c) if c.get_char() == '#' => continue,
1348            _ => return false,
1349        }
1350    }
1351    true
1352}
1353
/// Yields every item of the wrapped iterator together with its `FullCodeCharKind`.
impl<T> Iterator for CharClasses<T>
where
    T: Iterator,
    T::Item: RichChar,
{
    type Item = (FullCodeCharKind, T::Item);

    /// Takes the next item from `base`, classifies it according to the current `status`
    /// and stores the status to use for the following character.
    ///
    /// Most arms evaluate to the new status and leave the reported kind in `char_kind`;
    /// the arms that report a comment delimiter or comment content set `self.status`
    /// themselves and return early.
    fn next(&mut self) -> Option<(FullCodeCharKind, T::Item)> {
        let item = self.base.next()?;
        let chr = item.get_char();
        // Kind reported for this character unless an arm overrides it or returns early.
        let mut char_kind = FullCodeCharKind::Normal;
        self.status = match self.status {
            CharClassesStatus::LitRawString(sharps) => {
                char_kind = FullCodeCharKind::InString;
                match chr {
                    '"' => {
                        if sharps == 0 {
                            // `r"..."`: the quote alone closes the raw string.
                            char_kind = FullCodeCharKind::Normal;
                            CharClassesStatus::Normal
                        } else if is_raw_string_suffix(&mut self.base, sharps) {
                            // The quote is followed by enough `#` to close the string.
                            CharClassesStatus::RawStringSuffix(sharps)
                        } else {
                            CharClassesStatus::LitRawString(sharps)
                        }
                    }
                    _ => CharClassesStatus::LitRawString(sharps),
                }
            }
            CharClassesStatus::RawStringPrefix(sharps) => {
                char_kind = FullCodeCharKind::InString;
                match chr {
                    '#' => CharClassesStatus::RawStringPrefix(sharps + 1),
                    '"' => CharClassesStatus::LitRawString(sharps),
                    _ => CharClassesStatus::Normal, // Unreachable.
                }
            }
            CharClassesStatus::RawStringSuffix(sharps) => {
                match chr {
                    '#' => {
                        if sharps == 1 {
                            // Last `#` of the suffix: reported with the default `Normal` kind.
                            CharClassesStatus::Normal
                        } else {
                            char_kind = FullCodeCharKind::InString;
                            CharClassesStatus::RawStringSuffix(sharps - 1)
                        }
                    }
                    _ => CharClassesStatus::Normal, // Unreachable
                }
            }
            CharClassesStatus::LitString => {
                char_kind = FullCodeCharKind::InString;
                match chr {
                    '"' => CharClassesStatus::Normal,
                    '\\' => CharClassesStatus::LitStringEscape,
                    _ => CharClassesStatus::LitString,
                }
            }
            CharClassesStatus::LitStringEscape => {
                // The escaped character never terminates the string.
                char_kind = FullCodeCharKind::InString;
                CharClassesStatus::LitString
            }
            // Characters of a char literal keep the default `Normal` kind.
            CharClassesStatus::LitChar => match chr {
                '\\' => CharClassesStatus::LitCharEscape,
                '\'' => CharClassesStatus::Normal,
                _ => CharClassesStatus::LitChar,
            },
            CharClassesStatus::LitCharEscape => CharClassesStatus::LitChar,
            CharClassesStatus::Normal => match chr {
                // An `r` directly followed by `#` or `"` is taken as the start of a raw string.
                'r' => match self.base.peek().map(RichChar::get_char) {
                    Some('#') | Some('"') => {
                        char_kind = FullCodeCharKind::InString;
                        CharClassesStatus::RawStringPrefix(0)
                    }
                    _ => CharClassesStatus::Normal,
                },
                '"' => {
                    char_kind = FullCodeCharKind::InString;
                    CharClassesStatus::LitString
                }
                '\'' => {
                    // HACK: Work around mut borrow.
                    match self.base.peek() {
                        Some(next) if next.get_char() == '\\' => {
                            self.status = CharClassesStatus::LitChar;
                            return Some((char_kind, item));
                        }
                        _ => (),
                    }

                    // `MultiPeek::peek` advances its lookahead on every call, so this
                    // second peek inspects the character after the one checked above:
                    // only `'x'` (quote, one char, quote) is treated as a char literal.
                    match self.base.peek() {
                        Some(next) if next.get_char() == '\'' => CharClassesStatus::LitChar,
                        _ => CharClassesStatus::Normal,
                    }
                }
                '/' => match self.base.peek() {
                    Some(next) if next.get_char() == '*' => {
                        self.status = CharClassesStatus::BlockCommentOpening(1);
                        return Some((FullCodeCharKind::StartComment, item));
                    }
                    Some(next) if next.get_char() == '/' => {
                        self.status = CharClassesStatus::LineComment;
                        return Some((FullCodeCharKind::StartComment, item));
                    }
                    _ => CharClassesStatus::Normal,
                },
                _ => CharClassesStatus::Normal,
            },
            CharClassesStatus::StringInBlockComment(deepness) => {
                char_kind = FullCodeCharKind::InStringCommented;
                if chr == '"' {
                    CharClassesStatus::BlockComment(deepness)
                } else if chr == '*' && self.base.peek().map(RichChar::get_char) == Some('/') {
                    // A `*/` closes the comment even while inside a commented string.
                    char_kind = FullCodeCharKind::InComment;
                    CharClassesStatus::BlockCommentClosing(deepness - 1)
                } else {
                    CharClassesStatus::StringInBlockComment(deepness)
                }
            }
            CharClassesStatus::BlockComment(deepness) => {
                assert_ne!(deepness, 0);
                char_kind = FullCodeCharKind::InComment;
                match self.base.peek() {
                    Some(next) if next.get_char() == '/' && chr == '*' => {
                        CharClassesStatus::BlockCommentClosing(deepness - 1)
                    }
                    Some(next) if next.get_char() == '*' && chr == '/' => {
                        CharClassesStatus::BlockCommentOpening(deepness + 1)
                    }
                    _ if chr == '"' => CharClassesStatus::StringInBlockComment(deepness),
                    _ => self.status,
                }
            }
            CharClassesStatus::BlockCommentOpening(deepness) => {
                // The previous character was the `/` of `/*`; this one must be the `*`.
                assert_eq!(chr, '*');
                self.status = CharClassesStatus::BlockComment(deepness);
                return Some((FullCodeCharKind::InComment, item));
            }
            CharClassesStatus::BlockCommentClosing(deepness) => {
                // The previous character was the `*` of `*/`; this one must be the `/`.
                assert_eq!(chr, '/');
                if deepness == 0 {
                    // Outermost comment closed.
                    self.status = CharClassesStatus::Normal;
                    return Some((FullCodeCharKind::EndComment, item));
                } else {
                    // Only a nested comment closed; still inside the enclosing one.
                    self.status = CharClassesStatus::BlockComment(deepness);
                    return Some((FullCodeCharKind::InComment, item));
                }
            }
            CharClassesStatus::LineComment => match chr {
                '\n' => {
                    self.status = CharClassesStatus::Normal;
                    return Some((FullCodeCharKind::EndComment, item));
                }
                _ => {
                    self.status = CharClassesStatus::LineComment;
                    return Some((FullCodeCharKind::InComment, item));
                }
            },
        };
        Some((char_kind, item))
    }
}
1515
1516/// An iterator over the lines of a string, paired with the char kind at the
1517/// end of the line.
1518pub(crate) struct LineClasses<'a> {
1519    base: iter::Peekable<CharClasses<std::str::Chars<'a>>>,
1520    kind: FullCodeCharKind,
1521}
1522
1523impl<'a> LineClasses<'a> {
1524    pub(crate) fn new(s: &'a str) -> Self {
1525        LineClasses {
1526            base: CharClasses::new(s.chars()).peekable(),
1527            kind: FullCodeCharKind::Normal,
1528        }
1529    }
1530}
1531
1532impl<'a> Iterator for LineClasses<'a> {
1533    type Item = (FullCodeCharKind, String);
1534
1535    fn next(&mut self) -> Option<Self::Item> {
1536        self.base.peek()?;
1537
1538        let mut line = String::new();
1539
1540        let start_kind = match self.base.peek() {
1541            Some((kind, _)) => *kind,
1542            None => unreachable!(),
1543        };
1544
1545        for (kind, c) in self.base.by_ref() {
1546            // needed to set the kind of the ending character on the last line
1547            self.kind = kind;
1548            if c == '\n' {
1549                self.kind = match (start_kind, kind) {
1550                    (FullCodeCharKind::Normal, FullCodeCharKind::InString) => {
1551                        FullCodeCharKind::StartString
1552                    }
1553                    (FullCodeCharKind::InString, FullCodeCharKind::Normal) => {
1554                        FullCodeCharKind::EndString
1555                    }
1556                    (FullCodeCharKind::InComment, FullCodeCharKind::InStringCommented) => {
1557                        FullCodeCharKind::StartStringCommented
1558                    }
1559                    (FullCodeCharKind::InStringCommented, FullCodeCharKind::InComment) => {
1560                        FullCodeCharKind::EndStringCommented
1561                    }
1562                    _ => kind,
1563                };
1564                break;
1565            }
1566            line.push(c);
1567        }
1568
1569        // Workaround for CRLF newline.
1570        if line.ends_with('\r') {
1571            line.pop();
1572        }
1573
1574        Some((self.kind, line))
1575    }
1576}
1577
1578/// Iterator over functional and commented parts of a string. Any part of a string is either
1579/// functional code, either *one* block comment, either *one* line comment. Whitespace between
1580/// comments is functional code. Line comments contain their ending newlines.
1581struct UngroupedCommentCodeSlices<'a> {
1582    slice: &'a str,
1583    iter: iter::Peekable<CharClasses<std::str::CharIndices<'a>>>,
1584}
1585
1586impl<'a> UngroupedCommentCodeSlices<'a> {
1587    fn new(code: &'a str) -> UngroupedCommentCodeSlices<'a> {
1588        UngroupedCommentCodeSlices {
1589            slice: code,
1590            iter: CharClasses::new(code.char_indices()).peekable(),
1591        }
1592    }
1593}
1594
1595impl<'a> Iterator for UngroupedCommentCodeSlices<'a> {
1596    type Item = (CodeCharKind, usize, &'a str);
1597
1598    fn next(&mut self) -> Option<Self::Item> {
1599        let (kind, (start_idx, _)) = self.iter.next()?;
1600        match kind {
1601            FullCodeCharKind::Normal | FullCodeCharKind::InString => {
1602                // Consume all the Normal code
1603                while let Some(&(char_kind, _)) = self.iter.peek() {
1604                    if char_kind.is_comment() {
1605                        break;
1606                    }
1607                    let _ = self.iter.next();
1608                }
1609            }
1610            FullCodeCharKind::StartComment => {
1611                // Consume the whole comment
1612                loop {
1613                    match self.iter.next() {
1614                        Some((kind, ..)) if kind.inside_comment() => continue,
1615                        _ => break,
1616                    }
1617                }
1618            }
1619            _ => panic!(),
1620        }
1621        let slice = match self.iter.peek() {
1622            Some(&(_, (end_idx, _))) => &self.slice[start_idx..end_idx],
1623            None => &self.slice[start_idx..],
1624        };
1625        Some((
1626            if kind.is_comment() {
1627                CodeCharKind::Comment
1628            } else {
1629                CodeCharKind::Normal
1630            },
1631            start_idx,
1632            slice,
1633        ))
1634    }
1635}
1636
1637/// Iterator over an alternating sequence of functional and commented parts of
1638/// a string. The first item is always a, possibly zero length, subslice of
1639/// functional text. Line style comments contain their ending newlines.
1640pub(crate) struct CommentCodeSlices<'a> {
1641    slice: &'a str,
1642    last_slice_kind: CodeCharKind,
1643    last_slice_end: usize,
1644}
1645
1646impl<'a> CommentCodeSlices<'a> {
1647    pub(crate) fn new(slice: &'a str) -> CommentCodeSlices<'a> {
1648        CommentCodeSlices {
1649            slice,
1650            last_slice_kind: CodeCharKind::Comment,
1651            last_slice_end: 0,
1652        }
1653    }
1654}
1655
1656impl<'a> Iterator for CommentCodeSlices<'a> {
1657    type Item = (CodeCharKind, usize, &'a str);
1658
1659    fn next(&mut self) -> Option<Self::Item> {
1660        if self.last_slice_end == self.slice.len() {
1661            return None;
1662        }
1663
1664        let mut sub_slice_end = self.last_slice_end;
1665        let mut first_whitespace = None;
1666        let subslice = &self.slice[self.last_slice_end..];
1667        let mut iter = CharClasses::new(subslice.char_indices());
1668
1669        for (kind, (i, c)) in &mut iter {
1670            let is_comment_connector = self.last_slice_kind == CodeCharKind::Normal
1671                && &subslice[..2] == "//"
1672                && [' ', '\t'].contains(&c);
1673
1674            if is_comment_connector && first_whitespace.is_none() {
1675                first_whitespace = Some(i);
1676            }
1677
1678            if kind.to_codecharkind() == self.last_slice_kind && !is_comment_connector {
1679                let last_index = match first_whitespace {
1680                    Some(j) => j,
1681                    None => i,
1682                };
1683                sub_slice_end = self.last_slice_end + last_index;
1684                break;
1685            }
1686
1687            if !is_comment_connector {
1688                first_whitespace = None;
1689            }
1690        }
1691
1692        if let (None, true) = (iter.next(), sub_slice_end == self.last_slice_end) {
1693            // This was the last subslice.
1694            sub_slice_end = match first_whitespace {
1695                Some(i) => self.last_slice_end + i,
1696                None => self.slice.len(),
1697            };
1698        }
1699
1700        let kind = match self.last_slice_kind {
1701            CodeCharKind::Comment => CodeCharKind::Normal,
1702            CodeCharKind::Normal => CodeCharKind::Comment,
1703        };
1704        let res = (
1705            kind,
1706            self.last_slice_end,
1707            &self.slice[self.last_slice_end..sub_slice_end],
1708        );
1709        self.last_slice_end = sub_slice_end;
1710        self.last_slice_kind = kind;
1711
1712        Some(res)
1713    }
1714}
1715
1716/// Checks is `new` didn't miss any comment from `span`, if it removed any, return previous text
1717pub(crate) fn recover_comment_removed(
1718    new: String,
1719    span: Span,
1720    context: &RewriteContext<'_>,
1721) -> String {
1722    let snippet = context.snippet(span);
1723    if snippet != new && changed_comment_content(snippet, &new) {
1724        // We missed some comments. Warn and keep the original text.
1725        if context.config.error_on_unformatted() {
1726            context.report.append(
1727                context.psess.span_to_filename(span),
1728                vec![FormattingError::from_span(
1729                    span,
1730                    context.psess,
1731                    ErrorKind::LostComment,
1732                )],
1733            );
1734        }
1735        snippet.to_owned()
1736    } else {
1737        new
1738    }
1739}
1740
1741pub(crate) fn filter_normal_code(code: &str) -> String {
1742    let mut buffer = String::with_capacity(code.len());
1743    LineClasses::new(code).for_each(|(kind, line)| match kind {
1744        FullCodeCharKind::Normal
1745        | FullCodeCharKind::StartString
1746        | FullCodeCharKind::InString
1747        | FullCodeCharKind::EndString => {
1748            buffer.push_str(&line);
1749            buffer.push('\n');
1750        }
1751        _ => (),
1752    });
1753    if !code.ends_with('\n') && buffer.ends_with('\n') {
1754        buffer.pop();
1755    }
1756    buffer
1757}
1758
1759/// Returns `true` if the two strings of code have the same payload of comments.
1760/// The payload of comments is everything in the string except:
1761/// - actual code (not comments),
1762/// - comment start/end marks,
1763/// - whitespace,
1764/// - '*' at the beginning of lines in block comments.
1765fn changed_comment_content(orig: &str, new: &str) -> bool {
1766    // Cannot write this as a fn since we cannot return types containing closures.
1767    let code_comment_content = |code| {
1768        let slices = UngroupedCommentCodeSlices::new(code);
1769        slices
1770            .filter(|(kind, _, _)| *kind == CodeCharKind::Comment)
1771            .flat_map(|(_, _, s)| CommentReducer::new(s))
1772    };
1773    let res = code_comment_content(orig).ne(code_comment_content(new));
1774    debug!(
1775        "comment::changed_comment_content: {}\norig: '{}'\nnew: '{}'\nraw_old: {}\nraw_new: {}",
1776        res,
1777        orig,
1778        new,
1779        code_comment_content(orig).collect::<String>(),
1780        code_comment_content(new).collect::<String>()
1781    );
1782    res
1783}
1784
/// Iterator over the 'payload' characters of a comment.
///
/// It skips whitespace, the comment start/end marks, and a single '*' at the beginning of
/// each line of a block comment. The input must be exactly one comment, i.e. contain no more
/// than one start mark (several consecutive line comments must be reduced one at a time).
struct CommentReducer<'a> {
    /// Whether the comment is a block comment (starts with `/*`).
    is_block: bool,
    /// Whether nothing but whitespace has been consumed since the last newline.
    at_start_line: bool,
    /// Remaining characters of the comment, header and trailer already removed.
    iter: std::str::Chars<'a>,
}

impl<'a> CommentReducer<'a> {
    /// Creates a reducer for `comment`, which must start with a comment opener.
    ///
    /// # Panics
    ///
    /// Panics if `comment` starts with neither `//` nor `/*`.
    fn new(comment: &'a str) -> CommentReducer<'a> {
        let is_block = comment.starts_with("/*");
        let comment = remove_comment_header(comment);
        CommentReducer {
            is_block,
            // There are no supplementary '*' on the first line.
            at_start_line: false,
            iter: comment.chars(),
        }
    }
}

impl<'a> Iterator for CommentReducer<'a> {
    type Item = char;

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let mut c = self.iter.next()?;
            if self.is_block && self.at_start_line {
                // Skip the indentation (and any blank lines) in front of the line's content.
                while c.is_whitespace() {
                    c = self.iter.next()?;
                }
                // Ignore leading '*'.
                if c == '*' {
                    c = self.iter.next()?;
                }
                // The line-start state ends here, so that later '*' on the same line are
                // kept as payload. It only persists when the decoration is immediately
                // followed by a newline (a line holding nothing but the '*').
                self.at_start_line = c == '\n';
            } else if c == '\n' {
                self.at_start_line = true;
            }
            if !c.is_whitespace() {
                return Some(c);
            }
        }
    }
}

/// Strips the opening mark of `comment` and, for block comments, the closing `*/`.
///
/// Recognized openers are `///`, `//!`, `//`, `/**` (but not the empty comment `/**/`),
/// `/*!` and `/*`. A block comment that is not terminated by `*/` keeps its whole body
/// instead of losing its last two bytes.
///
/// # Panics
///
/// Panics if `comment` does not start with a comment opener.
fn remove_comment_header(comment: &str) -> &str {
    if comment.starts_with("///") || comment.starts_with("//!") {
        return &comment[3..];
    }
    if let Some(stripped) = comment.strip_prefix("//") {
        return stripped;
    }

    let has_doc_opener = (comment.starts_with("/**") && !comment.starts_with("/**/"))
        || comment.starts_with("/*!");
    let body = if has_doc_opener {
        &comment[3..]
    } else {
        assert!(
            comment.starts_with("/*"),
            "string '{comment}' is not a comment"
        );
        &comment[2..]
    };
    // Only drop the trailer when it is really there; slicing `len - 2` unconditionally
    // would panic on inputs such as "/*" and eat payload of unterminated comments.
    body.strip_suffix("*/").unwrap_or(body)
}
1849
1850#[cfg(test)]
1851mod test {
1852    use super::*;
1853
1854    #[test]
1855    fn char_classes() {
1856        let mut iter = CharClasses::new("//\n\n".chars());
1857
1858        assert_eq!((FullCodeCharKind::StartComment, '/'), iter.next().unwrap());
1859        assert_eq!((FullCodeCharKind::InComment, '/'), iter.next().unwrap());
1860        assert_eq!((FullCodeCharKind::EndComment, '\n'), iter.next().unwrap());
1861        assert_eq!((FullCodeCharKind::Normal, '\n'), iter.next().unwrap());
1862        assert_eq!(None, iter.next());
1863    }
1864
1865    #[test]
1866    fn comment_code_slices() {
1867        let input = "code(); /* test */ 1 + 1";
1868        let mut iter = CommentCodeSlices::new(input);
1869
1870        assert_eq!((CodeCharKind::Normal, 0, "code(); "), iter.next().unwrap());
1871        assert_eq!(
1872            (CodeCharKind::Comment, 8, "/* test */"),
1873            iter.next().unwrap()
1874        );
1875        assert_eq!((CodeCharKind::Normal, 18, " 1 + 1"), iter.next().unwrap());
1876        assert_eq!(None, iter.next());
1877    }
1878
1879    #[test]
1880    fn comment_code_slices_two() {
1881        let input = "// comment\n    test();";
1882        let mut iter = CommentCodeSlices::new(input);
1883
1884        assert_eq!((CodeCharKind::Normal, 0, ""), iter.next().unwrap());
1885        assert_eq!(
1886            (CodeCharKind::Comment, 0, "// comment\n"),
1887            iter.next().unwrap()
1888        );
1889        assert_eq!(
1890            (CodeCharKind::Normal, 11, "    test();"),
1891            iter.next().unwrap()
1892        );
1893        assert_eq!(None, iter.next());
1894    }
1895
1896    #[test]
1897    fn comment_code_slices_three() {
1898        let input = "1 // comment\n    // comment2\n\n";
1899        let mut iter = CommentCodeSlices::new(input);
1900
1901        assert_eq!((CodeCharKind::Normal, 0, "1 "), iter.next().unwrap());
1902        assert_eq!(
1903            (CodeCharKind::Comment, 2, "// comment\n    // comment2\n"),
1904            iter.next().unwrap()
1905        );
1906        assert_eq!((CodeCharKind::Normal, 29, "\n"), iter.next().unwrap());
1907        assert_eq!(None, iter.next());
1908    }
1909
1910    #[test]
1911    #[rustfmt::skip]
1912    fn format_doc_comments() {
1913        let mut wrap_normalize_config: crate::config::Config = Default::default();
1914        wrap_normalize_config.set().wrap_comments(true);
1915        wrap_normalize_config.set().normalize_comments(true);
1916
1917        let mut wrap_config: crate::config::Config = Default::default();
1918        wrap_config.set().wrap_comments(true);
1919
1920        let comment = rewrite_comment(" //test",
1921                                      true,
1922                                      Shape::legacy(100, Indent::new(0, 100)),
1923                                      &wrap_normalize_config).unwrap();
1924        assert_eq!("/* test */", comment);
1925
1926        let comment = rewrite_comment("// comment on a",
1927                                      false,
1928                                      Shape::legacy(10, Indent::empty()),
1929                                      &wrap_normalize_config).unwrap();
1930        assert_eq!("// comment\n// on a", comment);
1931
1932        let comment = rewrite_comment("//  A multi line comment\n             // between args.",
1933                                      false,
1934                                      Shape::legacy(60, Indent::new(0, 12)),
1935                                      &wrap_normalize_config).unwrap();
1936        assert_eq!("//  A multi line comment\n            // between args.", comment);
1937
1938        let input = "// comment";
1939        let expected =
1940            "/* comment */";
1941        let comment = rewrite_comment(input,
1942                                      true,
1943                                      Shape::legacy(9, Indent::new(0, 69)),
1944                                      &wrap_normalize_config).unwrap();
1945        assert_eq!(expected, comment);
1946
1947        let comment = rewrite_comment("/*   trimmed    */",
1948                                      true,
1949                                      Shape::legacy(100, Indent::new(0, 100)),
1950                                      &wrap_normalize_config).unwrap();
1951        assert_eq!("/* trimmed */", comment);
1952
1953        // Check that different comment style are properly recognised.
1954        let comment = rewrite_comment(r#"/// test1
1955                                         /// test2
1956                                         /*
1957                                          * test3
1958                                          */"#,
1959                                      false,
1960                                      Shape::legacy(100, Indent::new(0, 0)),
1961                                      &wrap_normalize_config).unwrap();
1962        assert_eq!("/// test1\n/// test2\n// test3", comment);
1963
1964        // Check that the blank line marks the end of a commented paragraph.
1965        let comment = rewrite_comment(r#"// test1
1966
1967                                         // test2"#,
1968                                      false,
1969                                      Shape::legacy(100, Indent::new(0, 0)),
1970                                      &wrap_normalize_config).unwrap();
1971        assert_eq!("// test1\n\n// test2", comment);
1972
1973        // Check that the blank line marks the end of a custom-commented paragraph.
1974        let comment = rewrite_comment(r#"//@ test1
1975
1976                                         //@ test2"#,
1977                                      false,
1978                                      Shape::legacy(100, Indent::new(0, 0)),
1979                                      &wrap_normalize_config).unwrap();
1980        assert_eq!("//@ test1\n\n//@ test2", comment);
1981
1982        // Check that bare lines are just indented but otherwise left unchanged.
1983        let comment = rewrite_comment(r#"// test1
1984                                         /*
1985                                           a bare line!
1986
1987                                                another bare line!
1988                                          */"#,
1989                                      false,
1990                                      Shape::legacy(100, Indent::new(0, 0)),
1991                                      &wrap_config).unwrap();
1992        assert_eq!("// test1\n/*\n a bare line!\n\n      another bare line!\n*/", comment);
1993    }
1994
1995    // This is probably intended to be a non-test fn, but it is not used.
1996    // We should keep this around unless it helps us test stuff to remove it.
1997    fn uncommented(text: &str) -> String {
1998        CharClasses::new(text.chars())
1999            .filter_map(|(s, c)| match s {
2000                FullCodeCharKind::Normal | FullCodeCharKind::InString => Some(c),
2001                _ => None,
2002            })
2003            .collect()
2004    }
2005
2006    #[test]
2007    fn test_uncommented() {
2008        assert_eq!(&uncommented("abc/*...*/"), "abc");
2009        assert_eq!(
2010            &uncommented("// .... /* \n../* /* *** / */ */a/* // */c\n"),
2011            "..ac\n"
2012        );
2013        assert_eq!(&uncommented("abc \" /* */\" qsdf"), "abc \" /* */\" qsdf");
2014    }
2015
2016    #[test]
2017    fn test_contains_comment() {
2018        assert_eq!(contains_comment("abc"), false);
2019        assert_eq!(contains_comment("abc // qsdf"), true);
2020        assert_eq!(contains_comment("abc /* kqsdf"), true);
2021        assert_eq!(contains_comment("abc \" /* */\" qsdf"), false);
2022    }
2023
2024    #[test]
2025    fn test_find_uncommented() {
2026        fn check(haystack: &str, needle: &str, expected: Option<usize>) {
2027            assert_eq!(expected, haystack.find_uncommented(needle));
2028        }
2029
2030        check("/*/ */test", "test", Some(6));
2031        check("//test\ntest", "test", Some(7));
2032        check("/* comment only */", "whatever", None);
2033        check(
2034            "/* comment */ some text /* more commentary */ result",
2035            "result",
2036            Some(46),
2037        );
2038        check("sup // sup", "p", Some(2));
2039        check("sup", "x", None);
2040        check(r#"π? /**/ π is nice!"#, r#"π is nice"#, Some(9));
2041        check("/*sup yo? \n sup*/ sup", "p", Some(20));
2042        check("hel/*lohello*/lo", "hello", None);
2043        check("acb", "ab", None);
2044        check(",/*A*/ ", ",", Some(0));
2045        check("abc", "abc", Some(0));
2046        check("/* abc */", "abc", None);
2047        check("/**/abc/* */", "abc", Some(4));
2048        check("\"/* abc */\"", "abc", Some(4));
2049        check("\"/* abc", "abc", Some(4));
2050    }
2051
2052    #[test]
2053    fn test_filter_normal_code() {
2054        let s = r#"
2055fn main() {
2056    println!("hello, world");
2057}
2058"#;
2059        assert_eq!(s, filter_normal_code(s));
2060        let s_with_comment = r#"
2061fn main() {
2062    // hello, world
2063    println!("hello, world");
2064}
2065"#;
2066        assert_eq!(s, filter_normal_code(s_with_comment));
2067    }
2068
2069    #[test]
2070    fn test_itemized_block_first_line_handling() {
2071        fn run_test(
2072            test_input: &str,
2073            expected_line: &str,
2074            expected_indent: usize,
2075            expected_opener: &str,
2076            expected_line_start: &str,
2077        ) {
2078            let block = ItemizedBlock::new(test_input).unwrap();
2079            assert_eq!(1, block.lines.len(), "test_input: {test_input:?}");
2080            assert_eq!(expected_line, &block.lines[0], "test_input: {test_input:?}");
2081            assert_eq!(expected_indent, block.indent, "test_input: {test_input:?}");
2082            assert_eq!(expected_opener, &block.opener, "test_input: {test_input:?}");
2083            assert_eq!(
2084                expected_line_start, &block.line_start,
2085                "test_input: {test_input:?}"
2086            );
2087        }
2088
2089        run_test("- foo", "foo", 2, "- ", "  ");
2090        run_test("* foo", "foo", 2, "* ", "  ");
2091        run_test("> foo", "foo", 2, "> ", "> ");
2092
2093        run_test("1. foo", "foo", 3, "1. ", "   ");
2094        run_test("12. foo", "foo", 4, "12. ", "    ");
2095        run_test("1) foo", "foo", 3, "1) ", "   ");
2096        run_test("12) foo", "foo", 4, "12) ", "    ");
2097
2098        run_test("    - foo", "foo", 6, "    - ", "      ");
2099
2100        // https://spec.commonmark.org/0.30 says: "A start number may begin with 0s":
2101        run_test("0. foo", "foo", 3, "0. ", "   ");
2102        run_test("01. foo", "foo", 4, "01. ", "    ");
2103    }
2104
2105    #[test]
2106    fn test_itemized_block_nonobvious_markers_are_rejected() {
2107        let test_inputs = vec![
2108            // Non-numeric item markers (e.g. `a.` or `iv.`) are not allowed by
2109            // https://spec.commonmark.org/0.30/#ordered-list-marker. We also note that allowing
2110            // them would risk misidentifying regular words as item markers. See also the
2111            // discussion in https://talk.commonmark.org/t/blank-lines-before-lists-revisited/1990
2112            "word.  rest of the paragraph.",
2113            "a.  maybe this is a list item?  maybe not?",
2114            "iv.  maybe this is a list item?  maybe not?",
2115            // Numbers with 3 or more digits are not recognized as item markers, to avoid
2116            // formatting the following example as a list:
2117            //
2118            // ```
2119            // The Captain died in
2120            // 1868. He was buried in...
2121            // ```
2122            "123.  only 2-digit numbers are recognized as item markers.",
2123            // Parens:
2124            "123)  giving some coverage to parens as well.",
2125            "a)  giving some coverage to parens as well.",
2126            // https://spec.commonmark.org/0.30 says that "at least one space or tab is needed
2127            // between the list marker and any following content":
2128            "1.Not a list item.",
2129            "1.2.3. Not a list item.",
2130            "1)Not a list item.",
2131            "-Not a list item.",
2132            "+Not a list item.",
2133            "+1 not a list item.",
2134            // https://spec.commonmark.org/0.30 says: "A start number may not be negative":
2135            "-1. Not a list item.",
2136            "-1 Not a list item.",
2137            // Marker without prefix are not recognized as item markers:
2138            ".   Not a list item.",
2139            ")   Not a list item.",
2140        ];
2141        for line in test_inputs.iter() {
2142            let maybe_block = ItemizedBlock::new(line);
2143            assert!(
2144                maybe_block.is_none(),
2145                "The following line shouldn't be classified as a list item: {line}"
2146            );
2147        }
2148    }
2149}
rustfmt_nightly/comment.rs

rustfmt_nightly/
comment.rs