rustdoc/html/
highlight.rs

//! Basic syntax highlighting functionality.
//!
//! This module uses the `rustc_lexer` lexer to provide token-based highlighting for
//! the HTML documentation generated by rustdoc.
//!
//! Use `render_with_highlighting` to highlight some Rust code.
7
8use std::collections::VecDeque;
9use std::fmt::{Display, Write};
10
11use rustc_data_structures::fx::FxIndexMap;
12use rustc_lexer::{Cursor, LiteralKind, TokenKind};
13use rustc_span::edition::Edition;
14use rustc_span::symbol::Symbol;
15use rustc_span::{BytePos, DUMMY_SP, Span};
16
17use super::format::{self, write_str};
18use crate::clean::PrimitiveType;
19use crate::html::escape::EscapeBodyText;
20use crate::html::render::{Context, LinkFromSrc};
21
22/// This type is needed in case we want to render links on items to allow to go to their definition.
23pub(crate) struct HrefContext<'a, 'tcx> {
24    pub(crate) context: &'a Context<'tcx>,
25    /// This span contains the current file we're going through.
26    pub(crate) file_span: Span,
27    /// This field is used to know "how far" from the top of the directory we are to link to either
28    /// documentation pages or other source pages.
29    pub(crate) root_path: &'a str,
30    /// This field is used to calculate precise local URLs.
31    pub(crate) current_href: String,
32}
33
34/// Decorations are represented as a map from CSS class to vector of character ranges.
35/// Each range will be wrapped in a span with that class.
36#[derive(Default)]
37pub(crate) struct DecorationInfo(pub(crate) FxIndexMap<&'static str, Vec<(u32, u32)>>);
38
39#[derive(Eq, PartialEq, Clone, Copy)]
40pub(crate) enum Tooltip {
41    Ignore,
42    CompileFail,
43    ShouldPanic,
44    Edition(Edition),
45    None,
46}
47
48/// Highlights `src` as an inline example, returning the HTML output.
49pub(crate) fn render_example_with_highlighting(
50    src: &str,
51    out: &mut String,
52    tooltip: Tooltip,
53    playground_button: Option<&str>,
54    extra_classes: &[String],
55) {
56    write_header(out, "rust-example-rendered", None, tooltip, extra_classes);
57    write_code(out, src, None, None, None);
58    write_footer(out, playground_button);
59}
60
61fn write_header(
62    out: &mut String,
63    class: &str,
64    extra_content: Option<&str>,
65    tooltip: Tooltip,
66    extra_classes: &[String],
67) {
68    write_str(
69        out,
70        format_args!(
71            "<div class=\"example-wrap{}\">",
72            match tooltip {
73                Tooltip::Ignore => " ignore",
74                Tooltip::CompileFail => " compile_fail",
75                Tooltip::ShouldPanic => " should_panic",
76                Tooltip::Edition(_) => " edition",
77                Tooltip::None => "",
78            }
79        ),
80    );
81
82    if tooltip != Tooltip::None {
83        let edition_code;
84        write_str(
85            out,
86            format_args!(
87                "<a href=\"#\" class=\"tooltip\" title=\"{}\">ⓘ</a>",
88                match tooltip {
89                    Tooltip::Ignore => "This example is not tested",
90                    Tooltip::CompileFail => "This example deliberately fails to compile",
91                    Tooltip::ShouldPanic => "This example panics",
92                    Tooltip::Edition(edition) => {
93                        edition_code = format!("This example runs with edition {edition}");
94                        &edition_code
95                    }
96                    Tooltip::None => unreachable!(),
97                }
98            ),
99        );
100    }
101
102    if let Some(extra) = extra_content {
103        out.push_str(&extra);
104    }
105    if class.is_empty() {
106        write_str(
107            out,
108            format_args!(
109                "<pre class=\"rust{}{}\">",
110                if extra_classes.is_empty() { "" } else { " " },
111                extra_classes.join(" ")
112            ),
113        );
114    } else {
115        write_str(
116            out,
117            format_args!(
118                "<pre class=\"rust {class}{}{}\">",
119                if extra_classes.is_empty() { "" } else { " " },
120                extra_classes.join(" ")
121            ),
122        );
123    }
124    write_str(out, format_args!("<code>"));
125}
126
127/// Check if two `Class` can be merged together. In the following rules, "unclassified" means `None`
128/// basically (since it's `Option<Class>`). The following rules apply:
129///
130/// * If two `Class` have the same variant, then they can be merged.
131/// * If the other `Class` is unclassified and only contains white characters (backline,
132///   whitespace, etc), it can be merged.
133/// * `Class::Ident` is considered the same as unclassified (because it doesn't have an associated
134///    CSS class).
135fn can_merge(class1: Option<Class>, class2: Option<Class>, text: &str) -> bool {
136    match (class1, class2) {
137        (Some(c1), Some(c2)) => c1.is_equal_to(c2),
138        (Some(Class::Ident(_)), None) | (None, Some(Class::Ident(_))) => true,
139        (Some(Class::Macro(_)), _) => false,
140        (Some(_), None) | (None, Some(_)) => text.trim().is_empty(),
141        (None, None) => true,
142    }
143}
144
145/// This type is used as a conveniency to prevent having to pass all its fields as arguments into
146/// the various functions (which became its methods).
147struct TokenHandler<'a, 'tcx, F: Write> {
148    out: &'a mut F,
149    /// It contains the closing tag and the associated `Class`.
150    closing_tags: Vec<(&'static str, Class)>,
151    /// This is used because we don't automatically generate the closing tag on `ExitSpan` in
152    /// case an `EnterSpan` event with the same class follows.
153    pending_exit_span: Option<Class>,
154    /// `current_class` and `pending_elems` are used to group HTML elements with same `class`
155    /// attributes to reduce the DOM size.
156    current_class: Option<Class>,
157    /// We need to keep the `Class` for each element because it could contain a `Span` which is
158    /// used to generate links.
159    pending_elems: Vec<(&'a str, Option<Class>)>,
160    href_context: Option<HrefContext<'a, 'tcx>>,
161    write_line_number: fn(&mut F, u32, &'static str),
162}
163
164impl<F: Write> TokenHandler<'_, '_, F> {
165    fn handle_exit_span(&mut self) {
166        // We can't get the last `closing_tags` element using `pop()` because `closing_tags` is
167        // being used in `write_pending_elems`.
168        let class = self.closing_tags.last().expect("ExitSpan without EnterSpan").1;
169        // We flush everything just in case...
170        self.write_pending_elems(Some(class));
171
172        exit_span(self.out, self.closing_tags.pop().expect("ExitSpan without EnterSpan").0);
173        self.pending_exit_span = None;
174    }
175
176    /// Write all the pending elements sharing a same (or at mergeable) `Class`.
177    ///
178    /// If there is a "parent" (if a `EnterSpan` event was encountered) and the parent can be merged
179    /// with the elements' class, then we simply write the elements since the `ExitSpan` event will
180    /// close the tag.
181    ///
182    /// Otherwise, if there is only one pending element, we let the `string` function handle both
183    /// opening and closing the tag, otherwise we do it into this function.
184    ///
185    /// It returns `true` if `current_class` must be set to `None` afterwards.
186    fn write_pending_elems(&mut self, current_class: Option<Class>) -> bool {
187        if self.pending_elems.is_empty() {
188            return false;
189        }
190        if let Some((_, parent_class)) = self.closing_tags.last()
191            && can_merge(current_class, Some(*parent_class), "")
192        {
193            for (text, class) in self.pending_elems.iter() {
194                string(
195                    self.out,
196                    EscapeBodyText(text),
197                    *class,
198                    &self.href_context,
199                    false,
200                    self.write_line_number,
201                );
202            }
203        } else {
204            // We only want to "open" the tag ourselves if we have more than one pending and if the
205            // current parent tag is not the same as our pending content.
206            let close_tag = if self.pending_elems.len() > 1
207                && let Some(current_class) = current_class
208                // `PreludeTy` can never include more than an ident so it should not generate
209                // a wrapping `span`.
210                && !matches!(current_class, Class::PreludeTy(_))
211            {
212                Some(enter_span(self.out, current_class, &self.href_context))
213            } else {
214                None
215            };
216            for (text, class) in self.pending_elems.iter() {
217                string(
218                    self.out,
219                    EscapeBodyText(text),
220                    *class,
221                    &self.href_context,
222                    close_tag.is_none(),
223                    self.write_line_number,
224                );
225            }
226            if let Some(close_tag) = close_tag {
227                exit_span(self.out, close_tag);
228            }
229        }
230        self.pending_elems.clear();
231        true
232    }
233
234    #[inline]
235    fn write_line_number(&mut self, line: u32, extra: &'static str) {
236        (self.write_line_number)(&mut self.out, line, extra);
237    }
238}
239
240impl<F: Write> Drop for TokenHandler<'_, '_, F> {
241    /// When leaving, we need to flush all pending data to not have missing content.
242    fn drop(&mut self) {
243        if self.pending_exit_span.is_some() {
244            self.handle_exit_span();
245        } else {
246            self.write_pending_elems(self.current_class);
247        }
248    }
249}
250
/// Writes `extra` followed by the number of `line` wrapped into a `<span>`.
///
/// Panics if writing into `out` fails.
fn write_scraped_line_number(out: &mut impl Write, line: u32, extra: &'static str) {
    // The `data-nosnippet` attribute prevents "1 2 3 4 5 ..." from showing up in web search
    // results:
    // https://developers.google.com/search/docs/crawling-indexing/robots-meta-tag#data-nosnippet-attr
    out.write_fmt(format_args!("{extra}<span data-nosnippet>{line}</span>")).unwrap();
}
256
/// Writes `extra` followed by the number of `line` as a link whose `id` and target are both the
/// line number.
///
/// Panics if writing into `out` fails.
fn write_line_number(out: &mut impl Write, line: u32, extra: &'static str) {
    // The `data-nosnippet` attribute prevents "1 2 3 4 5 ..." from showing up in web search
    // results:
    // https://developers.google.com/search/docs/crawling-indexing/robots-meta-tag#data-nosnippet-attr
    out.write_fmt(format_args!("{extra}<a href=#{line} id={line} data-nosnippet>{line}</a>"))
        .unwrap();
}
262
/// Line number writer used when no line number is wanted: only `extra` is written.
///
/// Panics if writing into `out` fails.
fn empty_line_number(out: &mut impl Write, _: u32, extra: &'static str) {
    Write::write_str(out, extra).unwrap();
}
266
267#[derive(Clone, Copy)]
268pub(super) struct LineInfo {
269    pub(super) start_line: u32,
270    max_lines: u32,
271    pub(super) is_scraped_example: bool,
272}
273
274impl LineInfo {
275    pub(super) fn new(max_lines: u32) -> Self {
276        Self { start_line: 1, max_lines: max_lines + 1, is_scraped_example: false }
277    }
278
279    pub(super) fn new_scraped(max_lines: u32, start_line: u32) -> Self {
280        Self {
281            start_line: start_line + 1,
282            max_lines: max_lines + start_line + 1,
283            is_scraped_example: true,
284        }
285    }
286}
287
288/// Convert the given `src` source code into HTML by adding classes for highlighting.
289///
290/// This code is used to render code blocks (in the documentation) as well as the source code pages.
291///
292/// Some explanations on the last arguments:
293///
294/// In case we are rendering a code block and not a source code file, `href_context` will be `None`.
295/// To put it more simply: if `href_context` is `None`, the code won't try to generate links to an
296/// item definition.
297///
298/// More explanations about spans and how we use them here are provided in the
299pub(super) fn write_code(
300    out: &mut impl Write,
301    src: &str,
302    href_context: Option<HrefContext<'_, '_>>,
303    decoration_info: Option<&DecorationInfo>,
304    line_info: Option<LineInfo>,
305) {
306    // This replace allows to fix how the code source with DOS backline characters is displayed.
307    let src = src.replace("\r\n", "\n");
308    let mut token_handler = TokenHandler {
309        out,
310        closing_tags: Vec::new(),
311        pending_exit_span: None,
312        current_class: None,
313        pending_elems: Vec::new(),
314        href_context,
315        write_line_number: match line_info {
316            Some(line_info) => {
317                if line_info.is_scraped_example {
318                    write_scraped_line_number
319                } else {
320                    write_line_number
321                }
322            }
323            None => empty_line_number,
324        },
325    };
326
327    let (mut line, max_lines) = if let Some(line_info) = line_info {
328        token_handler.write_line_number(line_info.start_line, "");
329        (line_info.start_line, line_info.max_lines)
330    } else {
331        (0, u32::MAX)
332    };
333
334    Classifier::new(
335        &src,
336        token_handler.href_context.as_ref().map(|c| c.file_span).unwrap_or(DUMMY_SP),
337        decoration_info,
338    )
339    .highlight(&mut |highlight| {
340        match highlight {
341            Highlight::Token { text, class } => {
342                // If we received a `ExitSpan` event and then have a non-compatible `Class`, we
343                // need to close the `<span>`.
344                let need_current_class_update = if let Some(pending) =
345                    token_handler.pending_exit_span
346                    && !can_merge(Some(pending), class, text)
347                {
348                    token_handler.handle_exit_span();
349                    true
350                // If the two `Class` are different, time to flush the current content and start
351                // a new one.
352                } else if !can_merge(token_handler.current_class, class, text) {
353                    token_handler.write_pending_elems(token_handler.current_class);
354                    true
355                } else {
356                    token_handler.current_class.is_none()
357                };
358
359                if need_current_class_update {
360                    token_handler.current_class = class.map(Class::dummy);
361                }
362                if text == "\n" {
363                    line += 1;
364                    if line < max_lines {
365                        token_handler.pending_elems.push((text, Some(Class::Backline(line))));
366                    }
367                } else {
368                    token_handler.pending_elems.push((text, class));
369                }
370            }
371            Highlight::EnterSpan { class } => {
372                let mut should_add = true;
373                if let Some(pending_exit_span) = token_handler.pending_exit_span {
374                    if class.is_equal_to(pending_exit_span) {
375                        should_add = false;
376                    } else {
377                        token_handler.handle_exit_span();
378                    }
379                } else {
380                    // We flush everything just in case...
381                    if token_handler.write_pending_elems(token_handler.current_class) {
382                        token_handler.current_class = None;
383                    }
384                }
385                if should_add {
386                    let closing_tag =
387                        enter_span(token_handler.out, class, &token_handler.href_context);
388                    token_handler.closing_tags.push((closing_tag, class));
389                }
390
391                token_handler.current_class = None;
392                token_handler.pending_exit_span = None;
393            }
394            Highlight::ExitSpan => {
395                token_handler.current_class = None;
396                token_handler.pending_exit_span = Some(
397                    token_handler
398                        .closing_tags
399                        .last()
400                        .as_ref()
401                        .expect("ExitSpan without EnterSpan")
402                        .1,
403                );
404            }
405        };
406    });
407}
408
409fn write_footer(out: &mut String, playground_button: Option<&str>) {
410    write_str(out, format_args_nl!("</code></pre>{}</div>", playground_button.unwrap_or_default()));
411}
412
413/// How a span of text is classified. Mostly corresponds to token kinds.
414#[derive(Clone, Copy, Debug, Eq, PartialEq)]
415enum Class {
416    Comment,
417    DocComment,
418    Attribute,
419    KeyWord,
420    /// Keywords that do pointer/reference stuff.
421    RefKeyWord,
422    Self_(Span),
423    Macro(Span),
424    MacroNonTerminal,
425    String,
426    Number,
427    Bool,
428    /// `Ident` isn't rendered in the HTML but we still need it for the `Span` it contains.
429    Ident(Span),
430    Lifetime,
431    PreludeTy(Span),
432    PreludeVal(Span),
433    QuestionMark,
434    Decoration(&'static str),
435    Backline(u32),
436}
437
438impl Class {
439    /// It is only looking at the variant, not the variant content.
440    ///
441    /// It is used mostly to group multiple similar HTML elements into one `<span>` instead of
442    /// multiple ones.
443    fn is_equal_to(self, other: Self) -> bool {
444        match (self, other) {
445            (Self::Self_(_), Self::Self_(_))
446            | (Self::Macro(_), Self::Macro(_))
447            | (Self::Ident(_), Self::Ident(_)) => true,
448            (Self::Decoration(c1), Self::Decoration(c2)) => c1 == c2,
449            (x, y) => x == y,
450        }
451    }
452
453    /// If `self` contains a `Span`, it'll be replaced with `DUMMY_SP` to prevent creating links
454    /// on "empty content" (because of the attributes merge).
455    fn dummy(self) -> Self {
456        match self {
457            Self::Self_(_) => Self::Self_(DUMMY_SP),
458            Self::Macro(_) => Self::Macro(DUMMY_SP),
459            Self::Ident(_) => Self::Ident(DUMMY_SP),
460            s => s,
461        }
462    }
463
464    /// Returns the css class expected by rustdoc for each `Class`.
465    fn as_html(self) -> &'static str {
466        match self {
467            Class::Comment => "comment",
468            Class::DocComment => "doccomment",
469            Class::Attribute => "attr",
470            Class::KeyWord => "kw",
471            Class::RefKeyWord => "kw-2",
472            Class::Self_(_) => "self",
473            Class::Macro(_) => "macro",
474            Class::MacroNonTerminal => "macro-nonterminal",
475            Class::String => "string",
476            Class::Number => "number",
477            Class::Bool => "bool-val",
478            Class::Ident(_) => "",
479            Class::Lifetime => "lifetime",
480            Class::PreludeTy(_) => "prelude-ty",
481            Class::PreludeVal(_) => "prelude-val",
482            Class::QuestionMark => "question-mark",
483            Class::Decoration(kind) => kind,
484            Class::Backline(_) => "",
485        }
486    }
487
488    /// In case this is an item which can be converted into a link to a definition, it'll contain
489    /// a "span" (a tuple representing `(lo, hi)` equivalent of `Span`).
490    fn get_span(self) -> Option<Span> {
491        match self {
492            Self::Ident(sp)
493            | Self::Self_(sp)
494            | Self::Macro(sp)
495            | Self::PreludeTy(sp)
496            | Self::PreludeVal(sp) => Some(sp),
497            Self::Comment
498            | Self::DocComment
499            | Self::Attribute
500            | Self::KeyWord
501            | Self::RefKeyWord
502            | Self::MacroNonTerminal
503            | Self::String
504            | Self::Number
505            | Self::Bool
506            | Self::Lifetime
507            | Self::QuestionMark
508            | Self::Decoration(_)
509            | Self::Backline(_) => None,
510        }
511    }
512}
513
514#[derive(Debug)]
515enum Highlight<'a> {
516    Token { text: &'a str, class: Option<Class> },
517    EnterSpan { class: Class },
518    ExitSpan,
519}
520
521struct TokenIter<'a> {
522    src: &'a str,
523    cursor: Cursor<'a>,
524}
525
526impl<'a> Iterator for TokenIter<'a> {
527    type Item = (TokenKind, &'a str);
528    fn next(&mut self) -> Option<(TokenKind, &'a str)> {
529        let token = self.cursor.advance_token();
530        if token.kind == TokenKind::Eof {
531            return None;
532        }
533        let (text, rest) = self.src.split_at(token.len as usize);
534        self.src = rest;
535        Some((token.kind, text))
536    }
537}
538
539/// Classifies into identifier class; returns `None` if this is a non-keyword identifier.
540fn get_real_ident_class(text: &str, allow_path_keywords: bool) -> Option<Class> {
541    let ignore: &[&str] =
542        if allow_path_keywords { &["self", "Self", "super", "crate"] } else { &["self", "Self"] };
543    if ignore.iter().any(|k| *k == text) {
544        return None;
545    }
546    Some(match text {
547        "ref" | "mut" => Class::RefKeyWord,
548        "false" | "true" => Class::Bool,
549        _ if Symbol::intern(text).is_reserved(|| Edition::Edition2021) => Class::KeyWord,
550        _ => return None,
551    })
552}
553
554/// This iterator comes from the same idea than "Peekable" except that it allows to "peek" more than
555/// just the next item by using `peek_next`. The `peek` method always returns the next item after
556/// the current one whereas `peek_next` will return the next item after the last one peeked.
557///
558/// You can use both `peek` and `peek_next` at the same time without problem.
559struct PeekIter<'a> {
560    stored: VecDeque<(TokenKind, &'a str)>,
561    /// This position is reinitialized when using `next`. It is used in `peek_next`.
562    peek_pos: usize,
563    iter: TokenIter<'a>,
564}
565
566impl<'a> PeekIter<'a> {
567    fn new(iter: TokenIter<'a>) -> Self {
568        Self { stored: VecDeque::new(), peek_pos: 0, iter }
569    }
570    /// Returns the next item after the current one. It doesn't interfere with `peek_next` output.
571    fn peek(&mut self) -> Option<&(TokenKind, &'a str)> {
572        if self.stored.is_empty()
573            && let Some(next) = self.iter.next()
574        {
575            self.stored.push_back(next);
576        }
577        self.stored.front()
578    }
579    /// Returns the next item after the last one peeked. It doesn't interfere with `peek` output.
580    fn peek_next(&mut self) -> Option<&(TokenKind, &'a str)> {
581        self.peek_pos += 1;
582        if self.peek_pos - 1 < self.stored.len() {
583            self.stored.get(self.peek_pos - 1)
584        } else if let Some(next) = self.iter.next() {
585            self.stored.push_back(next);
586            self.stored.back()
587        } else {
588            None
589        }
590    }
591}
592
593impl<'a> Iterator for PeekIter<'a> {
594    type Item = (TokenKind, &'a str);
595    fn next(&mut self) -> Option<Self::Item> {
596        self.peek_pos = 0;
597        if let Some(first) = self.stored.pop_front() { Some(first) } else { self.iter.next() }
598    }
599}
600
/// Custom spans inserted into the source. Eg `--scrape-examples` uses this to highlight
/// function calls.
struct Decorations {
    /// Start position and CSS class of each decoration, sorted by position.
    starts: Vec<(u32, &'static str)>,
    /// End position of each decoration, sorted.
    ends: Vec<u32>,
}
606
607impl Decorations {
608    fn new(info: &DecorationInfo) -> Self {
609        // Extract tuples (start, end, kind) into separate sequences of (start, kind) and (end).
610        let (mut starts, mut ends): (Vec<_>, Vec<_>) = info
611            .0
612            .iter()
613            .flat_map(|(&kind, ranges)| ranges.into_iter().map(move |&(lo, hi)| ((lo, kind), hi)))
614            .unzip();
615
616        // Sort the sequences in document order.
617        starts.sort_by_key(|(lo, _)| *lo);
618        ends.sort();
619
620        Decorations { starts, ends }
621    }
622}
623
624/// Processes program tokens, classifying strings of text by highlighting
625/// category (`Class`).
626struct Classifier<'src> {
627    tokens: PeekIter<'src>,
628    in_attribute: bool,
629    in_macro: bool,
630    in_macro_nonterminal: bool,
631    byte_pos: u32,
632    file_span: Span,
633    src: &'src str,
634    decorations: Option<Decorations>,
635}
636
637impl<'src> Classifier<'src> {
638    /// Takes as argument the source code to HTML-ify, the rust edition to use and the source code
639    /// file span which will be used later on by the `span_correspondence_map`.
640    fn new(src: &'src str, file_span: Span, decoration_info: Option<&DecorationInfo>) -> Self {
641        let tokens = PeekIter::new(TokenIter { src, cursor: Cursor::new(src) });
642        let decorations = decoration_info.map(Decorations::new);
643        Classifier {
644            tokens,
645            in_attribute: false,
646            in_macro: false,
647            in_macro_nonterminal: false,
648            byte_pos: 0,
649            file_span,
650            src,
651            decorations,
652        }
653    }
654
655    /// Convenient wrapper to create a [`Span`] from a position in the file.
656    fn new_span(&self, lo: u32, text: &str) -> Span {
657        let hi = lo + text.len() as u32;
658        let file_lo = self.file_span.lo();
659        self.file_span.with_lo(file_lo + BytePos(lo)).with_hi(file_lo + BytePos(hi))
660    }
661
662    /// Concatenate colons and idents as one when possible.
663    fn get_full_ident_path(&mut self) -> Vec<(TokenKind, usize, usize)> {
664        let start = self.byte_pos as usize;
665        let mut pos = start;
666        let mut has_ident = false;
667
668        loop {
669            let mut nb = 0;
670            while let Some((TokenKind::Colon, _)) = self.tokens.peek() {
671                self.tokens.next();
672                nb += 1;
673            }
674            // Ident path can start with "::" but if we already have content in the ident path,
675            // the "::" is mandatory.
676            if has_ident && nb == 0 {
677                return vec![(TokenKind::Ident, start, pos)];
678            } else if nb != 0 && nb != 2 {
679                if has_ident {
680                    return vec![(TokenKind::Ident, start, pos), (TokenKind::Colon, pos, pos + nb)];
681                } else {
682                    return vec![(TokenKind::Colon, start, pos + nb)];
683                }
684            }
685
686            if let Some((None, text)) = self.tokens.peek().map(|(token, text)| {
687                if *token == TokenKind::Ident {
688                    let class = get_real_ident_class(text, true);
689                    (class, text)
690                } else {
691                    // Doesn't matter which Class we put in here...
692                    (Some(Class::Comment), text)
693                }
694            }) {
695                // We only "add" the colon if there is an ident behind.
696                pos += text.len() + nb;
697                has_ident = true;
698                self.tokens.next();
699            } else if nb > 0 && has_ident {
700                return vec![(TokenKind::Ident, start, pos), (TokenKind::Colon, pos, pos + nb)];
701            } else if nb > 0 {
702                return vec![(TokenKind::Colon, start, start + nb)];
703            } else if has_ident {
704                return vec![(TokenKind::Ident, start, pos)];
705            } else {
706                return Vec::new();
707            }
708        }
709    }
710
711    /// Wraps the tokens iteration to ensure that the `byte_pos` is always correct.
712    ///
713    /// It returns the token's kind, the token as a string and its byte position in the source
714    /// string.
715    fn next(&mut self) -> Option<(TokenKind, &'src str, u32)> {
716        if let Some((kind, text)) = self.tokens.next() {
717            let before = self.byte_pos;
718            self.byte_pos += text.len() as u32;
719            Some((kind, text, before))
720        } else {
721            None
722        }
723    }
724
725    /// Exhausts the `Classifier` writing the output into `sink`.
726    ///
727    /// The general structure for this method is to iterate over each token,
728    /// possibly giving it an HTML span with a class specifying what flavor of
729    /// token is used.
730    fn highlight(mut self, sink: &mut dyn FnMut(Highlight<'src>)) {
731        loop {
732            if let Some(decs) = self.decorations.as_mut() {
733                let byte_pos = self.byte_pos;
734                let n_starts = decs.starts.iter().filter(|(i, _)| byte_pos >= *i).count();
735                for (_, kind) in decs.starts.drain(0..n_starts) {
736                    sink(Highlight::EnterSpan { class: Class::Decoration(kind) });
737                }
738
739                let n_ends = decs.ends.iter().filter(|i| byte_pos >= **i).count();
740                for _ in decs.ends.drain(0..n_ends) {
741                    sink(Highlight::ExitSpan);
742                }
743            }
744
745            if self
746                .tokens
747                .peek()
748                .map(|t| matches!(t.0, TokenKind::Colon | TokenKind::Ident))
749                .unwrap_or(false)
750            {
751                let tokens = self.get_full_ident_path();
752                for (token, start, end) in &tokens {
753                    let text = &self.src[*start..*end];
754                    self.advance(*token, text, sink, *start as u32);
755                    self.byte_pos += text.len() as u32;
756                }
757                if !tokens.is_empty() {
758                    continue;
759                }
760            }
761            if let Some((token, text, before)) = self.next() {
762                self.advance(token, text, sink, before);
763            } else {
764                break;
765            }
766        }
767    }
768
769    /// Single step of highlighting. This will classify `token`, but maybe also a couple of
770    /// following ones as well.
771    ///
772    /// `before` is the position of the given token in the `source` string and is used as "lo" byte
773    /// in case we want to try to generate a link for this token using the
774    /// `span_correspondence_map`.
775    fn advance(
776        &mut self,
777        token: TokenKind,
778        text: &'src str,
779        sink: &mut dyn FnMut(Highlight<'src>),
780        before: u32,
781    ) {
782        let lookahead = self.peek();
783        let no_highlight = |sink: &mut dyn FnMut(_)| sink(Highlight::Token { text, class: None });
784        let whitespace = |sink: &mut dyn FnMut(_)| {
785            for part in text.split('\n').intersperse("\n").filter(|s| !s.is_empty()) {
786                sink(Highlight::Token { text: part, class: None });
787            }
788        };
789        let class = match token {
790            TokenKind::Whitespace => return whitespace(sink),
791            TokenKind::LineComment { doc_style } | TokenKind::BlockComment { doc_style, .. } => {
792                if doc_style.is_some() {
793                    Class::DocComment
794                } else {
795                    Class::Comment
796                }
797            }
798            // Consider this as part of a macro invocation if there was a
799            // leading identifier.
800            TokenKind::Bang if self.in_macro => {
801                self.in_macro = false;
802                sink(Highlight::Token { text, class: None });
803                sink(Highlight::ExitSpan);
804                return;
805            }
806
807            // Assume that '&' or '*' is the reference or dereference operator
808            // or a reference or pointer type. Unless, of course, it looks like
809            // a logical and or a multiplication operator: `&&` or `* `.
810            TokenKind::Star => match self.tokens.peek() {
811                Some((TokenKind::Whitespace, _)) => return whitespace(sink),
812                Some((TokenKind::Ident, "mut")) => {
813                    self.next();
814                    sink(Highlight::Token { text: "*mut", class: Some(Class::RefKeyWord) });
815                    return;
816                }
817                Some((TokenKind::Ident, "const")) => {
818                    self.next();
819                    sink(Highlight::Token { text: "*const", class: Some(Class::RefKeyWord) });
820                    return;
821                }
822                _ => Class::RefKeyWord,
823            },
824            TokenKind::And => match self.tokens.peek() {
825                Some((TokenKind::And, _)) => {
826                    self.next();
827                    sink(Highlight::Token { text: "&&", class: None });
828                    return;
829                }
830                Some((TokenKind::Eq, _)) => {
831                    self.next();
832                    sink(Highlight::Token { text: "&=", class: None });
833                    return;
834                }
835                Some((TokenKind::Whitespace, _)) => return whitespace(sink),
836                Some((TokenKind::Ident, "mut")) => {
837                    self.next();
838                    sink(Highlight::Token { text: "&mut", class: Some(Class::RefKeyWord) });
839                    return;
840                }
841                _ => Class::RefKeyWord,
842            },
843
844            // These can either be operators, or arrows.
845            TokenKind::Eq => match lookahead {
846                Some(TokenKind::Eq) => {
847                    self.next();
848                    sink(Highlight::Token { text: "==", class: None });
849                    return;
850                }
851                Some(TokenKind::Gt) => {
852                    self.next();
853                    sink(Highlight::Token { text: "=>", class: None });
854                    return;
855                }
856                _ => return no_highlight(sink),
857            },
858            TokenKind::Minus if lookahead == Some(TokenKind::Gt) => {
859                self.next();
860                sink(Highlight::Token { text: "->", class: None });
861                return;
862            }
863
864            // Other operators.
865            TokenKind::Minus
866            | TokenKind::Plus
867            | TokenKind::Or
868            | TokenKind::Slash
869            | TokenKind::Caret
870            | TokenKind::Percent
871            | TokenKind::Bang
872            | TokenKind::Lt
873            | TokenKind::Gt => return no_highlight(sink),
874
875            // Miscellaneous, no highlighting.
876            TokenKind::Dot
877            | TokenKind::Semi
878            | TokenKind::Comma
879            | TokenKind::OpenParen
880            | TokenKind::CloseParen
881            | TokenKind::OpenBrace
882            | TokenKind::CloseBrace
883            | TokenKind::OpenBracket
884            | TokenKind::At
885            | TokenKind::Tilde
886            | TokenKind::Colon
887            | TokenKind::Unknown => return no_highlight(sink),
888
889            TokenKind::Question => Class::QuestionMark,
890
891            TokenKind::Dollar => match lookahead {
892                Some(TokenKind::Ident) => {
893                    self.in_macro_nonterminal = true;
894                    Class::MacroNonTerminal
895                }
896                _ => return no_highlight(sink),
897            },
898
899            // This might be the start of an attribute. We're going to want to
900            // continue highlighting it as an attribute until the ending ']' is
901            // seen, so skip out early. Down below we terminate the attribute
902            // span when we see the ']'.
903            TokenKind::Pound => {
904                match lookahead {
905                    // Case 1: #![inner_attribute]
906                    Some(TokenKind::Bang) => {
907                        self.next();
908                        if let Some(TokenKind::OpenBracket) = self.peek() {
909                            self.in_attribute = true;
910                            sink(Highlight::EnterSpan { class: Class::Attribute });
911                        }
912                        sink(Highlight::Token { text: "#", class: None });
913                        sink(Highlight::Token { text: "!", class: None });
914                        return;
915                    }
916                    // Case 2: #[outer_attribute]
917                    Some(TokenKind::OpenBracket) => {
918                        self.in_attribute = true;
919                        sink(Highlight::EnterSpan { class: Class::Attribute });
920                    }
921                    _ => (),
922                }
923                return no_highlight(sink);
924            }
925            TokenKind::CloseBracket => {
926                if self.in_attribute {
927                    self.in_attribute = false;
928                    sink(Highlight::Token { text: "]", class: None });
929                    sink(Highlight::ExitSpan);
930                    return;
931                }
932                return no_highlight(sink);
933            }
934            TokenKind::Literal { kind, .. } => match kind {
935                // Text literals.
936                LiteralKind::Byte { .. }
937                | LiteralKind::Char { .. }
938                | LiteralKind::Str { .. }
939                | LiteralKind::ByteStr { .. }
940                | LiteralKind::RawStr { .. }
941                | LiteralKind::RawByteStr { .. }
942                | LiteralKind::CStr { .. }
943                | LiteralKind::RawCStr { .. } => Class::String,
944                // Number literals.
945                LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number,
946            },
947            TokenKind::GuardedStrPrefix => return no_highlight(sink),
948            TokenKind::Ident | TokenKind::RawIdent if lookahead == Some(TokenKind::Bang) => {
949                self.in_macro = true;
950                sink(Highlight::EnterSpan { class: Class::Macro(self.new_span(before, text)) });
951                sink(Highlight::Token { text, class: None });
952                return;
953            }
954            TokenKind::Ident => match get_real_ident_class(text, false) {
955                None => match text {
956                    "Option" | "Result" => Class::PreludeTy(self.new_span(before, text)),
957                    "Some" | "None" | "Ok" | "Err" => {
958                        Class::PreludeVal(self.new_span(before, text))
959                    }
960                    // "union" is a weak keyword and is only considered as a keyword when declaring
961                    // a union type.
962                    "union" if self.check_if_is_union_keyword() => Class::KeyWord,
963                    _ if self.in_macro_nonterminal => {
964                        self.in_macro_nonterminal = false;
965                        Class::MacroNonTerminal
966                    }
967                    "self" | "Self" => Class::Self_(self.new_span(before, text)),
968                    _ => Class::Ident(self.new_span(before, text)),
969                },
970                Some(c) => c,
971            },
972            TokenKind::RawIdent | TokenKind::UnknownPrefix | TokenKind::InvalidIdent => {
973                Class::Ident(self.new_span(before, text))
974            }
975            TokenKind::Lifetime { .. }
976            | TokenKind::RawLifetime
977            | TokenKind::UnknownPrefixLifetime => Class::Lifetime,
978            TokenKind::Eof => panic!("Eof in advance"),
979        };
980        // Anything that didn't return above is the simple case where we the
981        // class just spans a single token, so we can use the `string` method.
982        for part in text.split('\n').intersperse("\n").filter(|s| !s.is_empty()) {
983            sink(Highlight::Token { text: part, class: Some(class) });
984        }
985    }
986
987    fn peek(&mut self) -> Option<TokenKind> {
988        self.tokens.peek().map(|(token_kind, _text)| *token_kind)
989    }
990
991    fn check_if_is_union_keyword(&mut self) -> bool {
992        while let Some(kind) = self.tokens.peek_next().map(|(token_kind, _text)| token_kind) {
993            if *kind == TokenKind::Whitespace {
994                continue;
995            }
996            return *kind == TokenKind::Ident;
997        }
998        false
999    }
1000}
1001
1002/// Called when we start processing a span of text that should be highlighted.
1003/// The `Class` argument specifies how it should be highlighted.
1004fn enter_span(
1005    out: &mut impl Write,
1006    klass: Class,
1007    href_context: &Option<HrefContext<'_, '_>>,
1008) -> &'static str {
1009    string_without_closing_tag(out, "", Some(klass), href_context, true).expect(
1010        "internal error: enter_span was called with Some(klass) but did not return a \
1011            closing HTML tag",
1012    )
1013}
1014
/// Closes a span of highlighted text by writing `closing_tag` to `out`.
///
/// # Panics
///
/// Panics if writing to `out` fails.
fn exit_span(out: &mut impl Write, closing_tag: &str) {
    Write::write_str(out, closing_tag).unwrap();
}
1019
1020/// Called for a span of text. If the text should be highlighted differently
1021/// from the surrounding text, then the `Class` argument will be a value other
1022/// than `None`.
1023///
1024/// The following sequences of callbacks are equivalent:
1025/// ```plain
1026///     enter_span(Foo), string("text", None), exit_span()
1027///     string("text", Foo)
1028/// ```
1029///
1030/// The latter can be thought of as a shorthand for the former, which is more
1031/// flexible.
1032///
1033/// Note that if `context` is not `None` and that the given `klass` contains a `Span`, the function
1034/// will then try to find this `span` in the `span_correspondence_map`. If found, it'll then
1035/// generate a link for this element (which corresponds to where its definition is located).
1036fn string<T: Display, W: Write>(
1037    out: &mut W,
1038    text: T,
1039    klass: Option<Class>,
1040    href_context: &Option<HrefContext<'_, '_>>,
1041    open_tag: bool,
1042    write_line_number_callback: fn(&mut W, u32, &'static str),
1043) {
1044    if let Some(Class::Backline(line)) = klass {
1045        write_line_number_callback(out, line, "\n");
1046    } else if let Some(closing_tag) =
1047        string_without_closing_tag(out, text, klass, href_context, open_tag)
1048    {
1049        out.write_str(closing_tag).unwrap();
1050    }
1051}
1052
1053/// This function writes `text` into `out` with some modifications depending on `klass`:
1054///
1055/// * If `klass` is `None`, `text` is written into `out` with no modification.
1056/// * If `klass` is `Some` but `klass.get_span()` is `None`, it writes the text wrapped in a
1057///   `<span>` with the provided `klass`.
1058/// * If `klass` is `Some` and has a [`rustc_span::Span`], it then tries to generate a link (`<a>`
1059///   element) by retrieving the link information from the `span_correspondence_map` that was filled
1060///   in `span_map.rs::collect_spans_and_sources`. If it cannot retrieve the information, then it's
1061///   the same as the second point (`klass` is `Some` but doesn't have a [`rustc_span::Span`]).
1062fn string_without_closing_tag<T: Display>(
1063    out: &mut impl Write,
1064    text: T,
1065    klass: Option<Class>,
1066    href_context: &Option<HrefContext<'_, '_>>,
1067    open_tag: bool,
1068) -> Option<&'static str> {
1069    let Some(klass) = klass else {
1070        write!(out, "{text}").unwrap();
1071        return None;
1072    };
1073    let Some(def_span) = klass.get_span() else {
1074        if !open_tag {
1075            write!(out, "{text}").unwrap();
1076            return None;
1077        }
1078        write!(out, "<span class=\"{klass}\">{text}", klass = klass.as_html()).unwrap();
1079        return Some("</span>");
1080    };
1081
1082    let mut text_s = text.to_string();
1083    if text_s.contains("::") {
1084        text_s = text_s.split("::").intersperse("::").fold(String::new(), |mut path, t| {
1085            match t {
1086                "self" | "Self" => write!(
1087                    &mut path,
1088                    "<span class=\"{klass}\">{t}</span>",
1089                    klass = Class::Self_(DUMMY_SP).as_html(),
1090                ),
1091                "crate" | "super" => {
1092                    write!(
1093                        &mut path,
1094                        "<span class=\"{klass}\">{t}</span>",
1095                        klass = Class::KeyWord.as_html(),
1096                    )
1097                }
1098                t => write!(&mut path, "{t}"),
1099            }
1100            .expect("Failed to build source HTML path");
1101            path
1102        });
1103    }
1104
1105    if let Some(href_context) = href_context {
1106        if let Some(href) =
1107            href_context.context.shared.span_correspondence_map.get(&def_span).and_then(|href| {
1108                let context = href_context.context;
1109                // FIXME: later on, it'd be nice to provide two links (if possible) for all items:
1110                // one to the documentation page and one to the source definition.
1111                // FIXME: currently, external items only generate a link to their documentation,
1112                // a link to their definition can be generated using this:
1113                // https://github.com/rust-lang/rust/blob/60f1a2fc4b535ead9c85ce085fdce49b1b097531/src/librustdoc/html/render/context.rs#L315-L338
1114                match href {
1115                    LinkFromSrc::Local(span) => {
1116                        context.href_from_span_relative(*span, &href_context.current_href)
1117                    }
1118                    LinkFromSrc::External(def_id) => {
1119                        format::href_with_root_path(*def_id, context, Some(href_context.root_path))
1120                            .ok()
1121                            .map(|(url, _, _)| url)
1122                    }
1123                    LinkFromSrc::Primitive(prim) => format::href_with_root_path(
1124                        PrimitiveType::primitive_locations(context.tcx())[prim],
1125                        context,
1126                        Some(href_context.root_path),
1127                    )
1128                    .ok()
1129                    .map(|(url, _, _)| url),
1130                    LinkFromSrc::Doc(def_id) => {
1131                        format::href_with_root_path(*def_id, context, Some(href_context.root_path))
1132                            .ok()
1133                            .map(|(doc_link, _, _)| doc_link)
1134                    }
1135                }
1136            })
1137        {
1138            if !open_tag {
1139                // We're already inside an element which has the same klass, no need to give it
1140                // again.
1141                write!(out, "<a href=\"{href}\">{text_s}").unwrap();
1142            } else {
1143                let klass_s = klass.as_html();
1144                if klass_s.is_empty() {
1145                    write!(out, "<a href=\"{href}\">{text_s}").unwrap();
1146                } else {
1147                    write!(out, "<a class=\"{klass_s}\" href=\"{href}\">{text_s}").unwrap();
1148                }
1149            }
1150            return Some("</a>");
1151        }
1152    }
1153    if !open_tag {
1154        write!(out, "{}", text_s).unwrap();
1155        return None;
1156    }
1157    let klass_s = klass.as_html();
1158    if klass_s.is_empty() {
1159        out.write_str(&text_s).unwrap();
1160        Some("")
1161    } else {
1162        write!(out, "<span class=\"{klass_s}\">{text_s}").unwrap();
1163        Some("</span>")
1164    }
1165}
1166
1167#[cfg(test)]
1168mod tests;