Skip to main content

rustc_driver_impl/
highlighter.rs

//! This module provides a syntax highlighter for Rust code.
//! It is used by the `rustc --explain` command.
//!
//! The syntax highlighter uses `rustc_lexer`'s `tokenize`
//! function to split the Rust code into a sequence of tokens.
//! The highlighter then picks a style for each token in turn
//! and writes the highlighted output to the buffer.
8use std::io::{self, Write};
9
10use anstyle::{AnsiColor, Color, Effects, Style};
11use rustc_lexer::{LiteralKind, strip_shebang, tokenize};
12
/// Names of primitive types; identifiers found in this list are colored
/// with the primitive-type color.
///
/// These names are not reserved words, so a matching identifier is only
/// *presumed* to be a type.
const PRIMITIVE_TYPES: &[&str] = &[
    "i8", "i16", "i32", "i64", "i128", "isize", // signed integers
    "u8", "u16", "u32", "u64", "u128", "usize", // unsigned integers
    "f32", "f64", // floating point
    "char", "bool", "str", // others
];
19
/// Keywords recognised by the highlighter; identifiers found in this list
/// are colored with the keyword color.
///
/// The lexer reports keywords as plain identifiers, so they are recognised
/// here by name.
const KEYWORDS: &[&str] = &[
    "static", "struct", "super", "trait", "true", "type", "unsafe", "use", "where", "while", "as",
    "async", "await", "break", "const", "continue", "crate", "dyn", "else", "enum", "extern",
    "false", "fn", "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub",
    "ref", "return", "self", "Self",
];
26
27const STR_LITERAL_COLOR: AnsiColor = AnsiColor::Green;
28const OTHER_LITERAL_COLOR: AnsiColor = AnsiColor::BrightRed;
29const DERIVE_COLOR: AnsiColor = AnsiColor::BrightRed;
30const KEYWORD_COLOR: AnsiColor = AnsiColor::BrightMagenta;
31const TYPE_COLOR: AnsiColor = AnsiColor::Yellow;
32const FUNCTION_COLOR: AnsiColor = AnsiColor::Blue;
33const USE_COLOR: AnsiColor = AnsiColor::BrightMagenta;
34const PRIMITIVE_TYPE_COLOR: AnsiColor = AnsiColor::Cyan;
35
36/// Highlight a Rust code string and write the highlighted
37/// output to the buffer. It serves as a wrapper around
38/// `Highlighter::highlight_rustc_lexer`. It is passed to
39/// `write_anstream_buf` in the `lib.rs` file.
40pub fn highlight(code: &str, buf: &mut Vec<u8>) -> io::Result<()> {
41    let mut highlighter = Highlighter::default();
42    highlighter.highlight_rustc_lexer(code, buf)
43}
44
/// A syntax highlighter for Rust code.
///
/// It is used by the `rustc --explain` command.
#[derive(Debug, Default)]
pub struct Highlighter {
    /// Used to track if the previous token was a token
    /// that warrants the next token to be colored differently.
    ///
    /// For example, the keyword `fn` requires the next token
    /// (the function name) to be colored differently.
    prev_was_special: bool,
    /// Used to track the total length, in bytes, of the tokens that
    /// have been written so far. This is the offset at which the next
    /// token's original lexeme starts in the code string.
    len_accum: usize,
}
60
61impl Highlighter {
62    /// Create a new highlighter
63    pub fn new() -> Self {
64        Self::default()
65    }
66
67    /// Highlight a Rust code string and write the highlighted
68    /// output to the buffer.
69    pub fn highlight_rustc_lexer(&mut self, code: &str, buf: &mut Vec<u8>) -> io::Result<()> {
70        use rustc_lexer::TokenKind;
71
72        // Remove shebang from code string
73        let stripped_idx = strip_shebang(code).unwrap_or(0);
74        let stripped_code = &code[stripped_idx..];
75        self.len_accum = stripped_idx;
76        let len_accum = &mut self.len_accum;
77        let tokens = tokenize(stripped_code, rustc_lexer::FrontmatterAllowed::No);
78        for token in tokens {
79            let len = token.len as usize;
80            // If the previous token was a special token, and this token is
81            // not a whitespace token, then it should be colored differently
82            let token_str = &code[*len_accum..*len_accum + len];
83            if self.prev_was_special {
84                if token_str != " " {
85                    self.prev_was_special = false;
86                }
87                let style = Style::new().fg_color(Some(Color::Ansi(AnsiColor::Blue)));
88                buf.write_fmt(format_args!("{0}{1}{0:#}", style, token_str))write!(buf, "{style}{token_str}{style:#}")?;
89                *len_accum += len;
90                continue;
91            }
92            match token.kind {
93                TokenKind::Ident => {
94                    let mut style = Style::new();
95                    // Match if an identifier is a (well-known) keyword
96                    if KEYWORDS.contains(&token_str) {
97                        if token_str == "fn" {
98                            self.prev_was_special = true;
99                        }
100                        style = style.fg_color(Some(Color::Ansi(KEYWORD_COLOR)));
101                    }
102                    // The `use` keyword is colored differently
103                    if #[allow(non_exhaustive_omitted_patterns)] match token_str {
    "use" => true,
    _ => false,
}matches!(token_str, "use") {
104                        style = style.fg_color(Some(Color::Ansi(USE_COLOR)));
105                    }
106                    // This heuristic test is to detect if the identifier is
107                    // a function call. If it is, then the function identifier is
108                    // colored differently.
109                    if code[*len_accum..*len_accum + len + 1].ends_with('(') {
110                        style = style.fg_color(Some(Color::Ansi(FUNCTION_COLOR)));
111                    }
112                    // The `derive` keyword is colored differently.
113                    if token_str == "derive" {
114                        style = style.fg_color(Some(Color::Ansi(DERIVE_COLOR)));
115                    }
116                    // This heuristic test is to detect if the identifier is
117                    // a type. If it is, then the identifier is colored differently.
118                    if #[allow(non_exhaustive_omitted_patterns)] match token_str.chars().next().map(|c|
            c.is_uppercase()) {
    Some(true) => true,
    _ => false,
}matches!(token_str.chars().next().map(|c| c.is_uppercase()), Some(true)) {
119                        style = style.fg_color(Some(Color::Ansi(TYPE_COLOR)));
120                    }
121                    // This if statement is to detect if the identifier is a primitive type.
122                    if PRIMITIVE_TYPES.contains(&token_str) {
123                        style = style.fg_color(Some(Color::Ansi(PRIMITIVE_TYPE_COLOR)));
124                    }
125                    buf.write_fmt(format_args!("{0}{1}{0:#}", style, token_str))write!(buf, "{style}{token_str}{style:#}")?;
126                }
127
128                // Color literals
129                TokenKind::Literal { kind, suffix_start: _ } => {
130                    // Strings -> Green
131                    // Chars -> Green
132                    // Raw strings -> Green
133                    // C strings -> Green
134                    // Byte Strings -> Green
135                    // Other literals -> Bright Red (Orage-esque)
136                    let style = match kind {
137                        LiteralKind::Str { terminated: _ }
138                        | LiteralKind::Char { terminated: _ }
139                        | LiteralKind::RawStr { n_hashes: _ }
140                        | LiteralKind::CStr { terminated: _ } => {
141                            Style::new().fg_color(Some(Color::Ansi(STR_LITERAL_COLOR)))
142                        }
143                        _ => Style::new().fg_color(Some(Color::Ansi(OTHER_LITERAL_COLOR))),
144                    };
145                    buf.write_fmt(format_args!("{0}{1}{0:#}", style, token_str))write!(buf, "{style}{token_str}{style:#}")?;
146                }
147                _ => {
148                    // All other tokens are dimmed
149                    let style = Style::new()
150                        .fg_color(Some(Color::Ansi(AnsiColor::BrightWhite)))
151                        .effects(Effects::DIMMED);
152                    buf.write_fmt(format_args!("{0}{1}{0:#}", style, token_str))write!(buf, "{style}{token_str}{style:#}")?;
153                }
154            }
155            *len_accum += len;
156        }
157        Ok(())
158    }
159}