rustc_macros/
symbols.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
//! Proc macro which builds the Symbol table
//!
//! # Debugging
//!
//! Since this proc-macro does some non-trivial work, debugging it is important.
//! This proc-macro can be invoked as an ordinary unit test, like so:
//!
//! ```bash
//! cd compiler/rustc_macros
//! cargo test symbols::test_symbols -- --nocapture
//! ```
//!
//! This unit test finds the `symbols!` invocation in `compiler/rustc_span/src/symbol.rs`
//! and runs it. It verifies that the output token stream can be parsed as valid module
//! items and that no errors were produced.
//!
//! You can also view the generated code by using `cargo expand`:
//!
//! ```bash
//! cargo install cargo-expand          # this is necessary only once
//! cd compiler/rustc_span
//! # The specific version number in CFG_RELEASE doesn't matter.
//! # The output is large.
//! CFG_RELEASE="0.0.0" cargo +nightly expand > /tmp/rustc_span.rs
//! ```

use std::collections::HashMap;

use proc_macro2::{Span, TokenStream};
use quote::quote;
use syn::parse::{Parse, ParseStream, Result};
use syn::punctuated::Punctuated;
use syn::{Expr, Ident, Lit, LitStr, Macro, Token, braced};

#[cfg(test)]
mod tests;

mod kw {
    syn::custom_keyword!(Keywords);
    syn::custom_keyword!(Symbols);
}

struct Keyword {
    name: Ident,
    value: LitStr,
}

impl Parse for Keyword {
    fn parse(input: ParseStream<'_>) -> Result<Self> {
        let name = input.parse()?;
        input.parse::<Token![:]>()?;
        let value = input.parse()?;

        Ok(Keyword { name, value })
    }
}

struct Symbol {
    name: Ident,
    value: Value,
}

enum Value {
    SameAsName,
    String(LitStr),
    Env(LitStr, Macro),
    Unsupported(Expr),
}

impl Parse for Symbol {
    fn parse(input: ParseStream<'_>) -> Result<Self> {
        let name = input.parse()?;
        let colon_token: Option<Token![:]> = input.parse()?;
        let value = if colon_token.is_some() { input.parse()? } else { Value::SameAsName };

        Ok(Symbol { name, value })
    }
}

impl Parse for Value {
    fn parse(input: ParseStream<'_>) -> Result<Self> {
        let expr: Expr = input.parse()?;
        match &expr {
            Expr::Lit(expr) => {
                if let Lit::Str(lit) = &expr.lit {
                    return Ok(Value::String(lit.clone()));
                }
            }
            Expr::Macro(expr) => {
                if expr.mac.path.is_ident("env")
                    && let Ok(lit) = expr.mac.parse_body()
                {
                    return Ok(Value::Env(lit, expr.mac.clone()));
                }
            }
            _ => {}
        }
        Ok(Value::Unsupported(expr))
    }
}

struct Input {
    keywords: Punctuated<Keyword, Token![,]>,
    symbols: Punctuated<Symbol, Token![,]>,
}

impl Parse for Input {
    fn parse(input: ParseStream<'_>) -> Result<Self> {
        input.parse::<kw::Keywords>()?;
        let content;
        braced!(content in input);
        let keywords = Punctuated::parse_terminated(&content)?;

        input.parse::<kw::Symbols>()?;
        let content;
        braced!(content in input);
        let symbols = Punctuated::parse_terminated(&content)?;

        Ok(Input { keywords, symbols })
    }
}

#[derive(Default)]
struct Errors {
    list: Vec<syn::Error>,
}

impl Errors {
    fn error(&mut self, span: Span, message: String) {
        self.list.push(syn::Error::new(span, message));
    }
}

pub(super) fn symbols(input: TokenStream) -> TokenStream {
    let (mut output, errors) = symbols_with_errors(input);

    // If we generated any errors, then report them as compiler_error!() macro calls.
    // This lets the errors point back to the most relevant span. It also allows us
    // to report as many errors as we can during a single run.
    output.extend(errors.into_iter().map(|e| e.to_compile_error()));

    output
}

struct Preinterned {
    idx: u32,
    span_of_name: Span,
}

struct Entries {
    map: HashMap<String, Preinterned>,
}

impl Entries {
    fn with_capacity(capacity: usize) -> Self {
        Entries { map: HashMap::with_capacity(capacity) }
    }

    fn insert(&mut self, span: Span, str: &str, errors: &mut Errors) -> u32 {
        if let Some(prev) = self.map.get(str) {
            errors.error(span, format!("Symbol `{str}` is duplicated"));
            errors.error(prev.span_of_name, "location of previous definition".to_string());
            prev.idx
        } else {
            let idx = self.len();
            self.map.insert(str.to_string(), Preinterned { idx, span_of_name: span });
            idx
        }
    }

    fn len(&self) -> u32 {
        u32::try_from(self.map.len()).expect("way too many symbols")
    }
}

fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
    let mut errors = Errors::default();

    let input: Input = match syn::parse2(input) {
        Ok(input) => input,
        Err(e) => {
            // This allows us to display errors at the proper span, while minimizing
            // unrelated errors caused by bailing out (and not generating code).
            errors.list.push(e);
            Input { keywords: Default::default(), symbols: Default::default() }
        }
    };

    let mut keyword_stream = quote! {};
    let mut symbols_stream = quote! {};
    let mut prefill_stream = quote! {};
    let mut entries = Entries::with_capacity(input.keywords.len() + input.symbols.len() + 10);
    let mut prev_key: Option<(Span, String)> = None;

    let mut check_order = |span: Span, str: &str, errors: &mut Errors| {
        if let Some((prev_span, ref prev_str)) = prev_key {
            if str < prev_str {
                errors.error(span, format!("Symbol `{str}` must precede `{prev_str}`"));
                errors.error(prev_span, format!("location of previous symbol `{prev_str}`"));
            }
        }
        prev_key = Some((span, str.to_string()));
    };

    // Generate the listed keywords.
    for keyword in input.keywords.iter() {
        let name = &keyword.name;
        let value = &keyword.value;
        let value_string = value.value();
        let idx = entries.insert(keyword.name.span(), &value_string, &mut errors);
        prefill_stream.extend(quote! {
            #value,
        });
        keyword_stream.extend(quote! {
            pub const #name: Symbol = Symbol::new(#idx);
        });
    }

    // Generate the listed symbols.
    for symbol in input.symbols.iter() {
        let name = &symbol.name;
        check_order(symbol.name.span(), &name.to_string(), &mut errors);

        let value = match &symbol.value {
            Value::SameAsName => name.to_string(),
            Value::String(lit) => lit.value(),
            Value::Env(..) => continue, // in another loop below
            Value::Unsupported(expr) => {
                errors.list.push(syn::Error::new_spanned(
                    expr,
                    concat!(
                        "unsupported expression for symbol value; implement support for this in ",
                        file!(),
                    ),
                ));
                continue;
            }
        };
        let idx = entries.insert(symbol.name.span(), &value, &mut errors);

        prefill_stream.extend(quote! {
            #value,
        });
        symbols_stream.extend(quote! {
            pub const #name: Symbol = Symbol::new(#idx);
        });
    }

    // Generate symbols for the strings "0", "1", ..., "9".
    for n in 0..10 {
        let n = n.to_string();
        entries.insert(Span::call_site(), &n, &mut errors);
        prefill_stream.extend(quote! {
            #n,
        });
    }

    // Symbols whose value comes from an environment variable. It's allowed for
    // these to have the same value as another symbol.
    for symbol in &input.symbols {
        let (env_var, expr) = match &symbol.value {
            Value::Env(lit, expr) => (lit, expr),
            Value::SameAsName | Value::String(_) | Value::Unsupported(_) => continue,
        };

        if !proc_macro::is_available() {
            errors.error(
                Span::call_site(),
                "proc_macro::tracked_env is not available in unit test".to_owned(),
            );
            break;
        }

        let value = match proc_macro::tracked_env::var(env_var.value()) {
            Ok(value) => value,
            Err(err) => {
                errors.list.push(syn::Error::new_spanned(expr, err));
                continue;
            }
        };

        let idx = if let Some(prev) = entries.map.get(&value) {
            prev.idx
        } else {
            prefill_stream.extend(quote! {
                #value,
            });
            entries.insert(symbol.name.span(), &value, &mut errors)
        };

        let name = &symbol.name;
        symbols_stream.extend(quote! {
            pub const #name: Symbol = Symbol::new(#idx);
        });
    }

    let symbol_digits_base = entries.map["0"].idx;
    let preinterned_symbols_count = entries.len();
    let output = quote! {
        const SYMBOL_DIGITS_BASE: u32 = #symbol_digits_base;
        const PREINTERNED_SYMBOLS_COUNT: u32 = #preinterned_symbols_count;

        #[doc(hidden)]
        #[allow(non_upper_case_globals)]
        mod kw_generated {
            use super::Symbol;
            #keyword_stream
        }

        #[allow(non_upper_case_globals)]
        #[doc(hidden)]
        pub mod sym_generated {
            use super::Symbol;
            #symbols_stream
        }

        impl Interner {
            pub(crate) fn fresh() -> Self {
                Interner::prefill(&[
                    #prefill_stream
                ])
            }
        }
    };

    (output, errors.list)
}