rustc_span/
span_encoding.rs

1use rustc_data_structures::fx::FxIndexSet;
2// This code is very hot and uses lots of arithmetic, avoid overflow checks for performance.
3// See https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727
4use rustc_serialize::int_overflow::DebugStrictAdd;
5
6use crate::def_id::{DefIndex, LocalDefId};
7use crate::hygiene::SyntaxContext;
8use crate::{BytePos, SPAN_TRACK, SpanData};
9
10/// A compressed span.
11///
12/// [`SpanData`] is 16 bytes, which is too big to stick everywhere. `Span` only
13/// takes up 8 bytes, with less space for the length, parent and context. The
14/// vast majority (99.9%+) of `SpanData` instances can be made to fit within
15/// those 8 bytes. Any `SpanData` whose fields don't fit into a `Span` are
16/// stored in a separate interner table, and the `Span` will index into that
17/// table. Interning is rare enough that the cost is low, but common enough
18/// that the code is exercised regularly.
19///
20/// An earlier version of this code used only 4 bytes for `Span`, but that was
21/// slower because only 80--90% of spans could be stored inline (even less in
22/// very large crates) and so the interner was used a lot more. That version of
23/// the code also predated the storage of parents.
24///
25/// There are four different span forms.
26///
27/// Inline-context format (requires non-huge length, non-huge context, and no parent):
28/// - `span.lo_or_index == span_data.lo`
29/// - `span.len_with_tag_or_marker == len == span_data.hi - span_data.lo` (must be `<= MAX_LEN`)
30/// - `span.ctxt_or_parent_or_marker == span_data.ctxt` (must be `<= MAX_CTXT`)
31///
32/// Inline-parent format (requires non-huge length, root context, and non-huge parent):
33/// - `span.lo_or_index == span_data.lo`
34/// - `span.len_with_tag_or_marker & !PARENT_TAG == len == span_data.hi - span_data.lo`
35///   (must be `<= MAX_LEN`)
36/// - `span.len_with_tag_or_marker` has top bit (`PARENT_TAG`) set
37/// - `span.ctxt_or_parent_or_marker == span_data.parent` (must be `<= MAX_CTXT`)
38///
39/// Partially-interned format (requires non-huge context):
40/// - `span.lo_or_index == index` (indexes into the interner table)
41/// - `span.len_with_tag_or_marker == BASE_LEN_INTERNED_MARKER`
42/// - `span.ctxt_or_parent_or_marker == span_data.ctxt` (must be `<= MAX_CTXT`)
43///
44/// Fully-interned format (all cases not covered above):
45/// - `span.lo_or_index == index` (indexes into the interner table)
46/// - `span.len_with_tag_or_marker == BASE_LEN_INTERNED_MARKER`
47/// - `span.ctxt_or_parent_or_marker == CTXT_INTERNED_MARKER`
48///
49/// The partially-interned form requires looking in the interning table for
50/// lo and length, but the context is stored inline as well as interned.
51/// This is useful because context lookups are often done in isolation, and
52/// inline lookups are quicker.
53///
54/// Notes about the choice of field sizes:
55/// - `lo` is 32 bits in both `Span` and `SpanData`, which means that `lo`
56///   values never cause interning. The number of bits needed for `lo`
57///   depends on the crate size. 32 bits allows up to 4 GiB of code in a crate.
58///   Having no compression on this field means there is no performance cliff
59///   if a crate exceeds a particular size.
60/// - `len` is ~15 bits in `Span` (a u16, minus 1 bit for PARENT_TAG) and 32
61///   bits in `SpanData`, which means that large `len` values will cause
62///   interning. The number of bits needed for `len` does not depend on the
63///   crate size. The most common numbers of bits for `len` are from 0 to 7,
64///   with a peak usually at 3 or 4, and then it drops off quickly from 8
65///   onwards. 15 bits is enough for 99.99%+ of cases, but larger values
66///   (sometimes 20+ bits) might occur dozens of times in a typical crate.
67/// - `ctxt_or_parent_or_marker` is 16 bits in `Span` and two 32 bit fields in
68///   `SpanData`, which means intering will happen if `ctxt` is large, if
69///   `parent` is large, or if both values are non-zero. The number of bits
70///   needed for `ctxt` values depend partly on the crate size and partly on
71///   the form of the code. No crates in `rustc-perf` need more than 15 bits
72///   for `ctxt_or_parent_or_marker`, but larger crates might need more than 16
73///   bits. The number of bits needed for `parent` hasn't been measured,
74///   because `parent` isn't currently used by default.
75///
76/// In order to reliably use parented spans in incremental compilation,
77/// the dependency to the parent definition's span. This is performed
78/// using the callback `SPAN_TRACK` to access the query engine.
79///
80#[derive(Clone, Copy, Eq, PartialEq, Hash)]
81#[rustc_pass_by_value]
82pub struct Span {
83    lo_or_index: u32,
84    len_with_tag_or_marker: u16,
85    ctxt_or_parent_or_marker: u16,
86}
87
88// Convenience structures for all span formats.
89#[derive(Clone, Copy)]
90struct InlineCtxt {
91    lo: u32,
92    len: u16,
93    ctxt: u16,
94}
95
96#[derive(Clone, Copy)]
97struct InlineParent {
98    lo: u32,
99    len_with_tag: u16,
100    parent: u16,
101}
102
103#[derive(Clone, Copy)]
104struct PartiallyInterned {
105    index: u32,
106    ctxt: u16,
107}
108
109#[derive(Clone, Copy)]
110struct Interned {
111    index: u32,
112}
113
114impl InlineCtxt {
115    #[inline]
116    fn data(self) -> SpanData {
117        let len = self.len as u32;
118        debug_assert!(len <= MAX_LEN);
119        SpanData {
120            lo: BytePos(self.lo),
121            hi: BytePos(self.lo.debug_strict_add(len)),
122            ctxt: SyntaxContext::from_u16(self.ctxt),
123            parent: None,
124        }
125    }
126    #[inline]
127    fn span(lo: u32, len: u16, ctxt: u16) -> Span {
128        Span { lo_or_index: lo, len_with_tag_or_marker: len, ctxt_or_parent_or_marker: ctxt }
129    }
130    #[inline]
131    fn from_span(span: Span) -> InlineCtxt {
132        let (lo, len, ctxt) =
133            (span.lo_or_index, span.len_with_tag_or_marker, span.ctxt_or_parent_or_marker);
134        InlineCtxt { lo, len, ctxt }
135    }
136}
137
138impl InlineParent {
139    #[inline]
140    fn data(self) -> SpanData {
141        let len = (self.len_with_tag & !PARENT_TAG) as u32;
142        debug_assert!(len <= MAX_LEN);
143        SpanData {
144            lo: BytePos(self.lo),
145            hi: BytePos(self.lo.debug_strict_add(len)),
146            ctxt: SyntaxContext::root(),
147            parent: Some(LocalDefId { local_def_index: DefIndex::from_u16(self.parent) }),
148        }
149    }
150    #[inline]
151    fn span(lo: u32, len: u16, parent: u16) -> Span {
152        let (lo_or_index, len_with_tag_or_marker, ctxt_or_parent_or_marker) =
153            (lo, PARENT_TAG | len, parent);
154        Span { lo_or_index, len_with_tag_or_marker, ctxt_or_parent_or_marker }
155    }
156    #[inline]
157    fn from_span(span: Span) -> InlineParent {
158        let (lo, len_with_tag, parent) =
159            (span.lo_or_index, span.len_with_tag_or_marker, span.ctxt_or_parent_or_marker);
160        InlineParent { lo, len_with_tag, parent }
161    }
162}
163
164impl PartiallyInterned {
165    #[inline]
166    fn data(self) -> SpanData {
167        SpanData {
168            ctxt: SyntaxContext::from_u16(self.ctxt),
169            ..with_span_interner(|interner| interner.spans[self.index as usize])
170        }
171    }
172    #[inline]
173    fn span(index: u32, ctxt: u16) -> Span {
174        let (lo_or_index, len_with_tag_or_marker, ctxt_or_parent_or_marker) =
175            (index, BASE_LEN_INTERNED_MARKER, ctxt);
176        Span { lo_or_index, len_with_tag_or_marker, ctxt_or_parent_or_marker }
177    }
178    #[inline]
179    fn from_span(span: Span) -> PartiallyInterned {
180        PartiallyInterned { index: span.lo_or_index, ctxt: span.ctxt_or_parent_or_marker }
181    }
182}
183
184impl Interned {
185    #[inline]
186    fn data(self) -> SpanData {
187        with_span_interner(|interner| interner.spans[self.index as usize])
188    }
189    #[inline]
190    fn span(index: u32) -> Span {
191        let (lo_or_index, len_with_tag_or_marker, ctxt_or_parent_or_marker) =
192            (index, BASE_LEN_INTERNED_MARKER, CTXT_INTERNED_MARKER);
193        Span { lo_or_index, len_with_tag_or_marker, ctxt_or_parent_or_marker }
194    }
195    #[inline]
196    fn from_span(span: Span) -> Interned {
197        Interned { index: span.lo_or_index }
198    }
199}
200
201// This code is very hot, and converting span to an enum and matching on it doesn't optimize away
202// properly. So we are using a macro emulating such a match, but expand it directly to an if-else
203// chain.
204macro_rules! match_span_kind {
205    (
206        $span:expr,
207        InlineCtxt($span1:ident) => $arm1:expr,
208        InlineParent($span2:ident) => $arm2:expr,
209        PartiallyInterned($span3:ident) => $arm3:expr,
210        Interned($span4:ident) => $arm4:expr,
211    ) => {
212        if $span.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER {
213            if $span.len_with_tag_or_marker & PARENT_TAG == 0 {
214                // Inline-context format.
215                let $span1 = InlineCtxt::from_span($span);
216                $arm1
217            } else {
218                // Inline-parent format.
219                let $span2 = InlineParent::from_span($span);
220                $arm2
221            }
222        } else if $span.ctxt_or_parent_or_marker != CTXT_INTERNED_MARKER {
223            // Partially-interned format.
224            let $span3 = PartiallyInterned::from_span($span);
225            $arm3
226        } else {
227            // Interned format.
228            let $span4 = Interned::from_span($span);
229            $arm4
230        }
231    };
232}
233
234// `MAX_LEN` is chosen so that `PARENT_TAG | MAX_LEN` is distinct from
235// `BASE_LEN_INTERNED_MARKER`. (If `MAX_LEN` was 1 higher, this wouldn't be true.)
236const MAX_LEN: u32 = 0b0111_1111_1111_1110;
237const MAX_CTXT: u32 = 0b0111_1111_1111_1110;
238const PARENT_TAG: u16 = 0b1000_0000_0000_0000;
239const BASE_LEN_INTERNED_MARKER: u16 = 0b1111_1111_1111_1111;
240const CTXT_INTERNED_MARKER: u16 = 0b1111_1111_1111_1111;
241
242/// The dummy span has zero position, length, and context, and no parent.
243pub const DUMMY_SP: Span =
244    Span { lo_or_index: 0, len_with_tag_or_marker: 0, ctxt_or_parent_or_marker: 0 };
245
246impl Span {
247    #[inline]
248    pub fn new(
249        mut lo: BytePos,
250        mut hi: BytePos,
251        ctxt: SyntaxContext,
252        parent: Option<LocalDefId>,
253    ) -> Self {
254        if lo > hi {
255            std::mem::swap(&mut lo, &mut hi);
256        }
257
258        // Small len and ctxt may enable one of fully inline formats (or may not).
259        let (len, ctxt32) = (hi.0 - lo.0, ctxt.as_u32());
260        if len <= MAX_LEN && ctxt32 <= MAX_CTXT {
261            match parent {
262                None => return InlineCtxt::span(lo.0, len as u16, ctxt32 as u16),
263                Some(parent) => {
264                    let parent32 = parent.local_def_index.as_u32();
265                    if ctxt32 == 0 && parent32 <= MAX_CTXT {
266                        return InlineParent::span(lo.0, len as u16, parent32 as u16);
267                    }
268                }
269            }
270        }
271
272        // Otherwise small ctxt may enable the partially inline format.
273        let index = |ctxt| {
274            with_span_interner(|interner| interner.intern(&SpanData { lo, hi, ctxt, parent }))
275        };
276        if ctxt32 <= MAX_CTXT {
277            // Interned ctxt should never be read, so it can use any value.
278            PartiallyInterned::span(index(SyntaxContext::from_u32(u32::MAX)), ctxt32 as u16)
279        } else {
280            Interned::span(index(ctxt))
281        }
282    }
283
284    #[inline]
285    pub fn data(self) -> SpanData {
286        let data = self.data_untracked();
287        if let Some(parent) = data.parent {
288            (*SPAN_TRACK)(parent);
289        }
290        data
291    }
292
293    /// Internal function to translate between an encoded span and the expanded representation.
294    /// This function must not be used outside the incremental engine.
295    #[inline]
296    pub fn data_untracked(self) -> SpanData {
297        match_span_kind! {
298            self,
299            InlineCtxt(span) => span.data(),
300            InlineParent(span) => span.data(),
301            PartiallyInterned(span) => span.data(),
302            Interned(span) => span.data(),
303        }
304    }
305
306    /// Returns `true` if this span comes from any kind of macro, desugaring or inlining.
307    #[inline]
308    pub fn from_expansion(self) -> bool {
309        // If the span is fully inferred then ctxt > MAX_CTXT
310        self.inline_ctxt().map_or(true, |ctxt| !ctxt.is_root())
311    }
312
313    /// Returns `true` if this is a dummy span with any hygienic context.
314    #[inline]
315    pub fn is_dummy(self) -> bool {
316        if self.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER {
317            // Inline-context or inline-parent format.
318            let lo = self.lo_or_index;
319            let len = (self.len_with_tag_or_marker & !PARENT_TAG) as u32;
320            debug_assert!(len <= MAX_LEN);
321            lo == 0 && len == 0
322        } else {
323            // Fully-interned or partially-interned format.
324            let index = self.lo_or_index;
325            let data = with_span_interner(|interner| interner.spans[index as usize]);
326            data.lo == BytePos(0) && data.hi == BytePos(0)
327        }
328    }
329
330    #[inline]
331    pub fn map_ctxt(self, map: impl FnOnce(SyntaxContext) -> SyntaxContext) -> Span {
332        let data = match_span_kind! {
333            self,
334            InlineCtxt(span) => {
335                // This format occurs 1-2 orders of magnitude more often than others (#125017),
336                // so it makes sense to micro-optimize it to avoid `span.data()` and `Span::new()`.
337                let new_ctxt = map(SyntaxContext::from_u16(span.ctxt));
338                let new_ctxt32 = new_ctxt.as_u32();
339                return if new_ctxt32 <= MAX_CTXT {
340                    // Any small new context including zero will preserve the format.
341                    InlineCtxt::span(span.lo, span.len, new_ctxt32 as u16)
342                } else {
343                    span.data().with_ctxt(new_ctxt)
344                };
345            },
346            InlineParent(span) => span.data(),
347            PartiallyInterned(span) => span.data(),
348            Interned(span) => span.data(),
349        };
350
351        data.with_ctxt(map(data.ctxt))
352    }
353
354    // Returns either syntactic context, if it can be retrieved without taking the interner lock,
355    // or an index into the interner if it cannot.
356    #[inline]
357    fn inline_ctxt(self) -> Result<SyntaxContext, usize> {
358        match_span_kind! {
359            self,
360            InlineCtxt(span) => Ok(SyntaxContext::from_u16(span.ctxt)),
361            InlineParent(_span) => Ok(SyntaxContext::root()),
362            PartiallyInterned(span) => Ok(SyntaxContext::from_u16(span.ctxt)),
363            Interned(span) => Err(span.index as usize),
364        }
365    }
366
367    /// This function is used as a fast path when decoding the full `SpanData` is not necessary.
368    /// It's a cut-down version of `data_untracked`.
369    #[cfg_attr(not(test), rustc_diagnostic_item = "SpanCtxt")]
370    #[inline]
371    pub fn ctxt(self) -> SyntaxContext {
372        self.inline_ctxt()
373            .unwrap_or_else(|index| with_span_interner(|interner| interner.spans[index].ctxt))
374    }
375
376    #[inline]
377    pub fn eq_ctxt(self, other: Span) -> bool {
378        match (self.inline_ctxt(), other.inline_ctxt()) {
379            (Ok(ctxt1), Ok(ctxt2)) => ctxt1 == ctxt2,
380            // If `inline_ctxt` returns `Ok` the context is <= MAX_CTXT.
381            // If it returns `Err` the span is fully interned and the context is > MAX_CTXT.
382            // As these do not overlap an `Ok` and `Err` result cannot have an equal context.
383            (Ok(_), Err(_)) | (Err(_), Ok(_)) => false,
384            (Err(index1), Err(index2)) => with_span_interner(|interner| {
385                interner.spans[index1].ctxt == interner.spans[index2].ctxt
386            }),
387        }
388    }
389
390    #[inline]
391    pub fn with_parent(self, parent: Option<LocalDefId>) -> Span {
392        let data = match_span_kind! {
393            self,
394            InlineCtxt(span) => {
395                // This format occurs 1-2 orders of magnitude more often than others (#126544),
396                // so it makes sense to micro-optimize it to avoid `span.data()` and `Span::new()`.
397                // Copypaste from `Span::new`, the small len & ctxt conditions are known to hold.
398                match parent {
399                    None => return self,
400                    Some(parent) => {
401                        let parent32 = parent.local_def_index.as_u32();
402                        if span.ctxt == 0 && parent32 <= MAX_CTXT {
403                            return InlineParent::span(span.lo, span.len, parent32 as u16);
404                        }
405                    }
406                }
407                span.data()
408            },
409            InlineParent(span) => span.data(),
410            PartiallyInterned(span) => span.data(),
411            Interned(span) => span.data(),
412        };
413
414        if let Some(old_parent) = data.parent {
415            (*SPAN_TRACK)(old_parent);
416        }
417        data.with_parent(parent)
418    }
419
420    #[inline]
421    pub fn parent(self) -> Option<LocalDefId> {
422        let interned_parent =
423            |index: u32| with_span_interner(|interner| interner.spans[index as usize].parent);
424        match_span_kind! {
425            self,
426            InlineCtxt(_span) => None,
427            InlineParent(span) => Some(LocalDefId { local_def_index: DefIndex::from_u16(span.parent) }),
428            PartiallyInterned(span) => interned_parent(span.index),
429            Interned(span) => interned_parent(span.index),
430        }
431    }
432}
433
434#[derive(Default)]
435pub(crate) struct SpanInterner {
436    spans: FxIndexSet<SpanData>,
437}
438
439impl SpanInterner {
440    fn intern(&mut self, span_data: &SpanData) -> u32 {
441        let (index, _) = self.spans.insert_full(*span_data);
442        index as u32
443    }
444}
445
446// If an interner exists, return it. Otherwise, prepare a fresh one.
447#[inline]
448fn with_span_interner<T, F: FnOnce(&mut SpanInterner) -> T>(f: F) -> T {
449    crate::with_session_globals(|session_globals| f(&mut session_globals.span_interner.lock()))
450}