rustc_span/
lib.rs

1//! Source positions and related helper functions.
2//!
3//! Important concepts in this module include:
4//!
5//! - the *span*, represented by [`SpanData`] and related types;
6//! - source code as represented by a [`SourceMap`]; and
7//! - interned strings, represented by [`Symbol`]s, with some common symbols available statically
8//!   in the [`sym`] module.
9//!
10//! Unlike most compilers, the span contains not only the position in the source code, but also
11//! various other metadata, such as the edition and macro hygiene. This metadata is stored in
12//! [`SyntaxContext`] and [`ExpnData`].
13//!
14//! ## Note
15//!
16//! This API is completely unstable and subject to change.
17
18// tidy-alphabetical-start
19#![allow(internal_features)]
20#![cfg_attr(bootstrap, feature(array_windows))]
21#![cfg_attr(target_arch = "loongarch64", feature(stdarch_loongarch))]
22#![feature(cfg_select)]
23#![feature(core_io_borrowed_buf)]
24#![feature(if_let_guard)]
25#![feature(map_try_insert)]
26#![feature(negative_impls)]
27#![feature(read_buf)]
28#![feature(rustc_attrs)]
29// tidy-alphabetical-end
30
31// The code produced by the `Encodable`/`Decodable` derive macros refer to
32// `rustc_span::Span{Encoder,Decoder}`. That's fine outside this crate, but doesn't work inside
33// this crate without this line making `rustc_span` available.
34extern crate self as rustc_span;
35
36use derive_where::derive_where;
37use rustc_data_structures::{AtomicRef, outline};
38use rustc_macros::{Decodable, Encodable, HashStable_Generic};
39use rustc_serialize::opaque::{FileEncoder, MemDecoder};
40use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
41use tracing::debug;
42
43mod caching_source_map_view;
44pub mod source_map;
45use source_map::{SourceMap, SourceMapInputs};
46
47pub use self::caching_source_map_view::CachingSourceMapView;
48use crate::fatal_error::FatalError;
49
50pub mod edition;
51use edition::Edition;
52pub mod hygiene;
53use hygiene::Transparency;
54pub use hygiene::{
55    DesugaringKind, ExpnData, ExpnHash, ExpnId, ExpnKind, LocalExpnId, MacroKind, SyntaxContext,
56};
57use rustc_data_structures::stable_hasher::HashingControls;
58pub mod def_id;
59use def_id::{CrateNum, DefId, DefIndex, DefPathHash, LOCAL_CRATE, LocalDefId, StableCrateId};
60pub mod edit_distance;
61mod span_encoding;
62pub use span_encoding::{DUMMY_SP, Span};
63
64pub mod symbol;
65pub use symbol::{
66    ByteSymbol, Ident, MacroRulesNormalizedIdent, Macros20NormalizedIdent, STDLIB_STABLE_CRATES,
67    Symbol, kw, sym,
68};
69
70mod analyze_source_file;
71pub mod fatal_error;
72
73pub mod profiling;
74
75use std::borrow::Cow;
76use std::cmp::{self, Ordering};
77use std::fmt::Display;
78use std::hash::Hash;
79use std::io::{self, Read};
80use std::ops::{Add, Range, Sub};
81use std::path::{Path, PathBuf};
82use std::str::FromStr;
83use std::sync::Arc;
84use std::{fmt, iter};
85
86use md5::{Digest, Md5};
87use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
88use rustc_data_structures::sync::{FreezeLock, FreezeWriteGuard, Lock};
89use rustc_data_structures::unord::UnordMap;
90use rustc_hashes::{Hash64, Hash128};
91use sha1::Sha1;
92use sha2::Sha256;
93
94#[cfg(test)]
95mod tests;
96
97/// Per-session global variables: this struct is stored in thread-local storage
98/// in such a way that it is accessible without any kind of handle to all
99/// threads within the compilation session, but is not accessible outside the
100/// session.
101pub struct SessionGlobals {
102    symbol_interner: symbol::Interner,
103    span_interner: Lock<span_encoding::SpanInterner>,
104    /// Maps a macro argument token into use of the corresponding metavariable in the macro body.
105    /// Collisions are possible and processed in `maybe_use_metavar_location` on best effort basis.
106    metavar_spans: MetavarSpansMap,
107    hygiene_data: Lock<hygiene::HygieneData>,
108
109    /// The session's source map, if there is one. This field should only be
110    /// used in places where the `Session` is truly not available, such as
111    /// `<Span as Debug>::fmt`.
112    source_map: Option<Arc<SourceMap>>,
113}
114
115impl SessionGlobals {
116    pub fn new(
117        edition: Edition,
118        extra_symbols: &[&'static str],
119        sm_inputs: Option<SourceMapInputs>,
120    ) -> SessionGlobals {
121        SessionGlobals {
122            symbol_interner: symbol::Interner::with_extra_symbols(extra_symbols),
123            span_interner: Lock::new(span_encoding::SpanInterner::default()),
124            metavar_spans: Default::default(),
125            hygiene_data: Lock::new(hygiene::HygieneData::new(edition)),
126            source_map: sm_inputs.map(|inputs| Arc::new(SourceMap::with_inputs(inputs))),
127        }
128    }
129}
130
131pub fn create_session_globals_then<R>(
132    edition: Edition,
133    extra_symbols: &[&'static str],
134    sm_inputs: Option<SourceMapInputs>,
135    f: impl FnOnce() -> R,
136) -> R {
137    assert!(
138        !SESSION_GLOBALS.is_set(),
139        "SESSION_GLOBALS should never be overwritten! \
140         Use another thread if you need another SessionGlobals"
141    );
142    let session_globals = SessionGlobals::new(edition, extra_symbols, sm_inputs);
143    SESSION_GLOBALS.set(&session_globals, f)
144}
145
146pub fn set_session_globals_then<R>(session_globals: &SessionGlobals, f: impl FnOnce() -> R) -> R {
147    assert!(
148        !SESSION_GLOBALS.is_set(),
149        "SESSION_GLOBALS should never be overwritten! \
150         Use another thread if you need another SessionGlobals"
151    );
152    SESSION_GLOBALS.set(session_globals, f)
153}
154
155/// No source map.
156pub fn create_session_if_not_set_then<R, F>(edition: Edition, f: F) -> R
157where
158    F: FnOnce(&SessionGlobals) -> R,
159{
160    if !SESSION_GLOBALS.is_set() {
161        let session_globals = SessionGlobals::new(edition, &[], None);
162        SESSION_GLOBALS.set(&session_globals, || SESSION_GLOBALS.with(f))
163    } else {
164        SESSION_GLOBALS.with(f)
165    }
166}
167
168#[inline]
169pub fn with_session_globals<R, F>(f: F) -> R
170where
171    F: FnOnce(&SessionGlobals) -> R,
172{
173    SESSION_GLOBALS.with(f)
174}
175
176/// Default edition, no source map.
177pub fn create_default_session_globals_then<R>(f: impl FnOnce() -> R) -> R {
178    create_session_globals_then(edition::DEFAULT_EDITION, &[], None, f)
179}
180
181// If this ever becomes non thread-local, `decode_syntax_context`
182// and `decode_expn_id` will need to be updated to handle concurrent
183// deserialization.
184scoped_tls::scoped_thread_local!(static SESSION_GLOBALS: SessionGlobals);
185
186#[derive(Default)]
187pub struct MetavarSpansMap(FreezeLock<UnordMap<Span, (Span, bool)>>);
188
189impl MetavarSpansMap {
190    pub fn insert(&self, span: Span, var_span: Span) -> bool {
191        match self.0.write().try_insert(span, (var_span, false)) {
192            Ok(_) => true,
193            Err(entry) => entry.entry.get().0 == var_span,
194        }
195    }
196
197    /// Read a span and record that it was read.
198    pub fn get(&self, span: Span) -> Option<Span> {
199        if let Some(mut mspans) = self.0.try_write() {
200            if let Some((var_span, read)) = mspans.get_mut(&span) {
201                *read = true;
202                Some(*var_span)
203            } else {
204                None
205            }
206        } else {
207            if let Some((span, true)) = self.0.read().get(&span) { Some(*span) } else { None }
208        }
209    }
210
211    /// Freeze the set, and return the spans which have been read.
212    ///
213    /// After this is frozen, no spans that have not been read can be read.
214    pub fn freeze_and_get_read_spans(&self) -> UnordMap<Span, Span> {
215        self.0.freeze().items().filter(|(_, (_, b))| *b).map(|(s1, (s2, _))| (*s1, *s2)).collect()
216    }
217}
218
219#[inline]
220pub fn with_metavar_spans<R>(f: impl FnOnce(&MetavarSpansMap) -> R) -> R {
221    with_session_globals(|session_globals| f(&session_globals.metavar_spans))
222}
223
224bitflags::bitflags! {
225    /// Scopes used to determined if it need to apply to `--remap-path-prefix`
226    #[derive(Debug, Eq, PartialEq, Clone, Copy, Ord, PartialOrd, Hash)]
227    pub struct RemapPathScopeComponents: u8 {
228        /// Apply remappings to the expansion of `std::file!()` macro
229        const MACRO = 1 << 0;
230        /// Apply remappings to printed compiler diagnostics
231        const DIAGNOSTICS = 1 << 1;
232        /// Apply remappings to debug information
233        const DEBUGINFO = 1 << 3;
234        /// Apply remappings to coverage information
235        const COVERAGE = 1 << 4;
236
237        /// An alias for `macro`, `debuginfo` and `coverage`. This ensures all paths in compiled
238        /// executables, libraries and objects are remapped but not elsewhere.
239        const OBJECT = Self::MACRO.bits() | Self::DEBUGINFO.bits() | Self::COVERAGE.bits();
240    }
241}
242
243impl<E: Encoder> Encodable<E> for RemapPathScopeComponents {
244    fn encode(&self, s: &mut E) {
245        s.emit_u8(self.bits());
246    }
247}
248
249impl<D: Decoder> Decodable<D> for RemapPathScopeComponents {
250    fn decode(s: &mut D) -> RemapPathScopeComponents {
251        RemapPathScopeComponents::from_bits(s.read_u8())
252            .expect("invalid bits for RemapPathScopeComponents")
253    }
254}
255
256/// A self-contained "real" filename.
257///
258/// It is produced by `SourceMap::to_real_filename`.
259///
260/// `RealFileName` represents a filename that may have been (partly) remapped
261/// by `--remap-path-prefix` and `-Zremap-path-scope`.
262///
263/// It also contains an embedabble component which gives a working directory
264/// and a maybe-remapped maybe-aboslote name. This is useful for debuginfo where
265/// some formats and tools highly prefer absolute paths.
266///
267/// ## Consistency across compiler sessions
268///
269/// The type-system, const-eval and other parts of the compiler rely on `FileName`
270/// and by extension `RealFileName` to be consistent across compiler sessions.
271///
272/// Otherwise unsoudness (like rust-lang/rust#148328) may occur.
273///
274/// As such this type is self-sufficient and consistent in it's output.
275///
276/// The [`RealFileName::path`] and [`RealFileName::embeddable_name`] methods
277/// are guaranteed to always return the same output across compiler sessions.
278///
279/// ## Usage
280///
281/// Creation of a [`RealFileName`] should be done using
282/// [`FilePathMapping::to_real_filename`][rustc_span::source_map::FilePathMapping::to_real_filename].
283///
284/// Retrieving a path can be done in two main ways:
285///  - by using [`RealFileName::path`] with a given scope (should be preferred)
286///  - or by using [`RealFileName::embeddable_name`] with a given scope
287#[derive(Debug, Eq, PartialEq, Clone, Ord, PartialOrd, Decodable, Encodable)]
288pub struct RealFileName {
289    /// The local name (always present in the original crate)
290    local: Option<InnerRealFileName>,
291    /// The maybe remapped part. Correspond to `local` when no remapped happened.
292    maybe_remapped: InnerRealFileName,
293    /// The remapped scopes. Any active scope MUST use `maybe_virtual`
294    scopes: RemapPathScopeComponents,
295}
296
297/// The inner workings of `RealFileName`.
298///
299/// It contains the `name`, `working_directory` and `embeddable_name` components.
300#[derive(Debug, Eq, PartialEq, Clone, Ord, PartialOrd, Decodable, Encodable, Hash)]
301struct InnerRealFileName {
302    /// The name.
303    name: PathBuf,
304    /// The working directory associated with the embeddable name.
305    working_directory: PathBuf,
306    /// The embeddable name.
307    embeddable_name: PathBuf,
308}
309
310impl Hash for RealFileName {
311    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
312        // To prevent #70924 from happening again we should only hash the
313        // remapped path if that exists. This is because remapped paths to
314        // sysroot crates (/rust/$hash or /rust/$version) remain stable even
315        // if the corresponding local path changes.
316        if !self.scopes.is_all() {
317            self.local.hash(state);
318        }
319        self.maybe_remapped.hash(state);
320        self.scopes.bits().hash(state);
321    }
322}
323
324impl RealFileName {
325    /// Returns the associated path for the given remapping scope.
326    ///
327    /// ## Panic
328    ///
329    /// Only one scope components can be given to this function.
330    pub fn path(&self, scope: RemapPathScopeComponents) -> &Path {
331        assert!(
332            scope.bits().count_ones() == 1,
333            "one and only one scope should be passed to `RealFileName::path`: {scope:?}"
334        );
335        if !self.scopes.contains(scope)
336            && let Some(local_name) = &self.local
337        {
338            local_name.name.as_path()
339        } else {
340            self.maybe_remapped.name.as_path()
341        }
342    }
343
344    /// Returns the working directory and embeddable path for the given remapping scope.
345    ///
346    /// Useful for embedding a mostly abosolute path (modulo remapping) in the compiler outputs.
347    ///
348    /// The embedabble path is not guaranteed to be an absolute path, nor is it garuenteed
349    /// that the working directory part is always a prefix of embeddable path.
350    ///
351    /// ## Panic
352    ///
353    /// Only one scope components can be given to this function.
354    pub fn embeddable_name(&self, scope: RemapPathScopeComponents) -> (&Path, &Path) {
355        assert!(
356            scope.bits().count_ones() == 1,
357            "one and only one scope should be passed to `RealFileName::embeddable_path`: {scope:?}"
358        );
359        if !self.scopes.contains(scope)
360            && let Some(local_name) = &self.local
361        {
362            (&local_name.working_directory, &local_name.embeddable_name)
363        } else {
364            (&self.maybe_remapped.working_directory, &self.maybe_remapped.embeddable_name)
365        }
366    }
367
368    /// Returns the path suitable for reading from the file system on the local host,
369    /// if this information exists.
370    ///
371    /// May not exists if the filename was imported from another crate.
372    pub fn local_path(&self) -> Option<&Path> {
373        self.local.as_ref().map(|lp| lp.name.as_ref())
374    }
375
376    /// Returns the path suitable for reading from the file system on the local host,
377    /// if this information exists.
378    ///
379    /// May not exists if the filename was imported from another crate.
380    pub fn into_local_path(self) -> Option<PathBuf> {
381        self.local.map(|lp| lp.name)
382    }
383
384    /// Returns whenever the filename was remapped.
385    pub(crate) fn was_remapped(&self) -> bool {
386        !self.scopes.is_empty()
387    }
388
389    /// Returns an empty `RealFileName`
390    ///
391    /// Useful as the working directory input to `SourceMap::to_real_filename`.
392    pub fn empty() -> RealFileName {
393        RealFileName {
394            local: Some(InnerRealFileName {
395                name: PathBuf::new(),
396                working_directory: PathBuf::new(),
397                embeddable_name: PathBuf::new(),
398            }),
399            maybe_remapped: InnerRealFileName {
400                name: PathBuf::new(),
401                working_directory: PathBuf::new(),
402                embeddable_name: PathBuf::new(),
403            },
404            scopes: RemapPathScopeComponents::empty(),
405        }
406    }
407
408    /// Returns a `RealFileName` that is completely remapped without any local components.
409    ///
410    /// Only exposed for the purpose of `-Zsimulate-remapped-rust-src-base`.
411    pub fn from_virtual_path(path: &Path) -> RealFileName {
412        let name = InnerRealFileName {
413            name: path.to_owned(),
414            embeddable_name: path.to_owned(),
415            working_directory: PathBuf::new(),
416        };
417        RealFileName { local: None, maybe_remapped: name, scopes: RemapPathScopeComponents::all() }
418    }
419
420    /// Update the filename for encoding in the crate metadata.
421    ///
422    /// Currently it's about removing the local part when the filename
423    /// is fully remapped.
424    pub fn update_for_crate_metadata(&mut self) {
425        if self.scopes.is_all() {
426            self.local = None;
427        }
428    }
429
430    /// Internal routine to display the filename.
431    ///
432    /// Users should always use the `RealFileName::path` method or `FileName` methods instead.
433    fn to_string_lossy<'a>(&'a self, display_pref: FileNameDisplayPreference) -> Cow<'a, str> {
434        match display_pref {
435            FileNameDisplayPreference::Remapped => self.maybe_remapped.name.to_string_lossy(),
436            FileNameDisplayPreference::Local => {
437                self.local.as_ref().unwrap_or(&self.maybe_remapped).name.to_string_lossy()
438            }
439            FileNameDisplayPreference::Short => self
440                .maybe_remapped
441                .name
442                .file_name()
443                .map_or_else(|| "".into(), |f| f.to_string_lossy()),
444            FileNameDisplayPreference::Scope(scope) => self.path(scope).to_string_lossy(),
445        }
446    }
447}
448
449/// Differentiates between real files and common virtual files.
450#[derive(Debug, Eq, PartialEq, Clone, Ord, PartialOrd, Hash, Decodable, Encodable)]
451pub enum FileName {
452    Real(RealFileName),
453    /// Strings provided as `--cfg [cfgspec]`.
454    CfgSpec(Hash64),
455    /// Command line.
456    Anon(Hash64),
457    /// Hack in `src/librustc_ast/parse.rs`.
458    // FIXME(jseyfried)
459    MacroExpansion(Hash64),
460    ProcMacroSourceCode(Hash64),
461    /// Strings provided as crate attributes in the CLI.
462    CliCrateAttr(Hash64),
463    /// Custom sources for explicit parser calls from plugins and drivers.
464    Custom(String),
465    DocTest(PathBuf, isize),
466    /// Post-substitution inline assembly from LLVM.
467    InlineAsm(Hash64),
468}
469
470pub struct FileNameDisplay<'a> {
471    inner: &'a FileName,
472    display_pref: FileNameDisplayPreference,
473}
474
475// Internal enum. Should not be exposed.
476#[derive(Clone, Copy)]
477enum FileNameDisplayPreference {
478    Remapped,
479    Local,
480    Short,
481    Scope(RemapPathScopeComponents),
482}
483
484impl fmt::Display for FileNameDisplay<'_> {
485    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
486        use FileName::*;
487        match *self.inner {
488            Real(ref name) => {
489                write!(fmt, "{}", name.to_string_lossy(self.display_pref))
490            }
491            CfgSpec(_) => write!(fmt, "<cfgspec>"),
492            MacroExpansion(_) => write!(fmt, "<macro expansion>"),
493            Anon(_) => write!(fmt, "<anon>"),
494            ProcMacroSourceCode(_) => write!(fmt, "<proc-macro source code>"),
495            CliCrateAttr(_) => write!(fmt, "<crate attribute>"),
496            Custom(ref s) => write!(fmt, "<{s}>"),
497            DocTest(ref path, _) => write!(fmt, "{}", path.display()),
498            InlineAsm(_) => write!(fmt, "<inline asm>"),
499        }
500    }
501}
502
503impl<'a> FileNameDisplay<'a> {
504    pub fn to_string_lossy(&self) -> Cow<'a, str> {
505        match self.inner {
506            FileName::Real(inner) => inner.to_string_lossy(self.display_pref),
507            _ => Cow::from(self.to_string()),
508        }
509    }
510}
511
512impl FileName {
513    pub fn is_real(&self) -> bool {
514        use FileName::*;
515        match *self {
516            Real(_) => true,
517            Anon(_)
518            | MacroExpansion(_)
519            | ProcMacroSourceCode(_)
520            | CliCrateAttr(_)
521            | Custom(_)
522            | CfgSpec(_)
523            | DocTest(_, _)
524            | InlineAsm(_) => false,
525        }
526    }
527
528    /// Returns the path suitable for reading from the file system on the local host,
529    /// if this information exists.
530    ///
531    /// Avoid embedding this in build artifacts. Prefer using the `display` method.
532    pub fn prefer_remapped_unconditionally(&self) -> FileNameDisplay<'_> {
533        FileNameDisplay { inner: self, display_pref: FileNameDisplayPreference::Remapped }
534    }
535
536    /// Returns the path suitable for reading from the file system on the local host,
537    /// if this information exists.
538    ///
539    /// Avoid embedding this in build artifacts. Prefer using the `display` method.
540    pub fn prefer_local_unconditionally(&self) -> FileNameDisplay<'_> {
541        FileNameDisplay { inner: self, display_pref: FileNameDisplayPreference::Local }
542    }
543
544    /// Returns a short (either the filename or an empty string).
545    pub fn short(&self) -> FileNameDisplay<'_> {
546        FileNameDisplay { inner: self, display_pref: FileNameDisplayPreference::Short }
547    }
548
549    /// Returns a `Display`-able path for the given scope.
550    pub fn display(&self, scope: RemapPathScopeComponents) -> FileNameDisplay<'_> {
551        FileNameDisplay { inner: self, display_pref: FileNameDisplayPreference::Scope(scope) }
552    }
553
554    pub fn macro_expansion_source_code(src: &str) -> FileName {
555        let mut hasher = StableHasher::new();
556        src.hash(&mut hasher);
557        FileName::MacroExpansion(hasher.finish())
558    }
559
560    pub fn anon_source_code(src: &str) -> FileName {
561        let mut hasher = StableHasher::new();
562        src.hash(&mut hasher);
563        FileName::Anon(hasher.finish())
564    }
565
566    pub fn proc_macro_source_code(src: &str) -> FileName {
567        let mut hasher = StableHasher::new();
568        src.hash(&mut hasher);
569        FileName::ProcMacroSourceCode(hasher.finish())
570    }
571
572    pub fn cfg_spec_source_code(src: &str) -> FileName {
573        let mut hasher = StableHasher::new();
574        src.hash(&mut hasher);
575        FileName::CfgSpec(hasher.finish())
576    }
577
578    pub fn cli_crate_attr_source_code(src: &str) -> FileName {
579        let mut hasher = StableHasher::new();
580        src.hash(&mut hasher);
581        FileName::CliCrateAttr(hasher.finish())
582    }
583
584    pub fn doc_test_source_code(path: PathBuf, line: isize) -> FileName {
585        FileName::DocTest(path, line)
586    }
587
588    pub fn inline_asm_source_code(src: &str) -> FileName {
589        let mut hasher = StableHasher::new();
590        src.hash(&mut hasher);
591        FileName::InlineAsm(hasher.finish())
592    }
593
594    /// Returns the path suitable for reading from the file system on the local host,
595    /// if this information exists.
596    ///
597    /// Avoid embedding this in build artifacts.
598    pub fn into_local_path(self) -> Option<PathBuf> {
599        match self {
600            FileName::Real(path) => path.into_local_path(),
601            FileName::DocTest(path, _) => Some(path),
602            _ => None,
603        }
604    }
605}
606
607/// Represents a span.
608///
609/// Spans represent a region of code, used for error reporting. Positions in spans
610/// are *absolute* positions from the beginning of the [`SourceMap`], not positions
611/// relative to [`SourceFile`]s. Methods on the `SourceMap` can be used to relate spans back
612/// to the original source.
613///
614/// You must be careful if the span crosses more than one file, since you will not be
615/// able to use many of the functions on spans in source_map and you cannot assume
616/// that the length of the span is equal to `span.hi - span.lo`; there may be space in the
617/// [`BytePos`] range between files.
618///
619/// `SpanData` is public because `Span` uses a thread-local interner and can't be
620/// sent to other threads, but some pieces of performance infra run in a separate thread.
621/// Using `Span` is generally preferred.
622#[derive(Clone, Copy, Hash, PartialEq, Eq)]
623#[derive_where(PartialOrd, Ord)]
624pub struct SpanData {
625    pub lo: BytePos,
626    pub hi: BytePos,
627    /// Information about where the macro came from, if this piece of
628    /// code was created by a macro expansion.
629    #[derive_where(skip)]
630    // `SyntaxContext` does not implement `Ord`.
631    // The other fields are enough to determine in-file order.
632    pub ctxt: SyntaxContext,
633    #[derive_where(skip)]
634    // `LocalDefId` does not implement `Ord`.
635    // The other fields are enough to determine in-file order.
636    pub parent: Option<LocalDefId>,
637}
638
639impl SpanData {
640    #[inline]
641    pub fn span(&self) -> Span {
642        Span::new(self.lo, self.hi, self.ctxt, self.parent)
643    }
644    #[inline]
645    pub fn with_lo(&self, lo: BytePos) -> Span {
646        Span::new(lo, self.hi, self.ctxt, self.parent)
647    }
648    #[inline]
649    pub fn with_hi(&self, hi: BytePos) -> Span {
650        Span::new(self.lo, hi, self.ctxt, self.parent)
651    }
652    /// Avoid if possible, `Span::map_ctxt` should be preferred.
653    #[inline]
654    fn with_ctxt(&self, ctxt: SyntaxContext) -> Span {
655        Span::new(self.lo, self.hi, ctxt, self.parent)
656    }
657    /// Avoid if possible, `Span::with_parent` should be preferred.
658    #[inline]
659    fn with_parent(&self, parent: Option<LocalDefId>) -> Span {
660        Span::new(self.lo, self.hi, self.ctxt, parent)
661    }
662    /// Returns `true` if this is a dummy span with any hygienic context.
663    #[inline]
664    pub fn is_dummy(self) -> bool {
665        self.lo.0 == 0 && self.hi.0 == 0
666    }
667    /// Returns `true` if `self` fully encloses `other`.
668    pub fn contains(self, other: Self) -> bool {
669        self.lo <= other.lo && other.hi <= self.hi
670    }
671}
672
673impl Default for SpanData {
674    fn default() -> Self {
675        Self { lo: BytePos(0), hi: BytePos(0), ctxt: SyntaxContext::root(), parent: None }
676    }
677}
678
679impl PartialOrd for Span {
680    fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
681        PartialOrd::partial_cmp(&self.data(), &rhs.data())
682    }
683}
684impl Ord for Span {
685    fn cmp(&self, rhs: &Self) -> Ordering {
686        Ord::cmp(&self.data(), &rhs.data())
687    }
688}
689
690impl Span {
691    #[inline]
692    pub fn lo(self) -> BytePos {
693        self.data().lo
694    }
695    #[inline]
696    pub fn with_lo(self, lo: BytePos) -> Span {
697        self.data().with_lo(lo)
698    }
699    #[inline]
700    pub fn hi(self) -> BytePos {
701        self.data().hi
702    }
703    #[inline]
704    pub fn with_hi(self, hi: BytePos) -> Span {
705        self.data().with_hi(hi)
706    }
707    #[inline]
708    pub fn with_ctxt(self, ctxt: SyntaxContext) -> Span {
709        self.map_ctxt(|_| ctxt)
710    }
711
712    #[inline]
713    pub fn is_visible(self, sm: &SourceMap) -> bool {
714        !self.is_dummy() && sm.is_span_accessible(self)
715    }
716
717    /// Returns whether this span originates in a foreign crate's external macro.
718    ///
719    /// This is used to test whether a lint should not even begin to figure out whether it should
720    /// be reported on the current node.
721    #[inline]
722    pub fn in_external_macro(self, sm: &SourceMap) -> bool {
723        self.ctxt().in_external_macro(sm)
724    }
725
726    /// Returns `true` if `span` originates in a derive-macro's expansion.
727    pub fn in_derive_expansion(self) -> bool {
728        matches!(self.ctxt().outer_expn_data().kind, ExpnKind::Macro(MacroKind::Derive, _))
729    }
730
731    /// Return whether `span` is generated by `async` or `await`.
732    pub fn is_from_async_await(self) -> bool {
733        matches!(
734            self.ctxt().outer_expn_data().kind,
735            ExpnKind::Desugaring(DesugaringKind::Async | DesugaringKind::Await),
736        )
737    }
738
739    /// Gate suggestions that would not be appropriate in a context the user didn't write.
740    pub fn can_be_used_for_suggestions(self) -> bool {
741        !self.from_expansion()
742        // FIXME: If this span comes from a `derive` macro but it points at code the user wrote,
743        // the callsite span and the span will be pointing at different places. It also means that
744        // we can safely provide suggestions on this span.
745            || (self.in_derive_expansion()
746                && self.parent_callsite().map(|p| (p.lo(), p.hi())) != Some((self.lo(), self.hi())))
747    }
748
749    #[inline]
750    pub fn with_root_ctxt(lo: BytePos, hi: BytePos) -> Span {
751        Span::new(lo, hi, SyntaxContext::root(), None)
752    }
753
754    /// Returns a new span representing an empty span at the beginning of this span.
755    #[inline]
756    pub fn shrink_to_lo(self) -> Span {
757        let span = self.data_untracked();
758        span.with_hi(span.lo)
759    }
760    /// Returns a new span representing an empty span at the end of this span.
761    #[inline]
762    pub fn shrink_to_hi(self) -> Span {
763        let span = self.data_untracked();
764        span.with_lo(span.hi)
765    }
766
767    #[inline]
768    /// Returns `true` if `hi == lo`.
769    pub fn is_empty(self) -> bool {
770        let span = self.data_untracked();
771        span.hi == span.lo
772    }
773
774    /// Returns `self` if `self` is not the dummy span, and `other` otherwise.
775    pub fn substitute_dummy(self, other: Span) -> Span {
776        if self.is_dummy() { other } else { self }
777    }
778
779    /// Returns `true` if `self` fully encloses `other`.
780    pub fn contains(self, other: Span) -> bool {
781        let span = self.data();
782        let other = other.data();
783        span.contains(other)
784    }
785
786    /// Returns `true` if `self` touches `other`.
787    pub fn overlaps(self, other: Span) -> bool {
788        let span = self.data();
789        let other = other.data();
790        span.lo < other.hi && other.lo < span.hi
791    }
792
793    /// Returns `true` if `self` touches or adjoins `other`.
794    pub fn overlaps_or_adjacent(self, other: Span) -> bool {
795        let span = self.data();
796        let other = other.data();
797        span.lo <= other.hi && other.lo <= span.hi
798    }
799
800    /// Returns `true` if the spans are equal with regards to the source text.
801    ///
802    /// Use this instead of `==` when either span could be generated code,
803    /// and you only care that they point to the same bytes of source text.
804    pub fn source_equal(self, other: Span) -> bool {
805        let span = self.data();
806        let other = other.data();
807        span.lo == other.lo && span.hi == other.hi
808    }
809
810    /// Returns `Some(span)`, where the start is trimmed by the end of `other`.
811    pub fn trim_start(self, other: Span) -> Option<Span> {
812        let span = self.data();
813        let other = other.data();
814        if span.hi > other.hi { Some(span.with_lo(cmp::max(span.lo, other.hi))) } else { None }
815    }
816
817    /// Returns `Some(span)`, where the end is trimmed by the start of `other`.
818    pub fn trim_end(self, other: Span) -> Option<Span> {
819        let span = self.data();
820        let other = other.data();
821        if span.lo < other.lo { Some(span.with_hi(cmp::min(span.hi, other.lo))) } else { None }
822    }
823
824    /// Returns the source span -- this is either the supplied span, or the span for
825    /// the macro callsite that expanded to it.
826    pub fn source_callsite(self) -> Span {
827        let ctxt = self.ctxt();
828        if !ctxt.is_root() { ctxt.outer_expn_data().call_site.source_callsite() } else { self }
829    }
830
831    /// Returns the call-site span of the last macro expansion which produced this `Span`.
832    /// (see [`ExpnData::call_site`]). Returns `None` if this is not an expansion.
833    pub fn parent_callsite(self) -> Option<Span> {
834        let ctxt = self.ctxt();
835        (!ctxt.is_root()).then(|| ctxt.outer_expn_data().call_site)
836    }
837
838    /// Find the first ancestor span that's contained within `outer`.
839    ///
840    /// This method traverses the macro expansion ancestors until it finds the first span
841    /// that's contained within `outer`.
842    ///
843    /// The span returned by this method may have a different [`SyntaxContext`] than `outer`.
844    /// If you need to extend the span, use [`find_ancestor_inside_same_ctxt`] instead,
845    /// because joining spans with different syntax contexts can create unexpected results.
846    ///
847    /// This is used to find the span of the macro call when a parent expr span, i.e. `outer`, is known.
848    ///
849    /// [`find_ancestor_inside_same_ctxt`]: Self::find_ancestor_inside_same_ctxt
850    pub fn find_ancestor_inside(mut self, outer: Span) -> Option<Span> {
851        while !outer.contains(self) {
852            self = self.parent_callsite()?;
853        }
854        Some(self)
855    }
856
857    /// Find the first ancestor span with the same [`SyntaxContext`] as `other`.
858    ///
859    /// This method traverses the macro expansion ancestors until it finds a span
860    /// that has the same [`SyntaxContext`] as `other`.
861    ///
862    /// Like [`find_ancestor_inside_same_ctxt`], but specifically for when spans might not
863    /// overlap. Take care when using this, and prefer [`find_ancestor_inside`] or
864    /// [`find_ancestor_inside_same_ctxt`] when you know that the spans are nested (modulo
865    /// macro expansion).
866    ///
867    /// [`find_ancestor_inside`]: Self::find_ancestor_inside
868    /// [`find_ancestor_inside_same_ctxt`]: Self::find_ancestor_inside_same_ctxt
869    pub fn find_ancestor_in_same_ctxt(mut self, other: Span) -> Option<Span> {
870        while !self.eq_ctxt(other) {
871            self = self.parent_callsite()?;
872        }
873        Some(self)
874    }
875
876    /// Find the first ancestor span that's contained within `outer` and
877    /// has the same [`SyntaxContext`] as `outer`.
878    ///
879    /// This method traverses the macro expansion ancestors until it finds a span
880    /// that is both contained within `outer` and has the same [`SyntaxContext`] as `outer`.
881    ///
882    /// This method is the combination of [`find_ancestor_inside`] and
883    /// [`find_ancestor_in_same_ctxt`] and should be preferred when extending the returned span.
884    /// If you do not need to modify the span, use [`find_ancestor_inside`] instead.
885    ///
886    /// [`find_ancestor_inside`]: Self::find_ancestor_inside
887    /// [`find_ancestor_in_same_ctxt`]: Self::find_ancestor_in_same_ctxt
888    pub fn find_ancestor_inside_same_ctxt(mut self, outer: Span) -> Option<Span> {
889        while !outer.contains(self) || !self.eq_ctxt(outer) {
890            self = self.parent_callsite()?;
891        }
892        Some(self)
893    }
894
895    /// Find the first ancestor span that does not come from an external macro.
896    ///
897    /// This method traverses the macro expansion ancestors until it finds a span
898    /// that is either from user-written code or from a local macro (defined in the current crate).
899    ///
900    /// External macros are those defined in dependencies or the standard library.
901    /// This method is useful for reporting errors in user-controllable code and avoiding
902    /// diagnostics inside external macros.
903    ///
904    /// # See also
905    ///
906    /// - [`Self::find_ancestor_not_from_macro`]
907    /// - [`Self::in_external_macro`]
908    pub fn find_ancestor_not_from_extern_macro(mut self, sm: &SourceMap) -> Option<Span> {
909        while self.in_external_macro(sm) {
910            self = self.parent_callsite()?;
911        }
912        Some(self)
913    }
914
915    /// Find the first ancestor span that does not come from any macro expansion.
916    ///
917    /// This method traverses the macro expansion ancestors until it finds a span
918    /// that originates from user-written code rather than any macro-generated code.
919    ///
920    /// This method is useful for reporting errors at the exact location users wrote code
921    /// and providing suggestions at directly editable locations.
922    ///
923    /// # See also
924    ///
925    /// - [`Self::find_ancestor_not_from_extern_macro`]
926    /// - [`Span::from_expansion`]
927    pub fn find_ancestor_not_from_macro(mut self) -> Option<Span> {
928        while self.from_expansion() {
929            self = self.parent_callsite()?;
930        }
931        Some(self)
932    }
933
934    /// Edition of the crate from which this span came.
935    pub fn edition(self) -> edition::Edition {
936        self.ctxt().edition()
937    }
938
939    /// Is this edition 2015?
940    #[inline]
941    pub fn is_rust_2015(self) -> bool {
942        self.edition().is_rust_2015()
943    }
944
945    /// Are we allowed to use features from the Rust 2018 edition?
946    #[inline]
947    pub fn at_least_rust_2018(self) -> bool {
948        self.edition().at_least_rust_2018()
949    }
950
951    /// Are we allowed to use features from the Rust 2021 edition?
952    #[inline]
953    pub fn at_least_rust_2021(self) -> bool {
954        self.edition().at_least_rust_2021()
955    }
956
957    /// Are we allowed to use features from the Rust 2024 edition?
958    #[inline]
959    pub fn at_least_rust_2024(self) -> bool {
960        self.edition().at_least_rust_2024()
961    }
962
963    /// Returns the source callee.
964    ///
965    /// Returns `None` if the supplied span has no expansion trace,
966    /// else returns the `ExpnData` for the macro definition
967    /// corresponding to the source callsite.
968    pub fn source_callee(self) -> Option<ExpnData> {
969        let mut ctxt = self.ctxt();
970        let mut opt_expn_data = None;
971        while !ctxt.is_root() {
972            let expn_data = ctxt.outer_expn_data();
973            ctxt = expn_data.call_site.ctxt();
974            opt_expn_data = Some(expn_data);
975        }
976        opt_expn_data
977    }
978
979    /// Checks if a span is "internal" to a macro in which `#[unstable]`
980    /// items can be used (that is, a macro marked with
981    /// `#[allow_internal_unstable]`).
982    pub fn allows_unstable(self, feature: Symbol) -> bool {
983        self.ctxt()
984            .outer_expn_data()
985            .allow_internal_unstable
986            .is_some_and(|features| features.contains(&feature))
987    }
988
989    /// Checks if this span arises from a compiler desugaring of kind `kind`.
990    pub fn is_desugaring(self, kind: DesugaringKind) -> bool {
991        match self.ctxt().outer_expn_data().kind {
992            ExpnKind::Desugaring(k) => k == kind,
993            _ => false,
994        }
995    }
996
997    /// Returns the compiler desugaring that created this span, or `None`
998    /// if this span is not from a desugaring.
999    pub fn desugaring_kind(self) -> Option<DesugaringKind> {
1000        match self.ctxt().outer_expn_data().kind {
1001            ExpnKind::Desugaring(k) => Some(k),
1002            _ => None,
1003        }
1004    }
1005
1006    /// Checks if a span is "internal" to a macro in which `unsafe`
1007    /// can be used without triggering the `unsafe_code` lint.
1008    /// (that is, a macro marked with `#[allow_internal_unsafe]`).
1009    pub fn allows_unsafe(self) -> bool {
1010        self.ctxt().outer_expn_data().allow_internal_unsafe
1011    }
1012
1013    pub fn macro_backtrace(mut self) -> impl Iterator<Item = ExpnData> {
1014        let mut prev_span = DUMMY_SP;
1015        iter::from_fn(move || {
1016            loop {
1017                let ctxt = self.ctxt();
1018                if ctxt.is_root() {
1019                    return None;
1020                }
1021
1022                let expn_data = ctxt.outer_expn_data();
1023                let is_recursive = expn_data.call_site.source_equal(prev_span);
1024
1025                prev_span = self;
1026                self = expn_data.call_site;
1027
1028                // Don't print recursive invocations.
1029                if !is_recursive {
1030                    return Some(expn_data);
1031                }
1032            }
1033        })
1034    }
1035
1036    /// Splits a span into two composite spans around a certain position.
1037    pub fn split_at(self, pos: u32) -> (Span, Span) {
1038        let len = self.hi().0 - self.lo().0;
1039        debug_assert!(pos <= len);
1040
1041        let split_pos = BytePos(self.lo().0 + pos);
1042        (
1043            Span::new(self.lo(), split_pos, self.ctxt(), self.parent()),
1044            Span::new(split_pos, self.hi(), self.ctxt(), self.parent()),
1045        )
1046    }
1047
1048    /// Check if you can select metavar spans for the given spans to get matching contexts.
1049    fn try_metavars(a: SpanData, b: SpanData, a_orig: Span, b_orig: Span) -> (SpanData, SpanData) {
1050        match with_metavar_spans(|mspans| (mspans.get(a_orig), mspans.get(b_orig))) {
1051            (None, None) => {}
1052            (Some(meta_a), None) => {
1053                let meta_a = meta_a.data();
1054                if meta_a.ctxt == b.ctxt {
1055                    return (meta_a, b);
1056                }
1057            }
1058            (None, Some(meta_b)) => {
1059                let meta_b = meta_b.data();
1060                if a.ctxt == meta_b.ctxt {
1061                    return (a, meta_b);
1062                }
1063            }
1064            (Some(meta_a), Some(meta_b)) => {
1065                let meta_b = meta_b.data();
1066                if a.ctxt == meta_b.ctxt {
1067                    return (a, meta_b);
1068                }
1069                let meta_a = meta_a.data();
1070                if meta_a.ctxt == b.ctxt {
1071                    return (meta_a, b);
1072                } else if meta_a.ctxt == meta_b.ctxt {
1073                    return (meta_a, meta_b);
1074                }
1075            }
1076        }
1077
1078        (a, b)
1079    }
1080
1081    /// Prepare two spans to a combine operation like `to` or `between`.
1082    fn prepare_to_combine(
1083        a_orig: Span,
1084        b_orig: Span,
1085    ) -> Result<(SpanData, SpanData, Option<LocalDefId>), Span> {
1086        let (a, b) = (a_orig.data(), b_orig.data());
1087        if a.ctxt == b.ctxt {
1088            return Ok((a, b, if a.parent == b.parent { a.parent } else { None }));
1089        }
1090
1091        let (a, b) = Span::try_metavars(a, b, a_orig, b_orig);
1092        if a.ctxt == b.ctxt {
1093            return Ok((a, b, if a.parent == b.parent { a.parent } else { None }));
1094        }
1095
1096        // Context mismatches usually happen when procedural macros combine spans copied from
1097        // the macro input with spans produced by the macro (`Span::*_site`).
1098        // In that case we consider the combined span to be produced by the macro and return
1099        // the original macro-produced span as the result.
1100        // Otherwise we just fall back to returning the first span.
1101        // Combining locations typically doesn't make sense in case of context mismatches.
1102        // `is_root` here is a fast path optimization.
1103        let a_is_callsite = a.ctxt.is_root() || a.ctxt == b.span().source_callsite().ctxt();
1104        Err(if a_is_callsite { b_orig } else { a_orig })
1105    }
1106
1107    /// This span, but in a larger context, may switch to the metavariable span if suitable.
1108    pub fn with_neighbor(self, neighbor: Span) -> Span {
1109        match Span::prepare_to_combine(self, neighbor) {
1110            Ok((this, ..)) => this.span(),
1111            Err(_) => self,
1112        }
1113    }
1114
1115    /// Returns a `Span` that would enclose both `self` and `end`.
1116    ///
1117    /// Note that this can also be used to extend the span "backwards":
1118    /// `start.to(end)` and `end.to(start)` return the same `Span`.
1119    ///
1120    /// ```text
1121    ///     ____             ___
1122    ///     self lorem ipsum end
1123    ///     ^^^^^^^^^^^^^^^^^^^^
1124    /// ```
1125    pub fn to(self, end: Span) -> Span {
1126        match Span::prepare_to_combine(self, end) {
1127            Ok((from, to, parent)) => {
1128                Span::new(cmp::min(from.lo, to.lo), cmp::max(from.hi, to.hi), from.ctxt, parent)
1129            }
1130            Err(fallback) => fallback,
1131        }
1132    }
1133
1134    /// Returns a `Span` between the end of `self` to the beginning of `end`.
1135    ///
1136    /// ```text
1137    ///     ____             ___
1138    ///     self lorem ipsum end
1139    ///         ^^^^^^^^^^^^^
1140    /// ```
1141    pub fn between(self, end: Span) -> Span {
1142        match Span::prepare_to_combine(self, end) {
1143            Ok((from, to, parent)) => {
1144                Span::new(cmp::min(from.hi, to.hi), cmp::max(from.lo, to.lo), from.ctxt, parent)
1145            }
1146            Err(fallback) => fallback,
1147        }
1148    }
1149
1150    /// Returns a `Span` from the beginning of `self` until the beginning of `end`.
1151    ///
1152    /// ```text
1153    ///     ____             ___
1154    ///     self lorem ipsum end
1155    ///     ^^^^^^^^^^^^^^^^^
1156    /// ```
1157    pub fn until(self, end: Span) -> Span {
1158        match Span::prepare_to_combine(self, end) {
1159            Ok((from, to, parent)) => {
1160                Span::new(cmp::min(from.lo, to.lo), cmp::max(from.lo, to.lo), from.ctxt, parent)
1161            }
1162            Err(fallback) => fallback,
1163        }
1164    }
1165
1166    /// Returns the `Span` within the syntax context of "within". This is useful when
1167    /// "self" is an expansion from a macro variable, since this can be used for
1168    /// providing extra macro expansion context for certain errors.
1169    ///
1170    /// ```text
1171    /// macro_rules! m {
1172    ///     ($ident:ident) => { ($ident,) }
1173    /// }
1174    ///
1175    /// m!(outer_ident);
1176    /// ```
1177    ///
1178    /// If "self" is the span of the outer_ident, and "within" is the span of the `($ident,)`
1179    /// expr, then this will return the span of the `$ident` macro variable.
1180    pub fn within_macro(self, within: Span, sm: &SourceMap) -> Option<Span> {
1181        match Span::prepare_to_combine(self, within) {
1182            // Only return something if it doesn't overlap with the original span,
1183            // and the span isn't "imported" (i.e. from unavailable sources).
1184            // FIXME: This does limit the usefulness of the error when the macro is
1185            // from a foreign crate; we could also take into account `-Zmacro-backtrace`,
1186            // which doesn't redact this span (but that would mean passing in even more
1187            // args to this function, lol).
1188            Ok((self_, _, parent))
1189                if self_.hi < self.lo() || self.hi() < self_.lo && !sm.is_imported(within) =>
1190            {
1191                Some(Span::new(self_.lo, self_.hi, self_.ctxt, parent))
1192            }
1193            _ => None,
1194        }
1195    }
1196
1197    pub fn from_inner(self, inner: InnerSpan) -> Span {
1198        let span = self.data();
1199        Span::new(
1200            span.lo + BytePos::from_usize(inner.start),
1201            span.lo + BytePos::from_usize(inner.end),
1202            span.ctxt,
1203            span.parent,
1204        )
1205    }
1206
1207    /// Equivalent of `Span::def_site` from the proc macro API,
1208    /// except that the location is taken from the `self` span.
1209    pub fn with_def_site_ctxt(self, expn_id: ExpnId) -> Span {
1210        self.with_ctxt_from_mark(expn_id, Transparency::Opaque)
1211    }
1212
1213    /// Equivalent of `Span::call_site` from the proc macro API,
1214    /// except that the location is taken from the `self` span.
1215    pub fn with_call_site_ctxt(self, expn_id: ExpnId) -> Span {
1216        self.with_ctxt_from_mark(expn_id, Transparency::Transparent)
1217    }
1218
1219    /// Equivalent of `Span::mixed_site` from the proc macro API,
1220    /// except that the location is taken from the `self` span.
1221    pub fn with_mixed_site_ctxt(self, expn_id: ExpnId) -> Span {
1222        self.with_ctxt_from_mark(expn_id, Transparency::SemiOpaque)
1223    }
1224
1225    /// Produces a span with the same location as `self` and context produced by a macro with the
1226    /// given ID and transparency, assuming that macro was defined directly and not produced by
1227    /// some other macro (which is the case for built-in and procedural macros).
1228    fn with_ctxt_from_mark(self, expn_id: ExpnId, transparency: Transparency) -> Span {
1229        self.with_ctxt(SyntaxContext::root().apply_mark(expn_id, transparency))
1230    }
1231
1232    #[inline]
1233    pub fn apply_mark(self, expn_id: ExpnId, transparency: Transparency) -> Span {
1234        self.map_ctxt(|ctxt| ctxt.apply_mark(expn_id, transparency))
1235    }
1236
1237    #[inline]
1238    pub fn remove_mark(&mut self) -> ExpnId {
1239        let mut mark = ExpnId::root();
1240        *self = self.map_ctxt(|mut ctxt| {
1241            mark = ctxt.remove_mark();
1242            ctxt
1243        });
1244        mark
1245    }
1246
1247    #[inline]
1248    pub fn adjust(&mut self, expn_id: ExpnId) -> Option<ExpnId> {
1249        let mut mark = None;
1250        *self = self.map_ctxt(|mut ctxt| {
1251            mark = ctxt.adjust(expn_id);
1252            ctxt
1253        });
1254        mark
1255    }
1256
1257    #[inline]
1258    pub fn normalize_to_macros_2_0_and_adjust(&mut self, expn_id: ExpnId) -> Option<ExpnId> {
1259        let mut mark = None;
1260        *self = self.map_ctxt(|mut ctxt| {
1261            mark = ctxt.normalize_to_macros_2_0_and_adjust(expn_id);
1262            ctxt
1263        });
1264        mark
1265    }
1266
1267    #[inline]
1268    pub fn glob_adjust(&mut self, expn_id: ExpnId, glob_span: Span) -> Option<Option<ExpnId>> {
1269        let mut mark = None;
1270        *self = self.map_ctxt(|mut ctxt| {
1271            mark = ctxt.glob_adjust(expn_id, glob_span);
1272            ctxt
1273        });
1274        mark
1275    }
1276
1277    #[inline]
1278    pub fn reverse_glob_adjust(
1279        &mut self,
1280        expn_id: ExpnId,
1281        glob_span: Span,
1282    ) -> Option<Option<ExpnId>> {
1283        let mut mark = None;
1284        *self = self.map_ctxt(|mut ctxt| {
1285            mark = ctxt.reverse_glob_adjust(expn_id, glob_span);
1286            ctxt
1287        });
1288        mark
1289    }
1290
1291    #[inline]
1292    pub fn normalize_to_macros_2_0(self) -> Span {
1293        self.map_ctxt(|ctxt| ctxt.normalize_to_macros_2_0())
1294    }
1295
1296    #[inline]
1297    pub fn normalize_to_macro_rules(self) -> Span {
1298        self.map_ctxt(|ctxt| ctxt.normalize_to_macro_rules())
1299    }
1300}
1301
1302impl Default for Span {
1303    fn default() -> Self {
1304        DUMMY_SP
1305    }
1306}
1307
1308rustc_index::newtype_index! {
1309    #[orderable]
1310    #[debug_format = "AttrId({})"]
1311    pub struct AttrId {}
1312}
1313
1314/// This trait is used to allow encoder specific encodings of certain types.
1315/// It is similar to rustc_type_ir's TyEncoder.
1316pub trait SpanEncoder: Encoder {
1317    fn encode_span(&mut self, span: Span);
1318    fn encode_symbol(&mut self, sym: Symbol);
1319    fn encode_byte_symbol(&mut self, byte_sym: ByteSymbol);
1320    fn encode_expn_id(&mut self, expn_id: ExpnId);
1321    fn encode_syntax_context(&mut self, syntax_context: SyntaxContext);
1322    /// As a local identifier, a `CrateNum` is only meaningful within its context, e.g. within a
1323    /// tcx. Therefore, make sure to include the context when encode a `CrateNum`.
1324    fn encode_crate_num(&mut self, crate_num: CrateNum);
1325    fn encode_def_index(&mut self, def_index: DefIndex);
1326    fn encode_def_id(&mut self, def_id: DefId);
1327}
1328
1329impl SpanEncoder for FileEncoder {
1330    fn encode_span(&mut self, span: Span) {
1331        let span = span.data();
1332        span.lo.encode(self);
1333        span.hi.encode(self);
1334    }
1335
1336    fn encode_symbol(&mut self, sym: Symbol) {
1337        self.emit_str(sym.as_str());
1338    }
1339
1340    fn encode_byte_symbol(&mut self, byte_sym: ByteSymbol) {
1341        self.emit_byte_str(byte_sym.as_byte_str());
1342    }
1343
1344    fn encode_expn_id(&mut self, _expn_id: ExpnId) {
1345        panic!("cannot encode `ExpnId` with `FileEncoder`");
1346    }
1347
1348    fn encode_syntax_context(&mut self, _syntax_context: SyntaxContext) {
1349        panic!("cannot encode `SyntaxContext` with `FileEncoder`");
1350    }
1351
1352    fn encode_crate_num(&mut self, crate_num: CrateNum) {
1353        self.emit_u32(crate_num.as_u32());
1354    }
1355
1356    fn encode_def_index(&mut self, _def_index: DefIndex) {
1357        panic!("cannot encode `DefIndex` with `FileEncoder`");
1358    }
1359
1360    fn encode_def_id(&mut self, def_id: DefId) {
1361        def_id.krate.encode(self);
1362        def_id.index.encode(self);
1363    }
1364}
1365
1366impl<E: SpanEncoder> Encodable<E> for Span {
1367    fn encode(&self, s: &mut E) {
1368        s.encode_span(*self);
1369    }
1370}
1371
1372impl<E: SpanEncoder> Encodable<E> for Symbol {
1373    fn encode(&self, s: &mut E) {
1374        s.encode_symbol(*self);
1375    }
1376}
1377
1378impl<E: SpanEncoder> Encodable<E> for ByteSymbol {
1379    fn encode(&self, s: &mut E) {
1380        s.encode_byte_symbol(*self);
1381    }
1382}
1383
1384impl<E: SpanEncoder> Encodable<E> for ExpnId {
1385    fn encode(&self, s: &mut E) {
1386        s.encode_expn_id(*self)
1387    }
1388}
1389
1390impl<E: SpanEncoder> Encodable<E> for SyntaxContext {
1391    fn encode(&self, s: &mut E) {
1392        s.encode_syntax_context(*self)
1393    }
1394}
1395
1396impl<E: SpanEncoder> Encodable<E> for CrateNum {
1397    fn encode(&self, s: &mut E) {
1398        s.encode_crate_num(*self)
1399    }
1400}
1401
1402impl<E: SpanEncoder> Encodable<E> for DefIndex {
1403    fn encode(&self, s: &mut E) {
1404        s.encode_def_index(*self)
1405    }
1406}
1407
1408impl<E: SpanEncoder> Encodable<E> for DefId {
1409    fn encode(&self, s: &mut E) {
1410        s.encode_def_id(*self)
1411    }
1412}
1413
1414impl<E: SpanEncoder> Encodable<E> for AttrId {
1415    fn encode(&self, _s: &mut E) {
1416        // A fresh id will be generated when decoding
1417    }
1418}
1419
1420pub trait BlobDecoder: Decoder {
1421    fn decode_symbol(&mut self) -> Symbol;
1422    fn decode_byte_symbol(&mut self) -> ByteSymbol;
1423    fn decode_def_index(&mut self) -> DefIndex;
1424}
1425
1426/// This trait is used to allow decoder specific encodings of certain types.
1427/// It is similar to rustc_type_ir's TyDecoder.
1428///
1429/// Specifically for metadata, an important note is that spans can only be decoded once
1430/// some other metadata is already read.
1431/// Spans have to be properly mapped into the decoding crate's sourcemap,
1432/// and crate numbers have to be converted sometimes.
1433/// This can only be done once the `CrateRoot` is available.
1434///
1435/// As such, some methods that used to be in the `SpanDecoder` trait
1436/// are now in the `BlobDecoder` trait. This hierarchy is not mirrored for `Encoder`s.
1437/// `BlobDecoder` has methods for deserializing types that are more complex than just those
1438/// that can be decoded with `Decoder`, but which can be decoded on their own, *before* any other metadata is.
1439/// Importantly, that means that types that can be decoded with `BlobDecoder` can show up in the crate root.
1440/// The place where this distinction is relevant is in `rustc_metadata` where metadata is decoded using either the
1441/// `MetadataDecodeContext` or the `BlobDecodeContext`.
1442pub trait SpanDecoder: BlobDecoder {
1443    fn decode_span(&mut self) -> Span;
1444    fn decode_expn_id(&mut self) -> ExpnId;
1445    fn decode_syntax_context(&mut self) -> SyntaxContext;
1446    fn decode_crate_num(&mut self) -> CrateNum;
1447    fn decode_def_id(&mut self) -> DefId;
1448    fn decode_attr_id(&mut self) -> AttrId;
1449}
1450
1451impl BlobDecoder for MemDecoder<'_> {
1452    fn decode_symbol(&mut self) -> Symbol {
1453        Symbol::intern(self.read_str())
1454    }
1455
1456    fn decode_byte_symbol(&mut self) -> ByteSymbol {
1457        ByteSymbol::intern(self.read_byte_str())
1458    }
1459
1460    fn decode_def_index(&mut self) -> DefIndex {
1461        panic!("cannot decode `DefIndex` with `MemDecoder`");
1462    }
1463}
1464
1465impl SpanDecoder for MemDecoder<'_> {
1466    fn decode_span(&mut self) -> Span {
1467        let lo = Decodable::decode(self);
1468        let hi = Decodable::decode(self);
1469
1470        Span::new(lo, hi, SyntaxContext::root(), None)
1471    }
1472
1473    fn decode_expn_id(&mut self) -> ExpnId {
1474        panic!("cannot decode `ExpnId` with `MemDecoder`");
1475    }
1476
1477    fn decode_syntax_context(&mut self) -> SyntaxContext {
1478        panic!("cannot decode `SyntaxContext` with `MemDecoder`");
1479    }
1480
1481    fn decode_crate_num(&mut self) -> CrateNum {
1482        CrateNum::from_u32(self.read_u32())
1483    }
1484
1485    fn decode_def_id(&mut self) -> DefId {
1486        DefId { krate: Decodable::decode(self), index: Decodable::decode(self) }
1487    }
1488
1489    fn decode_attr_id(&mut self) -> AttrId {
1490        panic!("cannot decode `AttrId` with `MemDecoder`");
1491    }
1492}
1493
1494impl<D: SpanDecoder> Decodable<D> for Span {
1495    fn decode(s: &mut D) -> Span {
1496        s.decode_span()
1497    }
1498}
1499
1500impl<D: BlobDecoder> Decodable<D> for Symbol {
1501    fn decode(s: &mut D) -> Symbol {
1502        s.decode_symbol()
1503    }
1504}
1505
1506impl<D: BlobDecoder> Decodable<D> for ByteSymbol {
1507    fn decode(s: &mut D) -> ByteSymbol {
1508        s.decode_byte_symbol()
1509    }
1510}
1511
1512impl<D: SpanDecoder> Decodable<D> for ExpnId {
1513    fn decode(s: &mut D) -> ExpnId {
1514        s.decode_expn_id()
1515    }
1516}
1517
1518impl<D: SpanDecoder> Decodable<D> for SyntaxContext {
1519    fn decode(s: &mut D) -> SyntaxContext {
1520        s.decode_syntax_context()
1521    }
1522}
1523
1524impl<D: SpanDecoder> Decodable<D> for CrateNum {
1525    fn decode(s: &mut D) -> CrateNum {
1526        s.decode_crate_num()
1527    }
1528}
1529
1530impl<D: BlobDecoder> Decodable<D> for DefIndex {
1531    fn decode(s: &mut D) -> DefIndex {
1532        s.decode_def_index()
1533    }
1534}
1535
1536impl<D: SpanDecoder> Decodable<D> for DefId {
1537    fn decode(s: &mut D) -> DefId {
1538        s.decode_def_id()
1539    }
1540}
1541
1542impl<D: SpanDecoder> Decodable<D> for AttrId {
1543    fn decode(s: &mut D) -> AttrId {
1544        s.decode_attr_id()
1545    }
1546}
1547
1548impl fmt::Debug for Span {
1549    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1550        // Use the global `SourceMap` to print the span. If that's not
1551        // available, fall back to printing the raw values.
1552
1553        fn fallback(span: Span, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1554            f.debug_struct("Span")
1555                .field("lo", &span.lo())
1556                .field("hi", &span.hi())
1557                .field("ctxt", &span.ctxt())
1558                .finish()
1559        }
1560
1561        if SESSION_GLOBALS.is_set() {
1562            with_session_globals(|session_globals| {
1563                if let Some(source_map) = &session_globals.source_map {
1564                    write!(f, "{} ({:?})", source_map.span_to_diagnostic_string(*self), self.ctxt())
1565                } else {
1566                    fallback(*self, f)
1567                }
1568            })
1569        } else {
1570            fallback(*self, f)
1571        }
1572    }
1573}
1574
1575impl fmt::Debug for SpanData {
1576    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1577        fmt::Debug::fmt(&self.span(), f)
1578    }
1579}
1580
1581/// Identifies an offset of a multi-byte character in a `SourceFile`.
1582#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug, HashStable_Generic)]
1583pub struct MultiByteChar {
1584    /// The relative offset of the character in the `SourceFile`.
1585    pub pos: RelativeBytePos,
1586    /// The number of bytes, `>= 2`.
1587    pub bytes: u8,
1588}
1589
1590/// Identifies an offset of a character that was normalized away from `SourceFile`.
1591#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug, HashStable_Generic)]
1592pub struct NormalizedPos {
1593    /// The relative offset of the character in the `SourceFile`.
1594    pub pos: RelativeBytePos,
1595    /// The difference between original and normalized string at position.
1596    pub diff: u32,
1597}
1598
1599#[derive(PartialEq, Eq, Clone, Debug)]
1600pub enum ExternalSource {
1601    /// No external source has to be loaded, since the `SourceFile` represents a local crate.
1602    Unneeded,
1603    Foreign {
1604        kind: ExternalSourceKind,
1605        /// Index of the file inside metadata.
1606        metadata_index: u32,
1607    },
1608}
1609
1610/// The state of the lazy external source loading mechanism of a `SourceFile`.
1611#[derive(PartialEq, Eq, Clone, Debug)]
1612pub enum ExternalSourceKind {
1613    /// The external source has been loaded already.
1614    Present(Arc<String>),
1615    /// No attempt has been made to load the external source.
1616    AbsentOk,
1617    /// A failed attempt has been made to load the external source.
1618    AbsentErr,
1619}
1620
1621impl ExternalSource {
1622    pub fn get_source(&self) -> Option<&str> {
1623        match self {
1624            ExternalSource::Foreign { kind: ExternalSourceKind::Present(src), .. } => Some(src),
1625            _ => None,
1626        }
1627    }
1628}
1629
1630#[derive(Debug)]
1631pub struct OffsetOverflowError;
1632
1633#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Encodable, Decodable)]
1634#[derive(HashStable_Generic)]
1635pub enum SourceFileHashAlgorithm {
1636    Md5,
1637    Sha1,
1638    Sha256,
1639    Blake3,
1640}
1641
1642impl Display for SourceFileHashAlgorithm {
1643    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1644        f.write_str(match self {
1645            Self::Md5 => "md5",
1646            Self::Sha1 => "sha1",
1647            Self::Sha256 => "sha256",
1648            Self::Blake3 => "blake3",
1649        })
1650    }
1651}
1652
1653impl FromStr for SourceFileHashAlgorithm {
1654    type Err = ();
1655
1656    fn from_str(s: &str) -> Result<SourceFileHashAlgorithm, ()> {
1657        match s {
1658            "md5" => Ok(SourceFileHashAlgorithm::Md5),
1659            "sha1" => Ok(SourceFileHashAlgorithm::Sha1),
1660            "sha256" => Ok(SourceFileHashAlgorithm::Sha256),
1661            "blake3" => Ok(SourceFileHashAlgorithm::Blake3),
1662            _ => Err(()),
1663        }
1664    }
1665}
1666
1667/// The hash of the on-disk source file used for debug info and cargo freshness checks.
1668#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
1669#[derive(HashStable_Generic, Encodable, Decodable)]
1670pub struct SourceFileHash {
1671    pub kind: SourceFileHashAlgorithm,
1672    value: [u8; 32],
1673}
1674
1675impl Display for SourceFileHash {
1676    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1677        write!(f, "{}=", self.kind)?;
1678        for byte in self.value[0..self.hash_len()].into_iter() {
1679            write!(f, "{byte:02x}")?;
1680        }
1681        Ok(())
1682    }
1683}
1684
1685impl SourceFileHash {
1686    pub fn new_in_memory(kind: SourceFileHashAlgorithm, src: impl AsRef<[u8]>) -> SourceFileHash {
1687        let mut hash = SourceFileHash { kind, value: Default::default() };
1688        let len = hash.hash_len();
1689        let value = &mut hash.value[..len];
1690        let data = src.as_ref();
1691        match kind {
1692            SourceFileHashAlgorithm::Md5 => {
1693                value.copy_from_slice(&Md5::digest(data));
1694            }
1695            SourceFileHashAlgorithm::Sha1 => {
1696                value.copy_from_slice(&Sha1::digest(data));
1697            }
1698            SourceFileHashAlgorithm::Sha256 => {
1699                value.copy_from_slice(&Sha256::digest(data));
1700            }
1701            SourceFileHashAlgorithm::Blake3 => value.copy_from_slice(blake3::hash(data).as_bytes()),
1702        };
1703        hash
1704    }
1705
1706    pub fn new(kind: SourceFileHashAlgorithm, src: impl Read) -> Result<SourceFileHash, io::Error> {
1707        let mut hash = SourceFileHash { kind, value: Default::default() };
1708        let len = hash.hash_len();
1709        let value = &mut hash.value[..len];
1710        // Buffer size is the recommended amount to fully leverage SIMD instructions on AVX-512 as per
1711        // blake3 documentation.
1712        let mut buf = vec![0; 16 * 1024];
1713
1714        fn digest<T>(
1715            mut hasher: T,
1716            mut update: impl FnMut(&mut T, &[u8]),
1717            finish: impl FnOnce(T, &mut [u8]),
1718            mut src: impl Read,
1719            buf: &mut [u8],
1720            value: &mut [u8],
1721        ) -> Result<(), io::Error> {
1722            loop {
1723                let bytes_read = src.read(buf)?;
1724                if bytes_read == 0 {
1725                    break;
1726                }
1727                update(&mut hasher, &buf[0..bytes_read]);
1728            }
1729            finish(hasher, value);
1730            Ok(())
1731        }
1732
1733        match kind {
1734            SourceFileHashAlgorithm::Sha256 => {
1735                digest(
1736                    Sha256::new(),
1737                    |h, b| {
1738                        h.update(b);
1739                    },
1740                    |h, out| out.copy_from_slice(&h.finalize()),
1741                    src,
1742                    &mut buf,
1743                    value,
1744                )?;
1745            }
1746            SourceFileHashAlgorithm::Sha1 => {
1747                digest(
1748                    Sha1::new(),
1749                    |h, b| {
1750                        h.update(b);
1751                    },
1752                    |h, out| out.copy_from_slice(&h.finalize()),
1753                    src,
1754                    &mut buf,
1755                    value,
1756                )?;
1757            }
1758            SourceFileHashAlgorithm::Md5 => {
1759                digest(
1760                    Md5::new(),
1761                    |h, b| {
1762                        h.update(b);
1763                    },
1764                    |h, out| out.copy_from_slice(&h.finalize()),
1765                    src,
1766                    &mut buf,
1767                    value,
1768                )?;
1769            }
1770            SourceFileHashAlgorithm::Blake3 => {
1771                digest(
1772                    blake3::Hasher::new(),
1773                    |h, b| {
1774                        h.update(b);
1775                    },
1776                    |h, out| out.copy_from_slice(h.finalize().as_bytes()),
1777                    src,
1778                    &mut buf,
1779                    value,
1780                )?;
1781            }
1782        }
1783        Ok(hash)
1784    }
1785
1786    /// Check if the stored hash matches the hash of the string.
1787    pub fn matches(&self, src: &str) -> bool {
1788        Self::new_in_memory(self.kind, src.as_bytes()) == *self
1789    }
1790
1791    /// The bytes of the hash.
1792    pub fn hash_bytes(&self) -> &[u8] {
1793        let len = self.hash_len();
1794        &self.value[..len]
1795    }
1796
1797    fn hash_len(&self) -> usize {
1798        match self.kind {
1799            SourceFileHashAlgorithm::Md5 => 16,
1800            SourceFileHashAlgorithm::Sha1 => 20,
1801            SourceFileHashAlgorithm::Sha256 | SourceFileHashAlgorithm::Blake3 => 32,
1802        }
1803    }
1804}
1805
1806#[derive(Clone)]
1807pub enum SourceFileLines {
1808    /// The source file lines, in decoded (random-access) form.
1809    Lines(Vec<RelativeBytePos>),
1810
1811    /// The source file lines, in undecoded difference list form.
1812    Diffs(SourceFileDiffs),
1813}
1814
1815impl SourceFileLines {
1816    pub fn is_lines(&self) -> bool {
1817        matches!(self, SourceFileLines::Lines(_))
1818    }
1819}
1820
1821/// The source file lines in difference list form. This matches the form
1822/// used within metadata, which saves space by exploiting the fact that the
1823/// lines list is sorted and individual lines are usually not that long.
1824///
1825/// We read it directly from metadata and only decode it into `Lines` form
1826/// when necessary. This is a significant performance win, especially for
1827/// small crates where very little of `std`'s metadata is used.
1828#[derive(Clone)]
1829pub struct SourceFileDiffs {
1830    /// Always 1, 2, or 4. Always as small as possible, while being big
1831    /// enough to hold the length of the longest line in the source file.
1832    /// The 1 case is by far the most common.
1833    bytes_per_diff: usize,
1834
1835    /// The number of diffs encoded in `raw_diffs`. Always one less than
1836    /// the number of lines in the source file.
1837    num_diffs: usize,
1838
1839    /// The diffs in "raw" form. Each segment of `bytes_per_diff` length
1840    /// encodes one little-endian diff. Note that they aren't LEB128
1841    /// encoded. This makes for much faster decoding. Besides, the
1842    /// bytes_per_diff==1 case is by far the most common, and LEB128
1843    /// encoding has no effect on that case.
1844    raw_diffs: Vec<u8>,
1845}
1846
1847/// A single source in the [`SourceMap`].
1848pub struct SourceFile {
1849    /// The name of the file that the source came from. Source that doesn't
1850    /// originate from files has names between angle brackets by convention
1851    /// (e.g., `<anon>`).
1852    pub name: FileName,
1853    /// The complete source code.
1854    pub src: Option<Arc<String>>,
1855    /// The source code's hash.
1856    pub src_hash: SourceFileHash,
1857    /// Used to enable cargo to use checksums to check if a crate is fresh rather
1858    /// than mtimes. This might be the same as `src_hash`, and if the requested algorithm
1859    /// is identical we won't compute it twice.
1860    pub checksum_hash: Option<SourceFileHash>,
1861    /// The external source code (used for external crates, which will have a `None`
1862    /// value as `self.src`.
1863    pub external_src: FreezeLock<ExternalSource>,
1864    /// The start position of this source in the `SourceMap`.
1865    pub start_pos: BytePos,
1866    /// The byte length of this source after normalization.
1867    pub normalized_source_len: RelativeBytePos,
1868    /// The byte length of this source before normalization.
1869    pub unnormalized_source_len: u32,
1870    /// Locations of lines beginnings in the source code.
1871    pub lines: FreezeLock<SourceFileLines>,
1872    /// Locations of multi-byte characters in the source code.
1873    pub multibyte_chars: Vec<MultiByteChar>,
1874    /// Locations of characters removed during normalization.
1875    pub normalized_pos: Vec<NormalizedPos>,
1876    /// A hash of the filename & crate-id, used for uniquely identifying source
1877    /// files within the crate graph and for speeding up hashing in incremental
1878    /// compilation.
1879    pub stable_id: StableSourceFileId,
1880    /// Indicates which crate this `SourceFile` was imported from.
1881    pub cnum: CrateNum,
1882}
1883
1884impl Clone for SourceFile {
1885    fn clone(&self) -> Self {
1886        Self {
1887            name: self.name.clone(),
1888            src: self.src.clone(),
1889            src_hash: self.src_hash,
1890            checksum_hash: self.checksum_hash,
1891            external_src: self.external_src.clone(),
1892            start_pos: self.start_pos,
1893            normalized_source_len: self.normalized_source_len,
1894            unnormalized_source_len: self.unnormalized_source_len,
1895            lines: self.lines.clone(),
1896            multibyte_chars: self.multibyte_chars.clone(),
1897            normalized_pos: self.normalized_pos.clone(),
1898            stable_id: self.stable_id,
1899            cnum: self.cnum,
1900        }
1901    }
1902}
1903
1904impl<S: SpanEncoder> Encodable<S> for SourceFile {
1905    fn encode(&self, s: &mut S) {
1906        self.name.encode(s);
1907        self.src_hash.encode(s);
1908        self.checksum_hash.encode(s);
1909        // Do not encode `start_pos` as it's global state for this session.
1910        self.normalized_source_len.encode(s);
1911        self.unnormalized_source_len.encode(s);
1912
1913        // We are always in `Lines` form by the time we reach here.
1914        assert!(self.lines.read().is_lines());
1915        let lines = self.lines();
1916        // Store the length.
1917        s.emit_u32(lines.len() as u32);
1918
1919        // Compute and store the difference list.
1920        if lines.len() != 0 {
1921            let max_line_length = if lines.len() == 1 {
1922                0
1923            } else {
1924                lines
1925                    .array_windows()
1926                    .map(|&[fst, snd]| snd - fst)
1927                    .map(|bp| bp.to_usize())
1928                    .max()
1929                    .unwrap()
1930            };
1931
1932            let bytes_per_diff: usize = match max_line_length {
1933                0..=0xFF => 1,
1934                0x100..=0xFFFF => 2,
1935                _ => 4,
1936            };
1937
1938            // Encode the number of bytes used per diff.
1939            s.emit_u8(bytes_per_diff as u8);
1940
1941            // Encode the first element.
1942            assert_eq!(lines[0], RelativeBytePos(0));
1943
1944            // Encode the difference list.
1945            let diff_iter = lines.array_windows().map(|&[fst, snd]| snd - fst);
1946            let num_diffs = lines.len() - 1;
1947            let mut raw_diffs;
1948            match bytes_per_diff {
1949                1 => {
1950                    raw_diffs = Vec::with_capacity(num_diffs);
1951                    for diff in diff_iter {
1952                        raw_diffs.push(diff.0 as u8);
1953                    }
1954                }
1955                2 => {
1956                    raw_diffs = Vec::with_capacity(bytes_per_diff * num_diffs);
1957                    for diff in diff_iter {
1958                        raw_diffs.extend_from_slice(&(diff.0 as u16).to_le_bytes());
1959                    }
1960                }
1961                4 => {
1962                    raw_diffs = Vec::with_capacity(bytes_per_diff * num_diffs);
1963                    for diff in diff_iter {
1964                        raw_diffs.extend_from_slice(&(diff.0).to_le_bytes());
1965                    }
1966                }
1967                _ => unreachable!(),
1968            }
1969            s.emit_raw_bytes(&raw_diffs);
1970        }
1971
1972        self.multibyte_chars.encode(s);
1973        self.stable_id.encode(s);
1974        self.normalized_pos.encode(s);
1975        self.cnum.encode(s);
1976    }
1977}
1978
1979impl<D: SpanDecoder> Decodable<D> for SourceFile {
1980    fn decode(d: &mut D) -> SourceFile {
1981        let name: FileName = Decodable::decode(d);
1982        let src_hash: SourceFileHash = Decodable::decode(d);
1983        let checksum_hash: Option<SourceFileHash> = Decodable::decode(d);
1984        let normalized_source_len: RelativeBytePos = Decodable::decode(d);
1985        let unnormalized_source_len = Decodable::decode(d);
1986        let lines = {
1987            let num_lines: u32 = Decodable::decode(d);
1988            if num_lines > 0 {
1989                // Read the number of bytes used per diff.
1990                let bytes_per_diff = d.read_u8() as usize;
1991
1992                // Read the difference list.
1993                let num_diffs = num_lines as usize - 1;
1994                let raw_diffs = d.read_raw_bytes(bytes_per_diff * num_diffs).to_vec();
1995                SourceFileLines::Diffs(SourceFileDiffs { bytes_per_diff, num_diffs, raw_diffs })
1996            } else {
1997                SourceFileLines::Lines(vec![])
1998            }
1999        };
2000        let multibyte_chars: Vec<MultiByteChar> = Decodable::decode(d);
2001        let stable_id = Decodable::decode(d);
2002        let normalized_pos: Vec<NormalizedPos> = Decodable::decode(d);
2003        let cnum: CrateNum = Decodable::decode(d);
2004        SourceFile {
2005            name,
2006            start_pos: BytePos::from_u32(0),
2007            normalized_source_len,
2008            unnormalized_source_len,
2009            src: None,
2010            src_hash,
2011            checksum_hash,
2012            // Unused - the metadata decoder will construct
2013            // a new SourceFile, filling in `external_src` properly
2014            external_src: FreezeLock::frozen(ExternalSource::Unneeded),
2015            lines: FreezeLock::new(lines),
2016            multibyte_chars,
2017            normalized_pos,
2018            stable_id,
2019            cnum,
2020        }
2021    }
2022}
2023
2024impl fmt::Debug for SourceFile {
2025    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
2026        write!(fmt, "SourceFile({:?})", self.name)
2027    }
2028}
2029
2030/// This is a [SourceFile] identifier that is used to correlate source files between
2031/// subsequent compilation sessions (which is something we need to do during
2032/// incremental compilation).
2033///
2034/// It is a hash value (so we can efficiently consume it when stable-hashing
2035/// spans) that consists of the `FileName` and the `StableCrateId` of the crate
2036/// the source file is from. The crate id is needed because sometimes the
2037/// `FileName` is not unique within the crate graph (think `src/lib.rs`, for
2038/// example).
2039///
2040/// The way the crate-id part is handled is a bit special: source files of the
2041/// local crate are hashed as `(filename, None)`, while source files from
2042/// upstream crates have a hash of `(filename, Some(stable_crate_id))`. This
2043/// is because SourceFiles for the local crate are allocated very early in the
2044/// compilation process when the `StableCrateId` is not yet known. If, due to
2045/// some refactoring of the compiler, the `StableCrateId` of the local crate
2046/// were to become available, it would be better to uniformly make this a
2047/// hash of `(filename, stable_crate_id)`.
2048///
2049/// When `SourceFile`s are exported in crate metadata, the `StableSourceFileId`
2050/// is updated to incorporate the `StableCrateId` of the exporting crate.
2051#[derive(
2052    Debug,
2053    Clone,
2054    Copy,
2055    Hash,
2056    PartialEq,
2057    Eq,
2058    HashStable_Generic,
2059    Encodable,
2060    Decodable,
2061    Default,
2062    PartialOrd,
2063    Ord
2064)]
2065pub struct StableSourceFileId(Hash128);
2066
2067impl StableSourceFileId {
2068    fn from_filename_in_current_crate(filename: &FileName) -> Self {
2069        Self::from_filename_and_stable_crate_id(filename, None)
2070    }
2071
2072    pub fn from_filename_for_export(
2073        filename: &FileName,
2074        local_crate_stable_crate_id: StableCrateId,
2075    ) -> Self {
2076        Self::from_filename_and_stable_crate_id(filename, Some(local_crate_stable_crate_id))
2077    }
2078
2079    fn from_filename_and_stable_crate_id(
2080        filename: &FileName,
2081        stable_crate_id: Option<StableCrateId>,
2082    ) -> Self {
2083        let mut hasher = StableHasher::new();
2084        filename.hash(&mut hasher);
2085        stable_crate_id.hash(&mut hasher);
2086        StableSourceFileId(hasher.finish())
2087    }
2088}
2089
2090impl SourceFile {
2091    const MAX_FILE_SIZE: u32 = u32::MAX - 1;
2092
2093    pub fn new(
2094        name: FileName,
2095        mut src: String,
2096        hash_kind: SourceFileHashAlgorithm,
2097        checksum_hash_kind: Option<SourceFileHashAlgorithm>,
2098    ) -> Result<Self, OffsetOverflowError> {
2099        // Compute the file hash before any normalization.
2100        let src_hash = SourceFileHash::new_in_memory(hash_kind, src.as_bytes());
2101        let checksum_hash = checksum_hash_kind.map(|checksum_hash_kind| {
2102            if checksum_hash_kind == hash_kind {
2103                src_hash
2104            } else {
2105                SourceFileHash::new_in_memory(checksum_hash_kind, src.as_bytes())
2106            }
2107        });
2108        // Capture the original source length before normalization.
2109        let unnormalized_source_len = u32::try_from(src.len()).map_err(|_| OffsetOverflowError)?;
2110        if unnormalized_source_len > Self::MAX_FILE_SIZE {
2111            return Err(OffsetOverflowError);
2112        }
2113
2114        let normalized_pos = normalize_src(&mut src);
2115
2116        let stable_id = StableSourceFileId::from_filename_in_current_crate(&name);
2117        let normalized_source_len = u32::try_from(src.len()).map_err(|_| OffsetOverflowError)?;
2118        if normalized_source_len > Self::MAX_FILE_SIZE {
2119            return Err(OffsetOverflowError);
2120        }
2121
2122        let (lines, multibyte_chars) = analyze_source_file::analyze_source_file(&src);
2123
2124        Ok(SourceFile {
2125            name,
2126            src: Some(Arc::new(src)),
2127            src_hash,
2128            checksum_hash,
2129            external_src: FreezeLock::frozen(ExternalSource::Unneeded),
2130            start_pos: BytePos::from_u32(0),
2131            normalized_source_len: RelativeBytePos::from_u32(normalized_source_len),
2132            unnormalized_source_len,
2133            lines: FreezeLock::frozen(SourceFileLines::Lines(lines)),
2134            multibyte_chars,
2135            normalized_pos,
2136            stable_id,
2137            cnum: LOCAL_CRATE,
2138        })
2139    }
2140
2141    /// This converts the `lines` field to contain `SourceFileLines::Lines` if needed and freezes
2142    /// it.
2143    fn convert_diffs_to_lines_frozen(&self) {
2144        let mut guard = if let Some(guard) = self.lines.try_write() { guard } else { return };
2145
2146        let SourceFileDiffs { bytes_per_diff, num_diffs, raw_diffs } = match &*guard {
2147            SourceFileLines::Diffs(diffs) => diffs,
2148            SourceFileLines::Lines(..) => {
2149                FreezeWriteGuard::freeze(guard);
2150                return;
2151            }
2152        };
2153
2154        // Convert from "diffs" form to "lines" form.
2155        let num_lines = num_diffs + 1;
2156        let mut lines = Vec::with_capacity(num_lines);
2157        let mut line_start = RelativeBytePos(0);
2158        lines.push(line_start);
2159
2160        assert_eq!(*num_diffs, raw_diffs.len() / bytes_per_diff);
2161        match bytes_per_diff {
2162            1 => {
2163                lines.extend(raw_diffs.into_iter().map(|&diff| {
2164                    line_start = line_start + RelativeBytePos(diff as u32);
2165                    line_start
2166                }));
2167            }
2168            2 => {
2169                lines.extend((0..*num_diffs).map(|i| {
2170                    let pos = bytes_per_diff * i;
2171                    let bytes = [raw_diffs[pos], raw_diffs[pos + 1]];
2172                    let diff = u16::from_le_bytes(bytes);
2173                    line_start = line_start + RelativeBytePos(diff as u32);
2174                    line_start
2175                }));
2176            }
2177            4 => {
2178                lines.extend((0..*num_diffs).map(|i| {
2179                    let pos = bytes_per_diff * i;
2180                    let bytes = [
2181                        raw_diffs[pos],
2182                        raw_diffs[pos + 1],
2183                        raw_diffs[pos + 2],
2184                        raw_diffs[pos + 3],
2185                    ];
2186                    let diff = u32::from_le_bytes(bytes);
2187                    line_start = line_start + RelativeBytePos(diff);
2188                    line_start
2189                }));
2190            }
2191            _ => unreachable!(),
2192        }
2193
2194        *guard = SourceFileLines::Lines(lines);
2195
2196        FreezeWriteGuard::freeze(guard);
2197    }
2198
2199    pub fn lines(&self) -> &[RelativeBytePos] {
2200        if let Some(SourceFileLines::Lines(lines)) = self.lines.get() {
2201            return &lines[..];
2202        }
2203
2204        outline(|| {
2205            self.convert_diffs_to_lines_frozen();
2206            if let Some(SourceFileLines::Lines(lines)) = self.lines.get() {
2207                return &lines[..];
2208            }
2209            unreachable!()
2210        })
2211    }
2212
2213    /// Returns the `BytePos` of the beginning of the current line.
2214    pub fn line_begin_pos(&self, pos: BytePos) -> BytePos {
2215        let pos = self.relative_position(pos);
2216        let line_index = self.lookup_line(pos).unwrap();
2217        let line_start_pos = self.lines()[line_index];
2218        self.absolute_position(line_start_pos)
2219    }
2220
2221    /// Add externally loaded source.
2222    /// If the hash of the input doesn't match or no input is supplied via None,
2223    /// it is interpreted as an error and the corresponding enum variant is set.
2224    /// The return value signifies whether some kind of source is present.
2225    pub fn add_external_src<F>(&self, get_src: F) -> bool
2226    where
2227        F: FnOnce() -> Option<String>,
2228    {
2229        if !self.external_src.is_frozen() {
2230            let src = get_src();
2231            let src = src.and_then(|mut src| {
2232                // The src_hash needs to be computed on the pre-normalized src.
2233                self.src_hash.matches(&src).then(|| {
2234                    normalize_src(&mut src);
2235                    src
2236                })
2237            });
2238
2239            self.external_src.try_write().map(|mut external_src| {
2240                if let ExternalSource::Foreign {
2241                    kind: src_kind @ ExternalSourceKind::AbsentOk,
2242                    ..
2243                } = &mut *external_src
2244                {
2245                    *src_kind = if let Some(src) = src {
2246                        ExternalSourceKind::Present(Arc::new(src))
2247                    } else {
2248                        ExternalSourceKind::AbsentErr
2249                    };
2250                } else {
2251                    panic!("unexpected state {:?}", *external_src)
2252                }
2253
2254                // Freeze this so we don't try to load the source again.
2255                FreezeWriteGuard::freeze(external_src)
2256            });
2257        }
2258
2259        self.src.is_some() || self.external_src.read().get_source().is_some()
2260    }
2261
2262    /// Gets a line from the list of pre-computed line-beginnings.
2263    /// The line number here is 0-based.
2264    pub fn get_line(&self, line_number: usize) -> Option<Cow<'_, str>> {
2265        fn get_until_newline(src: &str, begin: usize) -> &str {
2266            // We can't use `lines.get(line_number+1)` because we might
2267            // be parsing when we call this function and thus the current
2268            // line is the last one we have line info for.
2269            let slice = &src[begin..];
2270            match slice.find('\n') {
2271                Some(e) => &slice[..e],
2272                None => slice,
2273            }
2274        }
2275
2276        let begin = {
2277            let line = self.lines().get(line_number).copied()?;
2278            line.to_usize()
2279        };
2280
2281        if let Some(ref src) = self.src {
2282            Some(Cow::from(get_until_newline(src, begin)))
2283        } else {
2284            self.external_src
2285                .borrow()
2286                .get_source()
2287                .map(|src| Cow::Owned(String::from(get_until_newline(src, begin))))
2288        }
2289    }
2290
2291    pub fn is_real_file(&self) -> bool {
2292        self.name.is_real()
2293    }
2294
2295    #[inline]
2296    pub fn is_imported(&self) -> bool {
2297        self.src.is_none()
2298    }
2299
2300    pub fn count_lines(&self) -> usize {
2301        self.lines().len()
2302    }
2303
2304    #[inline]
2305    pub fn absolute_position(&self, pos: RelativeBytePos) -> BytePos {
2306        BytePos::from_u32(pos.to_u32() + self.start_pos.to_u32())
2307    }
2308
2309    #[inline]
2310    pub fn relative_position(&self, pos: BytePos) -> RelativeBytePos {
2311        RelativeBytePos::from_u32(pos.to_u32() - self.start_pos.to_u32())
2312    }
2313
2314    #[inline]
2315    pub fn end_position(&self) -> BytePos {
2316        self.absolute_position(self.normalized_source_len)
2317    }
2318
2319    /// Finds the line containing the given position. The return value is the
2320    /// index into the `lines` array of this `SourceFile`, not the 1-based line
2321    /// number. If the source_file is empty or the position is located before the
2322    /// first line, `None` is returned.
2323    pub fn lookup_line(&self, pos: RelativeBytePos) -> Option<usize> {
2324        self.lines().partition_point(|x| x <= &pos).checked_sub(1)
2325    }
2326
2327    pub fn line_bounds(&self, line_index: usize) -> Range<BytePos> {
2328        if self.is_empty() {
2329            return self.start_pos..self.start_pos;
2330        }
2331
2332        let lines = self.lines();
2333        assert!(line_index < lines.len());
2334        if line_index == (lines.len() - 1) {
2335            self.absolute_position(lines[line_index])..self.end_position()
2336        } else {
2337            self.absolute_position(lines[line_index])..self.absolute_position(lines[line_index + 1])
2338        }
2339    }
2340
2341    /// Returns whether or not the file contains the given `SourceMap` byte
2342    /// position. The position one past the end of the file is considered to be
2343    /// contained by the file. This implies that files for which `is_empty`
2344    /// returns true still contain one byte position according to this function.
2345    #[inline]
2346    pub fn contains(&self, byte_pos: BytePos) -> bool {
2347        byte_pos >= self.start_pos && byte_pos <= self.end_position()
2348    }
2349
2350    #[inline]
2351    pub fn is_empty(&self) -> bool {
2352        self.normalized_source_len.to_u32() == 0
2353    }
2354
2355    /// Calculates the original byte position relative to the start of the file
2356    /// based on the given byte position.
2357    pub fn original_relative_byte_pos(&self, pos: BytePos) -> RelativeBytePos {
2358        let pos = self.relative_position(pos);
2359
2360        // Diff before any records is 0. Otherwise use the previously recorded
2361        // diff as that applies to the following characters until a new diff
2362        // is recorded.
2363        let diff = match self.normalized_pos.binary_search_by(|np| np.pos.cmp(&pos)) {
2364            Ok(i) => self.normalized_pos[i].diff,
2365            Err(0) => 0,
2366            Err(i) => self.normalized_pos[i - 1].diff,
2367        };
2368
2369        RelativeBytePos::from_u32(pos.0 + diff)
2370    }
2371
2372    /// Calculates a normalized byte position from a byte offset relative to the
2373    /// start of the file.
2374    ///
2375    /// When we get an inline assembler error from LLVM during codegen, we
2376    /// import the expanded assembly code as a new `SourceFile`, which can then
2377    /// be used for error reporting with spans. However the byte offsets given
2378    /// to us by LLVM are relative to the start of the original buffer, not the
2379    /// normalized one. Hence we need to convert those offsets to the normalized
2380    /// form when constructing spans.
2381    pub fn normalized_byte_pos(&self, offset: u32) -> BytePos {
2382        let diff = match self
2383            .normalized_pos
2384            .binary_search_by(|np| (np.pos.0 + np.diff).cmp(&(self.start_pos.0 + offset)))
2385        {
2386            Ok(i) => self.normalized_pos[i].diff,
2387            Err(0) => 0,
2388            Err(i) => self.normalized_pos[i - 1].diff,
2389        };
2390
2391        BytePos::from_u32(self.start_pos.0 + offset - diff)
2392    }
2393
2394    /// Converts an relative `RelativeBytePos` to a `CharPos` relative to the `SourceFile`.
2395    fn bytepos_to_file_charpos(&self, bpos: RelativeBytePos) -> CharPos {
2396        // The number of extra bytes due to multibyte chars in the `SourceFile`.
2397        let mut total_extra_bytes = 0;
2398
2399        for mbc in self.multibyte_chars.iter() {
2400            debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos);
2401            if mbc.pos < bpos {
2402                // Every character is at least one byte, so we only
2403                // count the actual extra bytes.
2404                total_extra_bytes += mbc.bytes as u32 - 1;
2405                // We should never see a byte position in the middle of a
2406                // character.
2407                assert!(bpos.to_u32() >= mbc.pos.to_u32() + mbc.bytes as u32);
2408            } else {
2409                break;
2410            }
2411        }
2412
2413        assert!(total_extra_bytes <= bpos.to_u32());
2414        CharPos(bpos.to_usize() - total_extra_bytes as usize)
2415    }
2416
2417    /// Looks up the file's (1-based) line number and (0-based `CharPos`) column offset, for a
2418    /// given `RelativeBytePos`.
2419    fn lookup_file_pos(&self, pos: RelativeBytePos) -> (usize, CharPos) {
2420        let chpos = self.bytepos_to_file_charpos(pos);
2421        match self.lookup_line(pos) {
2422            Some(a) => {
2423                let line = a + 1; // Line numbers start at 1
2424                let linebpos = self.lines()[a];
2425                let linechpos = self.bytepos_to_file_charpos(linebpos);
2426                let col = chpos - linechpos;
2427                debug!("byte pos {:?} is on the line at byte pos {:?}", pos, linebpos);
2428                debug!("char pos {:?} is on the line at char pos {:?}", chpos, linechpos);
2429                debug!("byte is on line: {}", line);
2430                assert!(chpos >= linechpos);
2431                (line, col)
2432            }
2433            None => (0, chpos),
2434        }
2435    }
2436
2437    /// Looks up the file's (1-based) line number, (0-based `CharPos`) column offset, and (0-based)
2438    /// column offset when displayed, for a given `BytePos`.
2439    pub fn lookup_file_pos_with_col_display(&self, pos: BytePos) -> (usize, CharPos, usize) {
2440        let pos = self.relative_position(pos);
2441        let (line, col_or_chpos) = self.lookup_file_pos(pos);
2442        if line > 0 {
2443            let Some(code) = self.get_line(line - 1) else {
2444                // If we don't have the code available, it is ok as a fallback to return the bytepos
2445                // instead of the "display" column, which is only used to properly show underlines
2446                // in the terminal.
2447                // FIXME: we'll want better handling of this in the future for the sake of tools
2448                // that want to use the display col instead of byte offsets to modify Rust code, but
2449                // that is a problem for another day, the previous code was already incorrect for
2450                // both displaying *and* third party tools using the json output naïvely.
2451                tracing::info!("couldn't find line {line} {:?}", self.name);
2452                return (line, col_or_chpos, col_or_chpos.0);
2453            };
2454            let display_col = code.chars().take(col_or_chpos.0).map(|ch| char_width(ch)).sum();
2455            (line, col_or_chpos, display_col)
2456        } else {
2457            // This is never meant to happen?
2458            (0, col_or_chpos, col_or_chpos.0)
2459        }
2460    }
2461}
2462
2463pub fn char_width(ch: char) -> usize {
2464    // FIXME: `unicode_width` sometimes disagrees with terminals on how wide a `char` is. For now,
2465    // just accept that sometimes the code line will be longer than desired.
2466    match ch {
2467        '\t' => 4,
2468        // Keep the following list in sync with `rustc_errors::emitter::OUTPUT_REPLACEMENTS`. These
2469        // are control points that we replace before printing with a visible codepoint for the sake
2470        // of being able to point at them with underlines.
2471        '\u{0000}' | '\u{0001}' | '\u{0002}' | '\u{0003}' | '\u{0004}' | '\u{0005}'
2472        | '\u{0006}' | '\u{0007}' | '\u{0008}' | '\u{000B}' | '\u{000C}' | '\u{000D}'
2473        | '\u{000E}' | '\u{000F}' | '\u{0010}' | '\u{0011}' | '\u{0012}' | '\u{0013}'
2474        | '\u{0014}' | '\u{0015}' | '\u{0016}' | '\u{0017}' | '\u{0018}' | '\u{0019}'
2475        | '\u{001A}' | '\u{001B}' | '\u{001C}' | '\u{001D}' | '\u{001E}' | '\u{001F}'
2476        | '\u{007F}' | '\u{202A}' | '\u{202B}' | '\u{202D}' | '\u{202E}' | '\u{2066}'
2477        | '\u{2067}' | '\u{2068}' | '\u{202C}' | '\u{2069}' => 1,
2478        _ => unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1),
2479    }
2480}
2481
2482pub fn str_width(s: &str) -> usize {
2483    s.chars().map(char_width).sum()
2484}
2485
2486/// Normalizes the source code and records the normalizations.
2487fn normalize_src(src: &mut String) -> Vec<NormalizedPos> {
2488    let mut normalized_pos = vec![];
2489    remove_bom(src, &mut normalized_pos);
2490    normalize_newlines(src, &mut normalized_pos);
2491    normalized_pos
2492}
2493
2494/// Removes UTF-8 BOM, if any.
2495fn remove_bom(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) {
2496    if src.starts_with('\u{feff}') {
2497        src.drain(..3);
2498        normalized_pos.push(NormalizedPos { pos: RelativeBytePos(0), diff: 3 });
2499    }
2500}
2501
2502/// Replaces `\r\n` with `\n` in-place in `src`.
2503///
2504/// Leaves any occurrences of lone `\r` unchanged.
2505fn normalize_newlines(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) {
2506    if !src.as_bytes().contains(&b'\r') {
2507        return;
2508    }
2509
2510    // We replace `\r\n` with `\n` in-place, which doesn't break utf-8 encoding.
2511    // While we *can* call `as_mut_vec` and do surgery on the live string
2512    // directly, let's rather steal the contents of `src`. This makes the code
2513    // safe even if a panic occurs.
2514
2515    let mut buf = std::mem::replace(src, String::new()).into_bytes();
2516    let mut gap_len = 0;
2517    let mut tail = buf.as_mut_slice();
2518    let mut cursor = 0;
2519    let original_gap = normalized_pos.last().map_or(0, |l| l.diff);
2520    loop {
2521        let idx = match find_crlf(&tail[gap_len..]) {
2522            None => tail.len(),
2523            Some(idx) => idx + gap_len,
2524        };
2525        tail.copy_within(gap_len..idx, 0);
2526        tail = &mut tail[idx - gap_len..];
2527        if tail.len() == gap_len {
2528            break;
2529        }
2530        cursor += idx - gap_len;
2531        gap_len += 1;
2532        normalized_pos.push(NormalizedPos {
2533            pos: RelativeBytePos::from_usize(cursor + 1),
2534            diff: original_gap + gap_len as u32,
2535        });
2536    }
2537
2538    // Account for removed `\r`.
2539    // After `set_len`, `buf` is guaranteed to contain utf-8 again.
2540    let new_len = buf.len() - gap_len;
2541    unsafe {
2542        buf.set_len(new_len);
2543        *src = String::from_utf8_unchecked(buf);
2544    }
2545
2546    fn find_crlf(src: &[u8]) -> Option<usize> {
2547        let mut search_idx = 0;
2548        while let Some(idx) = find_cr(&src[search_idx..]) {
2549            if src[search_idx..].get(idx + 1) != Some(&b'\n') {
2550                search_idx += idx + 1;
2551                continue;
2552            }
2553            return Some(search_idx + idx);
2554        }
2555        None
2556    }
2557
2558    fn find_cr(src: &[u8]) -> Option<usize> {
2559        src.iter().position(|&b| b == b'\r')
2560    }
2561}
2562
2563// _____________________________________________________________________________
2564// Pos, BytePos, CharPos
2565//
2566
2567pub trait Pos {
2568    fn from_usize(n: usize) -> Self;
2569    fn to_usize(&self) -> usize;
2570    fn from_u32(n: u32) -> Self;
2571    fn to_u32(&self) -> u32;
2572}
2573
2574macro_rules! impl_pos {
2575    (
2576        $(
2577            $(#[$attr:meta])*
2578            $vis:vis struct $ident:ident($inner_vis:vis $inner_ty:ty);
2579        )*
2580    ) => {
2581        $(
2582            $(#[$attr])*
2583            $vis struct $ident($inner_vis $inner_ty);
2584
2585            impl Pos for $ident {
2586                #[inline(always)]
2587                fn from_usize(n: usize) -> $ident {
2588                    $ident(n as $inner_ty)
2589                }
2590
2591                #[inline(always)]
2592                fn to_usize(&self) -> usize {
2593                    self.0 as usize
2594                }
2595
2596                #[inline(always)]
2597                fn from_u32(n: u32) -> $ident {
2598                    $ident(n as $inner_ty)
2599                }
2600
2601                #[inline(always)]
2602                fn to_u32(&self) -> u32 {
2603                    self.0 as u32
2604                }
2605            }
2606
2607            impl Add for $ident {
2608                type Output = $ident;
2609
2610                #[inline(always)]
2611                fn add(self, rhs: $ident) -> $ident {
2612                    $ident(self.0 + rhs.0)
2613                }
2614            }
2615
2616            impl Sub for $ident {
2617                type Output = $ident;
2618
2619                #[inline(always)]
2620                fn sub(self, rhs: $ident) -> $ident {
2621                    $ident(self.0 - rhs.0)
2622                }
2623            }
2624        )*
2625    };
2626}
2627
2628impl_pos! {
2629    /// A byte offset.
2630    ///
2631    /// Keep this small (currently 32-bits), as AST contains a lot of them.
2632    #[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
2633    pub struct BytePos(pub u32);
2634
2635    /// A byte offset relative to file beginning.
2636    #[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
2637    pub struct RelativeBytePos(pub u32);
2638
2639    /// A character offset.
2640    ///
2641    /// Because of multibyte UTF-8 characters, a byte offset
2642    /// is not equivalent to a character offset. The [`SourceMap`] will convert [`BytePos`]
2643    /// values to `CharPos` values as necessary.
2644    #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
2645    pub struct CharPos(pub usize);
2646}
2647
2648impl<S: Encoder> Encodable<S> for BytePos {
2649    fn encode(&self, s: &mut S) {
2650        s.emit_u32(self.0);
2651    }
2652}
2653
2654impl<D: Decoder> Decodable<D> for BytePos {
2655    fn decode(d: &mut D) -> BytePos {
2656        BytePos(d.read_u32())
2657    }
2658}
2659
2660impl<H: HashStableContext> HashStable<H> for RelativeBytePos {
2661    fn hash_stable(&self, hcx: &mut H, hasher: &mut StableHasher) {
2662        self.0.hash_stable(hcx, hasher);
2663    }
2664}
2665
2666impl<S: Encoder> Encodable<S> for RelativeBytePos {
2667    fn encode(&self, s: &mut S) {
2668        s.emit_u32(self.0);
2669    }
2670}
2671
2672impl<D: Decoder> Decodable<D> for RelativeBytePos {
2673    fn decode(d: &mut D) -> RelativeBytePos {
2674        RelativeBytePos(d.read_u32())
2675    }
2676}
2677
2678// _____________________________________________________________________________
2679// Loc, SourceFileAndLine, SourceFileAndBytePos
2680//
2681
2682/// A source code location used for error reporting.
2683#[derive(Debug, Clone)]
2684pub struct Loc {
2685    /// Information about the original source.
2686    pub file: Arc<SourceFile>,
2687    /// The (1-based) line number.
2688    pub line: usize,
2689    /// The (0-based) column offset.
2690    pub col: CharPos,
2691    /// The (0-based) column offset when displayed.
2692    pub col_display: usize,
2693}
2694
2695// Used to be structural records.
2696#[derive(Debug)]
2697pub struct SourceFileAndLine {
2698    pub sf: Arc<SourceFile>,
2699    /// Index of line, starting from 0.
2700    pub line: usize,
2701}
2702#[derive(Debug)]
2703pub struct SourceFileAndBytePos {
2704    pub sf: Arc<SourceFile>,
2705    pub pos: BytePos,
2706}
2707
2708#[derive(Copy, Clone, Debug, PartialEq, Eq)]
2709pub struct LineInfo {
2710    /// Index of line, starting from 0.
2711    pub line_index: usize,
2712
2713    /// Column in line where span begins, starting from 0.
2714    pub start_col: CharPos,
2715
2716    /// Column in line where span ends, starting from 0, exclusive.
2717    pub end_col: CharPos,
2718}
2719
2720pub struct FileLines {
2721    pub file: Arc<SourceFile>,
2722    pub lines: Vec<LineInfo>,
2723}
2724
2725pub static SPAN_TRACK: AtomicRef<fn(LocalDefId)> = AtomicRef::new(&((|_| {}) as fn(_)));
2726
2727// _____________________________________________________________________________
2728// SpanLinesError, SpanSnippetError, DistinctSources, MalformedSourceMapPositions
2729//
2730
2731pub type FileLinesResult = Result<FileLines, SpanLinesError>;
2732
2733#[derive(Clone, PartialEq, Eq, Debug)]
2734pub enum SpanLinesError {
2735    DistinctSources(Box<DistinctSources>),
2736}
2737
2738#[derive(Clone, PartialEq, Eq, Debug)]
2739pub enum SpanSnippetError {
2740    IllFormedSpan(Span),
2741    DistinctSources(Box<DistinctSources>),
2742    MalformedForSourcemap(MalformedSourceMapPositions),
2743    SourceNotAvailable { filename: FileName },
2744}
2745
2746#[derive(Clone, PartialEq, Eq, Debug)]
2747pub struct DistinctSources {
2748    pub begin: (FileName, BytePos),
2749    pub end: (FileName, BytePos),
2750}
2751
2752#[derive(Clone, PartialEq, Eq, Debug)]
2753pub struct MalformedSourceMapPositions {
2754    pub name: FileName,
2755    pub source_len: usize,
2756    pub begin_pos: BytePos,
2757    pub end_pos: BytePos,
2758}
2759
2760/// Range inside of a `Span` used for diagnostics when we only have access to relative positions.
2761#[derive(Copy, Clone, PartialEq, Eq, Debug)]
2762pub struct InnerSpan {
2763    pub start: usize,
2764    pub end: usize,
2765}
2766
2767impl InnerSpan {
2768    pub fn new(start: usize, end: usize) -> InnerSpan {
2769        InnerSpan { start, end }
2770    }
2771}
2772
2773/// Requirements for a `StableHashingContext` to be used in this crate.
2774///
2775/// This is a hack to allow using the [`HashStable_Generic`] derive macro
2776/// instead of implementing everything in rustc_middle.
2777pub trait HashStableContext {
2778    fn def_path_hash(&self, def_id: DefId) -> DefPathHash;
2779    fn hash_spans(&self) -> bool;
2780    /// Accesses `sess.opts.unstable_opts.incremental_ignore_spans` since
2781    /// we don't have easy access to a `Session`
2782    fn unstable_opts_incremental_ignore_spans(&self) -> bool;
2783    fn def_span(&self, def_id: LocalDefId) -> Span;
2784    fn span_data_to_lines_and_cols(
2785        &mut self,
2786        span: &SpanData,
2787    ) -> Option<(StableSourceFileId, usize, BytePos, usize, BytePos)>;
2788    fn hashing_controls(&self) -> HashingControls;
2789}
2790
2791impl<CTX> HashStable<CTX> for Span
2792where
2793    CTX: HashStableContext,
2794{
2795    /// Hashes a span in a stable way. We can't directly hash the span's `BytePos`
2796    /// fields (that would be similar to hashing pointers, since those are just
2797    /// offsets into the `SourceMap`). Instead, we hash the (file name, line, column)
2798    /// triple, which stays the same even if the containing `SourceFile` has moved
2799    /// within the `SourceMap`.
2800    ///
2801    /// Also note that we are hashing byte offsets for the column, not unicode
2802    /// codepoint offsets. For the purpose of the hash that's sufficient.
2803    /// Also, hashing filenames is expensive so we avoid doing it twice when the
2804    /// span starts and ends in the same file, which is almost always the case.
2805    fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) {
2806        const TAG_VALID_SPAN: u8 = 0;
2807        const TAG_INVALID_SPAN: u8 = 1;
2808        const TAG_RELATIVE_SPAN: u8 = 2;
2809
2810        if !ctx.hash_spans() {
2811            return;
2812        }
2813
2814        let span = self.data_untracked();
2815        span.ctxt.hash_stable(ctx, hasher);
2816        span.parent.hash_stable(ctx, hasher);
2817
2818        if span.is_dummy() {
2819            Hash::hash(&TAG_INVALID_SPAN, hasher);
2820            return;
2821        }
2822
2823        if let Some(parent) = span.parent {
2824            let def_span = ctx.def_span(parent).data_untracked();
2825            if def_span.contains(span) {
2826                // This span is enclosed in a definition: only hash the relative position.
2827                Hash::hash(&TAG_RELATIVE_SPAN, hasher);
2828                (span.lo - def_span.lo).to_u32().hash_stable(ctx, hasher);
2829                (span.hi - def_span.lo).to_u32().hash_stable(ctx, hasher);
2830                return;
2831            }
2832        }
2833
2834        // If this is not an empty or invalid span, we want to hash the last
2835        // position that belongs to it, as opposed to hashing the first
2836        // position past it.
2837        let Some((file, line_lo, col_lo, line_hi, col_hi)) = ctx.span_data_to_lines_and_cols(&span)
2838        else {
2839            Hash::hash(&TAG_INVALID_SPAN, hasher);
2840            return;
2841        };
2842
2843        Hash::hash(&TAG_VALID_SPAN, hasher);
2844        Hash::hash(&file, hasher);
2845
2846        // Hash both the length and the end location (line/column) of a span. If we
2847        // hash only the length, for example, then two otherwise equal spans with
2848        // different end locations will have the same hash. This can cause a problem
2849        // during incremental compilation wherein a previous result for a query that
2850        // depends on the end location of a span will be incorrectly reused when the
2851        // end location of the span it depends on has changed (see issue #74890). A
2852        // similar analysis applies if some query depends specifically on the length
2853        // of the span, but we only hash the end location. So hash both.
2854
2855        let col_lo_trunc = (col_lo.0 as u64) & 0xFF;
2856        let line_lo_trunc = ((line_lo as u64) & 0xFF_FF_FF) << 8;
2857        let col_hi_trunc = (col_hi.0 as u64) & 0xFF << 32;
2858        let line_hi_trunc = ((line_hi as u64) & 0xFF_FF_FF) << 40;
2859        let col_line = col_lo_trunc | line_lo_trunc | col_hi_trunc | line_hi_trunc;
2860        let len = (span.hi - span.lo).0;
2861        Hash::hash(&col_line, hasher);
2862        Hash::hash(&len, hasher);
2863    }
2864}
2865
2866/// Useful type to use with `Result<>` indicate that an error has already
2867/// been reported to the user, so no need to continue checking.
2868///
2869/// The `()` field is necessary: it is non-`pub`, which means values of this
2870/// type cannot be constructed outside of this crate.
2871#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
2872#[derive(HashStable_Generic)]
2873pub struct ErrorGuaranteed(());
2874
2875impl ErrorGuaranteed {
2876    /// Don't use this outside of `DiagCtxtInner::emit_diagnostic`!
2877    #[deprecated = "should only be used in `DiagCtxtInner::emit_diagnostic`"]
2878    pub fn unchecked_error_guaranteed() -> Self {
2879        ErrorGuaranteed(())
2880    }
2881
2882    pub fn raise_fatal(self) -> ! {
2883        FatalError.raise()
2884    }
2885}
2886
2887impl<E: rustc_serialize::Encoder> Encodable<E> for ErrorGuaranteed {
2888    #[inline]
2889    fn encode(&self, _e: &mut E) {
2890        panic!(
2891            "should never serialize an `ErrorGuaranteed`, as we do not write metadata or \
2892            incremental caches in case errors occurred"
2893        )
2894    }
2895}
2896impl<D: rustc_serialize::Decoder> Decodable<D> for ErrorGuaranteed {
2897    #[inline]
2898    fn decode(_d: &mut D) -> ErrorGuaranteed {
2899        panic!(
2900            "`ErrorGuaranteed` should never have been serialized to metadata or incremental caches"
2901        )
2902    }
2903}