Skip to main content

cargo/core/
source_id.rs

1use crate::core::GitReference;
2use crate::core::SourceKind;
3use crate::sources::registry::CRATES_IO_HTTP_INDEX;
4use crate::sources::source::Source;
5use crate::sources::{CRATES_IO_DOMAIN, CRATES_IO_INDEX, CRATES_IO_REGISTRY, DirectorySource};
6use crate::sources::{GitSource, PathSource, RegistrySource};
7use crate::util::interning::InternedString;
8use crate::util::{CanonicalUrl, CargoResult, GlobalContext, IntoUrl, context};
9use anyhow::Context as _;
10use serde::de;
11use serde::ser;
12use std::cmp::{self, Ordering};
13use std::collections::HashSet;
14use std::fmt::{self, Formatter};
15use std::hash::{self, Hash};
16use std::path::{Path, PathBuf};
17use std::ptr;
18use std::sync::Mutex;
19use std::sync::OnceLock;
20use tracing::trace;
21use url::Url;
22
/// Process-wide interning table for [`SourceIdInner`] values.
///
/// [`SourceId::wrap`] leaks each distinct inner value once and stores the
/// resulting `'static` reference here, so later lookups of an equal value
/// reuse the same reference.
static SOURCE_ID_CACHE: OnceLock<Mutex<HashSet<&'static SourceIdInner>>> = OnceLock::new();
24
/// Unique identifier for a source of packages.
///
/// Cargo uniquely identifies packages using [`PackageId`], a combination of the
/// package name, version, and the code source. `SourceId` exactly represents
/// the "code source" in `PackageId`. See [`SourceId::hash`] to learn what is
/// taken into account for the uniqueness of a source.
///
/// `SourceId` is usually associated with an instance of [`Source`], which is
/// supposed to provide a `SourceId` via [`Source::source_id`] method.
///
/// The type is `Copy`: it only holds a `'static` reference to interned data.
///
/// [`Source`]: crate::sources::source::Source
/// [`Source::source_id`]: crate::sources::source::Source::source_id
/// [`PackageId`]: super::PackageId
#[derive(Clone, Copy, Eq, Debug)]
pub struct SourceId {
    /// Interned payload; values are created through `SourceId::wrap`.
    inner: &'static SourceIdInner,
}
42
/// The interned version of [`SourceId`] to avoid excessive clones and borrows.
/// Values are cached in `SOURCE_ID_CACHE` once created.
///
/// `Hash` and `PartialEq` are implemented by hand further down in this file
/// and only look at `kind`, `precise` and `canonical_url`.
#[derive(Eq, Clone, Debug)]
struct SourceIdInner {
    /// The source URL.
    url: Url,
    /// The canonical version of the above url. See [`CanonicalUrl`] to learn
    /// why it is needed and how it normalizes a URL.
    canonical_url: CanonicalUrl,
    /// The source kind.
    kind: SourceKind,
    /// For example, the exact Git revision of the specified branch for a Git Source.
    precise: Option<Precise>,
    /// Name of the remote registry.
    ///
    /// WARNING: this is not always set. It is `None` when the name is not
    /// known, e.g. registry coming from `--index` or Cargo.lock
    registry_key: Option<KeyOf>,
}
62
/// The "precise" part of a [`SourceId`], pinning it more exactly than the
/// kind and URL alone.
#[derive(Eq, PartialEq, Clone, Debug, Hash)]
enum Precise {
    /// Marker set by [`SourceId::with_locked_precise`]; displayed as `locked`.
    Locked,
    /// Set by [`SourceId::with_precise_registry_version`]: package `name`
    /// should move from version `from` to version `to`.
    Updated {
        name: InternedString,
        from: semver::Version,
        to: semver::Version,
    },
    /// Set by [`SourceId::with_git_precise`]; when parsed by
    /// [`SourceId::from_url`] this is the fragment of a `git+` URL.
    GitUrlFragment(String),
}
73
74impl fmt::Display for Precise {
75    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
76        match self {
77            Precise::Locked => "locked".fmt(f),
78            Precise::Updated { name, from, to } => {
79                write!(f, "{name}={from}->{to}")
80            }
81            Precise::GitUrlFragment(s) => s.fmt(f),
82        }
83    }
84}
85
/// Where the remote source key is defined.
///
/// The purpose of this is to provide better diagnostics for different sources of keys.
/// Both variants carry the key string itself.
#[derive(Debug, Clone, PartialEq, Eq)]
enum KeyOf {
    /// Defined in the `[registries]` table or the built-in `crates-io` key.
    Registry(String),
    /// Defined in the `[source]` replacement table.
    Source(String),
}
96
97impl SourceId {
98    /// Creates a `SourceId` object from the kind and URL.
99    ///
100    /// The canonical url will be calculated, but the precise field will not
101    fn new(kind: SourceKind, url: Url, key: Option<KeyOf>) -> CargoResult<SourceId> {
102        if kind == SourceKind::SparseRegistry {
103            // Sparse URLs are different because they store the kind prefix (sparse+)
104            // in the URL. This is because the prefix is necessary to differentiate
105            // from regular registries (git-based). The sparse+ prefix is included
106            // everywhere, including user-facing locations such as the `config.toml`
107            // file that defines the registry, or whenever Cargo displays it to the user.
108            assert!(url.as_str().starts_with("sparse+"));
109        }
110        let source_id = SourceId::wrap(SourceIdInner {
111            kind,
112            canonical_url: CanonicalUrl::new(&url)?,
113            url,
114            precise: None,
115            registry_key: key,
116        });
117        Ok(source_id)
118    }
119
120    /// Interns the value and returns the wrapped type.
121    fn wrap(inner: SourceIdInner) -> SourceId {
122        let mut cache = SOURCE_ID_CACHE
123            .get_or_init(|| Default::default())
124            .lock()
125            .unwrap();
126        let inner = cache.get(&inner).cloned().unwrap_or_else(|| {
127            let inner = Box::leak(Box::new(inner));
128            cache.insert(inner);
129            inner
130        });
131        SourceId { inner }
132    }
133
134    fn remote_source_kind(url: &Url) -> SourceKind {
135        if url.as_str().starts_with("sparse+") {
136            SourceKind::SparseRegistry
137        } else {
138            SourceKind::Registry
139        }
140    }
141
142    /// Parses a source URL and returns the corresponding ID.
143    ///
144    /// ## Example
145    ///
146    /// ```
147    /// use cargo::core::SourceId;
148    /// SourceId::from_url("git+https://github.com/alexcrichton/\
149    ///                     libssh2-static-sys#80e71a3021618eb05\
150    ///                     656c58fb7c5ef5f12bc747f");
151    /// ```
152    pub fn from_url(string: &str) -> CargoResult<SourceId> {
153        let (kind, url) = string
154            .split_once('+')
155            .ok_or_else(|| anyhow::format_err!("invalid source `{}`", string))?;
156
157        match kind {
158            "git" => {
159                let mut url = url.into_url()?;
160                let reference = GitReference::from_query(url.query_pairs());
161                let precise = url.fragment().map(|s| s.to_owned());
162                url.set_fragment(None);
163                url.set_query(None);
164                Ok(SourceId::for_git(&url, reference)?.with_git_precise(precise))
165            }
166            "registry" => {
167                let url = url.into_url()?;
168                Ok(SourceId::new(SourceKind::Registry, url, None)?.with_locked_precise())
169            }
170            "sparse" => {
171                let url = string.into_url()?;
172                Ok(SourceId::new(SourceKind::SparseRegistry, url, None)?.with_locked_precise())
173            }
174            "path" => {
175                let url = url.into_url()?;
176                SourceId::new(SourceKind::Path, url, None)
177            }
178            kind => Err(anyhow::format_err!("unsupported source protocol: {}", kind)),
179        }
180    }
181
182    /// A view of the [`SourceId`] that can be `Display`ed as a URL.
183    pub fn as_url(&self) -> SourceIdAsUrl<'_> {
184        SourceIdAsUrl {
185            inner: &*self.inner,
186            encoded: false,
187        }
188    }
189
190    /// Like [`Self::as_url`] but with URL parameters encoded.
191    pub fn as_encoded_url(&self) -> SourceIdAsUrl<'_> {
192        SourceIdAsUrl {
193            inner: &*self.inner,
194            encoded: true,
195        }
196    }
197
198    /// Creates a `SourceId` from a filesystem path.
199    ///
200    /// `path`: an absolute path.
201    pub fn for_path(path: &Path) -> CargoResult<SourceId> {
202        let url = path.into_url()?;
203        SourceId::new(SourceKind::Path, url, None)
204    }
205
206    /// Creates a `SourceId` from a filesystem path.
207    ///
208    /// `path`: an absolute path.
209    pub fn for_manifest_path(manifest_path: &Path) -> CargoResult<SourceId> {
210        if crate::util::toml::is_embedded(manifest_path) && manifest_path.is_file() {
211            Self::for_path(manifest_path)
212        } else {
213            Self::for_path(manifest_path.parent().unwrap())
214        }
215    }
216
217    /// Creates a `SourceId` from a Git reference.
218    pub fn for_git(url: &Url, reference: GitReference) -> CargoResult<SourceId> {
219        SourceId::new(SourceKind::Git(reference), url.clone(), None)
220    }
221
222    /// Creates a `SourceId` from a remote registry URL when the registry name
223    /// cannot be determined, e.g. a user passes `--index` directly from CLI.
224    ///
225    /// Use [`SourceId::for_alt_registry`] if a name can provided, which
226    /// generates better messages for cargo.
227    pub fn for_registry(url: &Url) -> CargoResult<SourceId> {
228        let kind = Self::remote_source_kind(url);
229        SourceId::new(kind, url.to_owned(), None)
230    }
231
232    /// Creates a `SourceId` for a remote registry from the `[registries]` table or crates.io.
233    pub fn for_alt_registry(url: &Url, key: &str) -> CargoResult<SourceId> {
234        let kind = Self::remote_source_kind(url);
235        let key = KeyOf::Registry(key.into());
236        SourceId::new(kind, url.to_owned(), Some(key))
237    }
238
239    /// Creates a `SourceId` for a remote registry from the `[source]` replacement table.
240    pub fn for_source_replacement_registry(url: &Url, key: &str) -> CargoResult<SourceId> {
241        let kind = Self::remote_source_kind(url);
242        let key = KeyOf::Source(key.into());
243        SourceId::new(kind, url.to_owned(), Some(key))
244    }
245
246    /// Creates a `SourceId` from a local registry path.
247    pub fn for_local_registry(path: &Path) -> CargoResult<SourceId> {
248        let url = path.into_url()?;
249        SourceId::new(SourceKind::LocalRegistry, url, None)
250    }
251
252    /// Creates a `SourceId` from a directory path.
253    pub fn for_directory(path: &Path) -> CargoResult<SourceId> {
254        let url = path.into_url()?;
255        SourceId::new(SourceKind::Directory, url, None)
256    }
257
258    /// Returns the `SourceId` corresponding to the main repository.
259    ///
260    /// This is the main cargo registry by default, but it can be overridden in
261    /// a `.cargo/config.toml`.
262    pub fn crates_io(gctx: &GlobalContext) -> CargoResult<SourceId> {
263        gctx.crates_io_source_id()
264    }
265
266    /// Returns the `SourceId` corresponding to the main repository, using the
267    /// sparse HTTP index if allowed.
268    pub fn crates_io_maybe_sparse_http(gctx: &GlobalContext) -> CargoResult<SourceId> {
269        if Self::crates_io_is_sparse(gctx)? {
270            gctx.check_registry_index_not_set()?;
271            let url = CRATES_IO_HTTP_INDEX.into_url().unwrap();
272            let key = KeyOf::Registry(CRATES_IO_REGISTRY.into());
273            SourceId::new(SourceKind::SparseRegistry, url, Some(key))
274        } else {
275            Self::crates_io(gctx)
276        }
277    }
278
279    /// Returns whether to access crates.io over the sparse protocol.
280    pub fn crates_io_is_sparse(gctx: &GlobalContext) -> CargoResult<bool> {
281        let proto: Option<context::Value<String>> = gctx.get("registries.crates-io.protocol")?;
282        let is_sparse = match proto.as_ref().map(|v| v.val.as_str()) {
283            Some("sparse") => true,
284            Some("git") => false,
285            Some(unknown) => anyhow::bail!(
286                "unsupported registry protocol `{unknown}` (defined in {})",
287                proto.as_ref().unwrap().definition
288            ),
289            None => true,
290        };
291        Ok(is_sparse)
292    }
293
294    /// Gets the `SourceId` associated with given name of the remote registry.
295    pub fn alt_registry(gctx: &GlobalContext, key: &str) -> CargoResult<SourceId> {
296        if key == CRATES_IO_REGISTRY {
297            return Self::crates_io(gctx);
298        }
299        let url = gctx.get_registry_index(key)?;
300        Self::for_alt_registry(&url, key)
301    }
302
303    /// Gets this source URL.
304    pub fn url(&self) -> &Url {
305        &self.inner.url
306    }
307
308    /// Gets the canonical URL of this source, used for internal comparison
309    /// purposes.
310    pub fn canonical_url(&self) -> &CanonicalUrl {
311        &self.inner.canonical_url
312    }
313
314    /// Displays the text "crates.io index" for Cargo shell status output.
315    pub fn display_index(self) -> String {
316        if self.is_crates_io() {
317            format!("{} index", CRATES_IO_DOMAIN)
318        } else {
319            format!("`{}` index", self.display_registry_name())
320        }
321    }
322
323    /// Displays the name of a registry if it has one. Otherwise just the URL.
324    pub fn display_registry_name(self) -> String {
325        if let Some(key) = self.inner.registry_key.as_ref().map(|k| k.key()) {
326            key.into()
327        } else if self.has_precise() {
328            // We remove `precise` here to retrieve an permissive version of
329            // `SourceIdInner`, which may contain the registry name.
330            self.without_precise().display_registry_name()
331        } else {
332            url_display(self.url())
333        }
334    }
335
336    /// Gets the name of the remote registry as defined in the `[registries]` table,
337    /// or the built-in `crates-io` key.
338    pub fn alt_registry_key(&self) -> Option<&str> {
339        self.inner.registry_key.as_ref()?.alternative_registry()
340    }
341
342    /// Returns `true` if this source is from a filesystem path.
343    pub fn is_path(self) -> bool {
344        self.inner.kind == SourceKind::Path
345    }
346
347    /// Returns the local path if this is a path dependency.
348    pub fn local_path(self) -> Option<PathBuf> {
349        if self.inner.kind != SourceKind::Path {
350            return None;
351        }
352
353        Some(self.inner.url.to_file_path().unwrap())
354    }
355
356    pub fn kind(&self) -> &SourceKind {
357        &self.inner.kind
358    }
359
360    /// Returns `true` if this source is from a registry (either local or not).
361    pub fn is_registry(self) -> bool {
362        matches!(
363            self.inner.kind,
364            SourceKind::Registry | SourceKind::SparseRegistry | SourceKind::LocalRegistry
365        )
366    }
367
368    /// Returns `true` if this source is from a sparse registry.
369    pub fn is_sparse(self) -> bool {
370        matches!(self.inner.kind, SourceKind::SparseRegistry)
371    }
372
373    /// Returns `true` if this source is a "remote" registry.
374    ///
375    /// "remote" may also mean a file URL to a git index, so it is not
376    /// necessarily "remote". This just means it is not `local-registry`.
377    pub fn is_remote_registry(self) -> bool {
378        matches!(
379            self.inner.kind,
380            SourceKind::Registry | SourceKind::SparseRegistry
381        )
382    }
383
384    /// Returns `true` if this source from a Git repository.
385    pub fn is_git(self) -> bool {
386        matches!(self.inner.kind, SourceKind::Git(_))
387    }
388
389    /// Creates an implementation of `Source` corresponding to this ID.
390    pub fn load<'a>(self, gctx: &'a GlobalContext) -> CargoResult<Box<dyn Source + 'a>> {
391        trace!("loading SourceId; {}", self);
392        match self.inner.kind {
393            SourceKind::Git(..) => Ok(Box::new(GitSource::new(self, gctx)?)),
394            SourceKind::Path => {
395                let path = self
396                    .inner
397                    .url
398                    .to_file_path()
399                    .expect("path sources cannot be remote");
400                if crate::util::toml::is_embedded(&path) && path.is_file() {
401                    anyhow::bail!("single file packages cannot be used as dependencies")
402                }
403                Ok(Box::new(PathSource::new(&path, self, gctx)))
404            }
405            SourceKind::Registry | SourceKind::SparseRegistry => {
406                Ok(Box::new(RegistrySource::remote(self, gctx)?))
407            }
408            SourceKind::LocalRegistry => {
409                let path = self
410                    .inner
411                    .url
412                    .to_file_path()
413                    .expect("path sources cannot be remote");
414                Ok(Box::new(RegistrySource::local(self, &path, gctx)))
415            }
416            SourceKind::Directory => {
417                let path = self
418                    .inner
419                    .url
420                    .to_file_path()
421                    .expect("path sources cannot be remote");
422                Ok(Box::new(DirectorySource::new(&path, self, gctx)))
423            }
424        }
425    }
426
427    /// Gets the Git reference if this is a git source, otherwise `None`.
428    pub fn git_reference(self) -> Option<&'static GitReference> {
429        match self.inner.kind {
430            SourceKind::Git(ref s) => Some(s),
431            _ => None,
432        }
433    }
434
435    /// Check if the precise data field has bean set
436    pub fn has_precise(self) -> bool {
437        self.inner.precise.is_some()
438    }
439
440    /// Check if the precise data field has bean set to "locked"
441    pub fn has_locked_precise(self) -> bool {
442        self.inner.precise == Some(Precise::Locked)
443    }
444
445    /// Check if two sources have the same precise data field
446    pub fn has_same_precise_as(self, other: Self) -> bool {
447        self.inner.precise == other.inner.precise
448    }
449
450    /// Check if the precise data field stores information for this `name`
451    /// from a call to [`SourceId::with_precise_registry_version`].
452    ///
453    /// If so return the version currently in the lock file and the version to be updated to.
454    pub fn precise_registry_version(
455        self,
456        pkg: &str,
457    ) -> Option<(&semver::Version, &semver::Version)> {
458        match &self.inner.precise {
459            Some(Precise::Updated { name, from, to }) if name == pkg => Some((from, to)),
460            _ => None,
461        }
462    }
463
464    pub fn precise_git_fragment(self) -> Option<&'static str> {
465        match &self.inner.precise {
466            Some(Precise::GitUrlFragment(s)) => Some(&s),
467            _ => None,
468        }
469    }
470
471    /// Creates a new `SourceId` from this source with the given `precise`.
472    pub fn with_git_precise(self, fragment: Option<String>) -> SourceId {
473        self.with_precise(&fragment.map(|f| Precise::GitUrlFragment(f)))
474    }
475
476    /// Creates a new `SourceId` from this source without a `precise`.
477    pub fn without_precise(self) -> SourceId {
478        self.with_precise(&None)
479    }
480
481    /// Creates a new `SourceId` from this source without a `precise`.
482    pub fn with_locked_precise(self) -> SourceId {
483        self.with_precise(&Some(Precise::Locked))
484    }
485
486    /// Creates a new `SourceId` from this source with the `precise` from some other `SourceId`.
487    pub fn with_precise_from(self, v: Self) -> SourceId {
488        self.with_precise(&v.inner.precise)
489    }
490
491    fn with_precise(self, precise: &Option<Precise>) -> SourceId {
492        if &self.inner.precise == precise {
493            self
494        } else {
495            SourceId::wrap(SourceIdInner {
496                precise: precise.clone(),
497                ..(*self.inner).clone()
498            })
499        }
500    }
501
502    /// When updating a lock file on a version using `cargo update --precise`
503    /// the requested version is stored in the precise field.
504    /// On a registry dependency we also need to keep track of the package that
505    /// should be updated and even which of the versions should be updated.
506    /// All of this gets encoded in the precise field using this method.
507    /// The data can be read with [`SourceId::precise_registry_version`]
508    pub fn with_precise_registry_version(
509        self,
510        name: InternedString,
511        version: semver::Version,
512        precise: &str,
513    ) -> CargoResult<SourceId> {
514        let precise = semver::Version::parse(precise).with_context(|| {
515            if let Some(stripped) = precise.strip_prefix("v") {
516                return format!(
517                    "the version provided, `{precise}` is not a \
518                    valid SemVer version\n\n\
519                    help: try changing the version to `{stripped}`",
520                );
521            }
522            format!("invalid version format for precise version `{precise}`")
523        })?;
524
525        Ok(SourceId::wrap(SourceIdInner {
526            precise: Some(Precise::Updated {
527                name,
528                from: version,
529                to: precise,
530            }),
531            ..(*self.inner).clone()
532        }))
533    }
534
535    /// Returns `true` if the remote registry is the standard <https://crates.io>.
536    pub fn is_crates_io(self) -> bool {
537        match self.inner.kind {
538            SourceKind::Registry | SourceKind::SparseRegistry => {}
539            _ => return false,
540        }
541        let url = self.inner.url.as_str();
542        url == CRATES_IO_INDEX || url == CRATES_IO_HTTP_INDEX || is_overridden_crates_io_url(url)
543    }
544
545    /// Hashes `self` to be used in the name of some Cargo folders, so shouldn't vary.
546    ///
547    /// For git and url, `as_str` gives the serialisation of a url (which has a spec) and so
548    /// insulates against possible changes in how the url crate does hashing.
549    ///
550    /// For paths, remove the workspace prefix so the same source will give the
551    /// same hash in different locations, helping reproducible builds.
552    pub fn stable_hash<S: hash::Hasher>(self, workspace: &Path, into: &mut S) {
553        if self.is_path() {
554            if let Ok(p) = self
555                .inner
556                .url
557                .to_file_path()
558                .unwrap()
559                .strip_prefix(workspace)
560            {
561                self.inner.kind.hash(into);
562                p.to_str().unwrap().hash(into);
563                return;
564            }
565        }
566        self.inner.kind.hash(into);
567        match self.inner.kind {
568            SourceKind::Git(_) => (&self).inner.canonical_url.hash(into),
569            _ => (&self).inner.url.as_str().hash(into),
570        }
571    }
572
573    pub fn full_eq(self, other: SourceId) -> bool {
574        ptr::eq(self.inner, other.inner)
575    }
576
577    pub fn full_hash<S: hash::Hasher>(self, into: &mut S) {
578        ptr::NonNull::from(self.inner).hash(into)
579    }
580}
581
582impl PartialEq for SourceId {
583    fn eq(&self, other: &SourceId) -> bool {
584        self.cmp(other) == Ordering::Equal
585    }
586}
587
588impl PartialOrd for SourceId {
589    fn partial_cmp(&self, other: &SourceId) -> Option<Ordering> {
590        Some(self.cmp(other))
591    }
592}
593
594// Custom comparison defined as source kind and canonical URL equality,
595// ignoring the `precise` and `name` fields.
596impl Ord for SourceId {
597    fn cmp(&self, other: &SourceId) -> Ordering {
598        // If our interior pointers are to the exact same `SourceIdInner` then
599        // we're guaranteed to be equal.
600        if ptr::eq(self.inner, other.inner) {
601            return Ordering::Equal;
602        }
603
604        // Sort first based on `kind`, deferring to the URL comparison if
605        // the kinds are equal.
606        let ord_kind = self.inner.kind.cmp(&other.inner.kind);
607        ord_kind.then_with(|| self.inner.canonical_url.cmp(&other.inner.canonical_url))
608    }
609}
610
611impl ser::Serialize for SourceId {
612    fn serialize<S>(&self, s: S) -> Result<S::Ok, S::Error>
613    where
614        S: ser::Serializer,
615    {
616        if self.is_path() {
617            None::<String>.serialize(s)
618        } else {
619            s.collect_str(&self.as_url())
620        }
621    }
622}
623
624impl<'de> de::Deserialize<'de> for SourceId {
625    fn deserialize<D>(d: D) -> Result<SourceId, D::Error>
626    where
627        D: de::Deserializer<'de>,
628    {
629        let string = String::deserialize(d)?;
630        SourceId::from_url(&string).map_err(de::Error::custom)
631    }
632}
633
634fn url_display(url: &Url) -> String {
635    if url.scheme() == "file" {
636        if let Ok(path) = url.to_file_path() {
637            if let Some(path_str) = path.to_str() {
638                return path_str.to_string();
639            }
640        }
641    }
642
643    url.as_str().to_string()
644}
645
646impl fmt::Display for SourceId {
647    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
648        match self.inner.kind {
649            SourceKind::Git(ref reference) => {
650                // Don't replace the URL display for git references,
651                // because those are kind of expected to be URLs.
652                write!(f, "{}", self.inner.url)?;
653                if let Some(pretty) = reference.pretty_ref(true) {
654                    write!(f, "?{}", pretty)?;
655                }
656
657                if let Some(s) = &self.inner.precise {
658                    let s = s.to_string();
659                    let len = cmp::min(s.len(), 8);
660                    write!(f, "#{}", &s[..len])?;
661                }
662                Ok(())
663            }
664            SourceKind::Path => write!(f, "{}", url_display(&self.inner.url)),
665            SourceKind::Registry | SourceKind::SparseRegistry => {
666                write!(f, "registry `{}`", self.display_registry_name())
667            }
668            SourceKind::LocalRegistry => write!(f, "registry `{}`", url_display(&self.inner.url)),
669            SourceKind::Directory => write!(f, "dir {}", url_display(&self.inner.url)),
670        }
671    }
672}
673
674impl Hash for SourceId {
675    fn hash<S: hash::Hasher>(&self, into: &mut S) {
676        self.inner.kind.hash(into);
677        self.inner.canonical_url.hash(into);
678    }
679}
680
681/// The hash of `SourceIdInner` is used to retrieve its interned value from
682/// `SOURCE_ID_CACHE`. We only care about fields that make `SourceIdInner`
683/// unique. Optional fields not affecting the uniqueness must be excluded,
684/// such as [`registry_key`]. That's why this is not derived.
685///
686/// [`registry_key`]: SourceIdInner::registry_key
687impl Hash for SourceIdInner {
688    fn hash<S: hash::Hasher>(&self, into: &mut S) {
689        self.kind.hash(into);
690        self.precise.hash(into);
691        self.canonical_url.hash(into);
692    }
693}
694
695/// This implementation must be synced with [`SourceIdInner::hash`].
696impl PartialEq for SourceIdInner {
697    fn eq(&self, other: &Self) -> bool {
698        self.kind == other.kind
699            && self.precise == other.precise
700            && self.canonical_url == other.canonical_url
701    }
702}
703
/// A `Display`able view into a `SourceId` that will write it as a url
///
/// Created by [`SourceId::as_url`] and [`SourceId::as_encoded_url`].
pub struct SourceIdAsUrl<'a> {
    /// The source data being rendered.
    inner: &'a SourceIdInner,
    /// Passed to `pretty_ref` when rendering a Git reference; `true` for
    /// [`SourceId::as_encoded_url`].
    encoded: bool,
}
709
710impl<'a> fmt::Display for SourceIdAsUrl<'a> {
711    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
712        if let Some(protocol) = self.inner.kind.protocol() {
713            write!(f, "{protocol}+")?;
714        }
715        write!(f, "{}", self.inner.url)?;
716        if let SourceIdInner {
717            kind: SourceKind::Git(ref reference),
718            ref precise,
719            ..
720        } = *self.inner
721        {
722            if let Some(pretty) = reference.pretty_ref(self.encoded) {
723                write!(f, "?{}", pretty)?;
724            }
725            if let Some(precise) = precise.as_ref() {
726                write!(f, "#{}", precise)?;
727            }
728        }
729        Ok(())
730    }
731}
732
733impl KeyOf {
734    /// Gets the underlying key.
735    fn key(&self) -> &str {
736        match self {
737            KeyOf::Registry(k) | KeyOf::Source(k) => k,
738        }
739    }
740
741    /// Gets the key if it's from an alternative registry.
742    fn alternative_registry(&self) -> Option<&str> {
743        match self {
744            KeyOf::Registry(k) => Some(k),
745            _ => None,
746        }
747    }
748}
749
750#[cfg(test)]
751mod tests {
752    use super::{GitReference, SourceId, SourceKind};
753    use crate::util::{GlobalContext, IntoUrl};
754
755    #[test]
756    fn github_sources_equal() {
757        let loc = "https://github.com/foo/bar".into_url().unwrap();
758        let default = SourceKind::Git(GitReference::DefaultBranch);
759        let s1 = SourceId::new(default.clone(), loc, None).unwrap();
760
761        let loc = "git://github.com/foo/bar".into_url().unwrap();
762        let s2 = SourceId::new(default, loc.clone(), None).unwrap();
763
764        assert_eq!(s1, s2);
765
766        let foo = SourceKind::Git(GitReference::Branch("foo".to_string()));
767        let s3 = SourceId::new(foo, loc, None).unwrap();
768        assert_ne!(s1, s3);
769    }
770
771    // This is a test that the hash of the `SourceId` for crates.io is a well-known
772    // value.
773    //
774    // Note that the hash value matches what the crates.io source id has hashed
775    // since Rust 1.84.0. We strive to keep this value the same across
776    // versions of Cargo because changing it means that users will need to
777    // redownload the index and all crates they use when using a new Cargo version.
778    //
779    // This isn't to say that this hash can *never* change, only that when changing
780    // this it should be explicitly done. If this hash changes accidentally and
781    // you're able to restore the hash to its original value, please do so!
782    // Otherwise please just leave a comment in your PR as to why the hash value is
783    // changing and why the old value can't be easily preserved.
784    // If it takes an ugly hack to restore it,
785    // then leave a link here so we can remove the hack next time we change the hash.
786    //
787    // Hacks to remove next time the hash changes:
788    // - (fill in your code here)
789    //
790    // The hash value should be stable across platforms, and doesn't depend on
791    // endianness and bit-width. One caveat is that absolute paths on Windows
792    // are inherently different than on Unix-like platforms. Unless we omit or
    // strip the prefix components (e.g. `C:`), there is no way to have a true
794    // cross-platform stable hash for absolute paths.
795    #[test]
796    fn test_stable_hash() {
797        use std::hash::Hasher;
798        use std::path::Path;
799
800        use snapbox::IntoData as _;
801        use snapbox::assert_data_eq;
802        use snapbox::str;
803
804        use crate::util::StableHasher;
805        use crate::util::hex::short_hash;
806
807        #[cfg(not(windows))]
808        let ws_root = Path::new("/tmp/ws");
809        #[cfg(windows)]
810        let ws_root = Path::new(r"C:\\tmp\ws");
811
812        let gen_hash = |source_id: SourceId| {
813            let mut hasher = StableHasher::new();
814            source_id.stable_hash(ws_root, &mut hasher);
815            Hasher::finish(&hasher).to_string()
816        };
817
818        let source_id = SourceId::crates_io(&GlobalContext::default().unwrap()).unwrap();
819        assert_data_eq!(gen_hash(source_id), str!["7062945687441624357"].raw());
820        assert_data_eq!(short_hash(&source_id), str!["25cdd57fae9f0462"].raw());
821
822        let url = "https://my-crates.io".into_url().unwrap();
823        let source_id = SourceId::for_registry(&url).unwrap();
824        assert_data_eq!(gen_hash(source_id), str!["8310250053664888498"].raw());
825        assert_data_eq!(short_hash(&source_id), str!["b2d65deb64f05373"].raw());
826
827        let url = "https://your-crates.io".into_url().unwrap();
828        let source_id = SourceId::for_alt_registry(&url, "alt").unwrap();
829        assert_data_eq!(gen_hash(source_id), str!["14149534903000258933"].raw());
830        assert_data_eq!(short_hash(&source_id), str!["755952de063f5dc4"].raw());
831
832        let url = "sparse+https://my-crates.io".into_url().unwrap();
833        let source_id = SourceId::for_registry(&url).unwrap();
834        assert_data_eq!(gen_hash(source_id), str!["16249512552851930162"].raw());
835        assert_data_eq!(short_hash(&source_id), str!["327cfdbd92dd81e1"].raw());
836
837        let url = "sparse+https://your-crates.io".into_url().unwrap();
838        let source_id = SourceId::for_alt_registry(&url, "alt").unwrap();
839        assert_data_eq!(gen_hash(source_id), str!["6156697384053352292"].raw());
840        assert_data_eq!(short_hash(&source_id), str!["64a713b6a6fb7055"].raw());
841
842        let url = "file:///tmp/ws/crate".into_url().unwrap();
843        let source_id = SourceId::for_git(&url, GitReference::DefaultBranch).unwrap();
844        assert_data_eq!(gen_hash(source_id), str!["473480029881867801"].raw());
845        assert_data_eq!(short_hash(&source_id), str!["199e591d94239206"].raw());
846
847        let path = &ws_root.join("crate");
848        let source_id = SourceId::for_local_registry(path).unwrap();
849        #[cfg(not(windows))]
850        {
851            assert_data_eq!(gen_hash(source_id), str!["11515846423845066584"].raw());
852            assert_data_eq!(short_hash(&source_id), str!["58d73c154f81d09f"].raw());
853        }
854        #[cfg(windows)]
855        {
856            assert_data_eq!(gen_hash(source_id), str!["6146331155906064276"].raw());
857            assert_data_eq!(short_hash(&source_id), str!["946fb2239f274c55"].raw());
858        }
859
860        let source_id = SourceId::for_path(path).unwrap();
861        assert_data_eq!(gen_hash(source_id), str!["215644081443634269"].raw());
862        #[cfg(not(windows))]
863        assert_data_eq!(short_hash(&source_id), str!["64bace89c92b101f"].raw());
864        #[cfg(windows)]
865        assert_data_eq!(short_hash(&source_id), str!["01e1e6c391813fb6"].raw());
866
867        let source_id = SourceId::for_directory(path).unwrap();
868        #[cfg(not(windows))]
869        {
870            assert_data_eq!(gen_hash(source_id), str!["6127590343904940368"].raw());
871            assert_data_eq!(short_hash(&source_id), str!["505191d1f3920955"].raw());
872        }
873        #[cfg(windows)]
874        {
875            assert_data_eq!(gen_hash(source_id), str!["10423446877655960172"].raw());
876            assert_data_eq!(short_hash(&source_id), str!["6c8ad69db585a790"].raw());
877        }
878    }
879
880    #[test]
881    fn serde_roundtrip() {
882        let url = "sparse+https://my-crates.io/".into_url().unwrap();
883        let source_id = SourceId::for_registry(&url).unwrap();
884        let formatted = format!("{}", source_id.as_url());
885        let deserialized = SourceId::from_url(&formatted).unwrap();
886        assert_eq!(formatted, "sparse+https://my-crates.io/");
887        assert_eq!(source_id, deserialized);
888    }
889
890    #[test]
891    fn gitrefs_roundtrip() {
892        let base = "https://host/path".into_url().unwrap();
893        let branch = GitReference::Branch("*-._+20%30 Z/z#foo=bar&zap[]?to\\()'\"".to_string());
894        let s1 = SourceId::for_git(&base, branch).unwrap();
895        let ser1 = format!("{}", s1.as_encoded_url());
896        let s2 = SourceId::from_url(&ser1).expect("Failed to deserialize");
897        let ser2 = format!("{}", s2.as_encoded_url());
898        // Serializing twice should yield the same result
899        assert_eq!(ser1, ser2, "Serialized forms don't match");
900        // SourceId serializing the same should have the same semantics
901        // This used to not be the case (# was ambiguous)
902        assert_eq!(s1, s2, "SourceId doesn't round-trip");
903        // Freeze the format to match an x-www-form-urlencoded query string
904        // https://url.spec.whatwg.org/#application/x-www-form-urlencoded
905        assert_eq!(
906            ser1,
907            "git+https://host/path?branch=*-._%2B20%2530+Z%2Fz%23foo%3Dbar%26zap%5B%5D%3Fto%5C%28%29%27%22"
908        );
909    }
910}
911
/// Check if `url` equals to the overridden crates.io URL.
///
/// The override is read from the `__CARGO_TEST_CRATES_IO_URL_DO_NOT_USE_THIS`
/// environment variable. Returns `true` only when that variable can be read
/// and its value is exactly `url`; if reading it fails, the result is `false`.
#[expect(
    clippy::disallowed_methods,
    reason = "testing only, no reason for config support"
)]
fn is_overridden_crates_io_url(url: &str) -> bool {
    std::env::var("__CARGO_TEST_CRATES_IO_URL_DO_NOT_USE_THIS").is_ok_and(|value| value == url)
}