cargo/core/
source_id.rs

1use crate::core::GitReference;
2use crate::core::PackageId;
3use crate::core::SourceKind;
4use crate::sources::registry::CRATES_IO_HTTP_INDEX;
5use crate::sources::source::Source;
6use crate::sources::{DirectorySource, CRATES_IO_DOMAIN, CRATES_IO_INDEX, CRATES_IO_REGISTRY};
7use crate::sources::{GitSource, PathSource, RegistrySource};
8use crate::util::interning::InternedString;
9use crate::util::{context, CanonicalUrl, CargoResult, GlobalContext, IntoUrl};
10use anyhow::Context as _;
11use serde::de;
12use serde::ser;
13use std::cmp::{self, Ordering};
14use std::collections::HashSet;
15use std::fmt::{self, Formatter};
16use std::hash::{self, Hash};
17use std::path::{Path, PathBuf};
18use std::ptr;
19use std::sync::Mutex;
20use std::sync::OnceLock;
21use tracing::trace;
22use url::Url;
23
/// Process-wide intern table for [`SourceIdInner`] values.
///
/// `SourceId::wrap` looks values up here and, on a miss, leaks a new
/// allocation and stores the resulting `'static` reference.
static SOURCE_ID_CACHE: OnceLock<Mutex<HashSet<&'static SourceIdInner>>> = OnceLock::new();
25
/// Unique identifier for a source of packages.
///
/// Cargo uniquely identifies packages using [`PackageId`], a combination of the
/// package name, version, and the code source. `SourceId` exactly represents
/// the "code source" in `PackageId`. See [`SourceId::hash`] to learn what is
/// taken into account for the uniqueness of a source.
///
/// `SourceId` is usually associated with an instance of [`Source`], which is
/// supposed to provide a `SourceId` via the [`Source::source_id`] method.
///
/// [`Source`]: crate::sources::source::Source
/// [`Source::source_id`]: crate::sources::source::Source::source_id
/// [`PackageId`]: super::PackageId
#[derive(Clone, Copy, Eq, Debug)]
pub struct SourceId {
    /// Reference to the interned data stored in `SOURCE_ID_CACHE`.
    inner: &'static SourceIdInner,
}
43
/// The interned version of [`SourceId`] to avoid excessive clones and borrows.
/// Values are cached in `SOURCE_ID_CACHE` once created.
#[derive(Eq, Clone, Debug)]
struct SourceIdInner {
    /// The source URL.
    url: Url,
    /// The canonical version of the above url. See [`CanonicalUrl`] to learn
    /// why it is needed and how it normalizes a URL.
    canonical_url: CanonicalUrl,
    /// The source kind.
    kind: SourceKind,
    /// For example, the exact Git revision of the specified branch for a Git Source.
    precise: Option<Precise>,
    /// Name of the remote registry.
    ///
    /// WARNING: this is not always set; it is `None` when the name is not
    /// known, e.g. for a registry coming from `--index` or Cargo.lock.
    registry_key: Option<KeyOf>,
}
63
/// Extra data stored in the `precise` field of a `SourceIdInner`.
#[derive(Eq, PartialEq, Clone, Debug, Hash)]
enum Precise {
    /// Displayed as `locked`; set via `SourceId::with_locked_precise`.
    Locked,
    /// A requested registry package update, built by
    /// `SourceId::with_precise_registry_version`.
    Updated {
        /// Name of the package being updated.
        name: InternedString,
        /// Version the update starts from.
        from: semver::Version,
        /// Version requested as the update target.
        to: semver::Version,
    },
    /// The fragment of a Git source URL; set via `SourceId::with_git_precise`.
    GitUrlFragment(String),
}
74
75impl fmt::Display for Precise {
76    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
77        match self {
78            Precise::Locked => "locked".fmt(f),
79            Precise::Updated { name, from, to } => {
80                write!(f, "{name}={from}->{to}")
81            }
82            Precise::GitUrlFragment(s) => s.fmt(f),
83        }
84    }
85}
86
/// Where the remote source key is defined.
///
/// The purpose of this is to provide better diagnostics for different sources of keys.
/// Both variants carry the key string itself.
#[derive(Debug, Clone, PartialEq, Eq)]
enum KeyOf {
    /// Defined in the `[registries]` table or the built-in `crates-io` key.
    Registry(String),
    /// Defined in the `[source]` replacement table.
    Source(String),
}
97
98impl SourceId {
99    /// Creates a `SourceId` object from the kind and URL.
100    ///
101    /// The canonical url will be calculated, but the precise field will not
102    fn new(kind: SourceKind, url: Url, key: Option<KeyOf>) -> CargoResult<SourceId> {
103        if kind == SourceKind::SparseRegistry {
104            // Sparse URLs are different because they store the kind prefix (sparse+)
105            // in the URL. This is because the prefix is necessary to differentiate
106            // from regular registries (git-based). The sparse+ prefix is included
107            // everywhere, including user-facing locations such as the `config.toml`
108            // file that defines the registry, or whenever Cargo displays it to the user.
109            assert!(url.as_str().starts_with("sparse+"));
110        }
111        let source_id = SourceId::wrap(SourceIdInner {
112            kind,
113            canonical_url: CanonicalUrl::new(&url)?,
114            url,
115            precise: None,
116            registry_key: key,
117        });
118        Ok(source_id)
119    }
120
121    /// Interns the value and returns the wrapped type.
122    fn wrap(inner: SourceIdInner) -> SourceId {
123        let mut cache = SOURCE_ID_CACHE
124            .get_or_init(|| Default::default())
125            .lock()
126            .unwrap();
127        let inner = cache.get(&inner).cloned().unwrap_or_else(|| {
128            let inner = Box::leak(Box::new(inner));
129            cache.insert(inner);
130            inner
131        });
132        SourceId { inner }
133    }
134
135    fn remote_source_kind(url: &Url) -> SourceKind {
136        if url.as_str().starts_with("sparse+") {
137            SourceKind::SparseRegistry
138        } else {
139            SourceKind::Registry
140        }
141    }
142
143    /// Parses a source URL and returns the corresponding ID.
144    ///
145    /// ## Example
146    ///
147    /// ```
148    /// use cargo::core::SourceId;
149    /// SourceId::from_url("git+https://github.com/alexcrichton/\
150    ///                     libssh2-static-sys#80e71a3021618eb05\
151    ///                     656c58fb7c5ef5f12bc747f");
152    /// ```
153    pub fn from_url(string: &str) -> CargoResult<SourceId> {
154        let (kind, url) = string
155            .split_once('+')
156            .ok_or_else(|| anyhow::format_err!("invalid source `{}`", string))?;
157
158        match kind {
159            "git" => {
160                let mut url = url.into_url()?;
161                let reference = GitReference::from_query(url.query_pairs());
162                let precise = url.fragment().map(|s| s.to_owned());
163                url.set_fragment(None);
164                url.set_query(None);
165                Ok(SourceId::for_git(&url, reference)?.with_git_precise(precise))
166            }
167            "registry" => {
168                let url = url.into_url()?;
169                Ok(SourceId::new(SourceKind::Registry, url, None)?.with_locked_precise())
170            }
171            "sparse" => {
172                let url = string.into_url()?;
173                Ok(SourceId::new(SourceKind::SparseRegistry, url, None)?.with_locked_precise())
174            }
175            "path" => {
176                let url = url.into_url()?;
177                SourceId::new(SourceKind::Path, url, None)
178            }
179            kind => Err(anyhow::format_err!("unsupported source protocol: {}", kind)),
180        }
181    }
182
183    /// A view of the [`SourceId`] that can be `Display`ed as a URL.
184    pub fn as_url(&self) -> SourceIdAsUrl<'_> {
185        SourceIdAsUrl {
186            inner: &*self.inner,
187            encoded: false,
188        }
189    }
190
191    /// Like [`Self::as_url`] but with URL parameters encoded.
192    pub fn as_encoded_url(&self) -> SourceIdAsUrl<'_> {
193        SourceIdAsUrl {
194            inner: &*self.inner,
195            encoded: true,
196        }
197    }
198
199    /// Creates a `SourceId` from a filesystem path.
200    ///
201    /// `path`: an absolute path.
202    pub fn for_path(path: &Path) -> CargoResult<SourceId> {
203        let url = path.into_url()?;
204        SourceId::new(SourceKind::Path, url, None)
205    }
206
207    /// Creates a `SourceId` from a filesystem path.
208    ///
209    /// `path`: an absolute path.
210    pub fn for_manifest_path(manifest_path: &Path) -> CargoResult<SourceId> {
211        if crate::util::toml::is_embedded(manifest_path) {
212            Self::for_path(manifest_path)
213        } else {
214            Self::for_path(manifest_path.parent().unwrap())
215        }
216    }
217
218    /// Creates a `SourceId` from a Git reference.
219    pub fn for_git(url: &Url, reference: GitReference) -> CargoResult<SourceId> {
220        SourceId::new(SourceKind::Git(reference), url.clone(), None)
221    }
222
223    /// Creates a `SourceId` from a remote registry URL when the registry name
224    /// cannot be determined, e.g. a user passes `--index` directly from CLI.
225    ///
226    /// Use [`SourceId::for_alt_registry`] if a name can provided, which
227    /// generates better messages for cargo.
228    pub fn for_registry(url: &Url) -> CargoResult<SourceId> {
229        let kind = Self::remote_source_kind(url);
230        SourceId::new(kind, url.to_owned(), None)
231    }
232
233    /// Creates a `SourceId` for a remote registry from the `[registries]` table or crates.io.
234    pub fn for_alt_registry(url: &Url, key: &str) -> CargoResult<SourceId> {
235        let kind = Self::remote_source_kind(url);
236        let key = KeyOf::Registry(key.into());
237        SourceId::new(kind, url.to_owned(), Some(key))
238    }
239
240    /// Creates a `SourceId` for a remote registry from the `[source]` replacement table.
241    pub fn for_source_replacement_registry(url: &Url, key: &str) -> CargoResult<SourceId> {
242        let kind = Self::remote_source_kind(url);
243        let key = KeyOf::Source(key.into());
244        SourceId::new(kind, url.to_owned(), Some(key))
245    }
246
247    /// Creates a `SourceId` from a local registry path.
248    pub fn for_local_registry(path: &Path) -> CargoResult<SourceId> {
249        let url = path.into_url()?;
250        SourceId::new(SourceKind::LocalRegistry, url, None)
251    }
252
253    /// Creates a `SourceId` from a directory path.
254    pub fn for_directory(path: &Path) -> CargoResult<SourceId> {
255        let url = path.into_url()?;
256        SourceId::new(SourceKind::Directory, url, None)
257    }
258
    /// Returns the `SourceId` corresponding to the main repository.
    ///
    /// This is the main cargo registry by default, but it can be overridden in
    /// a `.cargo/config.toml`.
    ///
    /// The lookup is delegated to `gctx.crates_io_source_id()`.
    pub fn crates_io(gctx: &GlobalContext) -> CargoResult<SourceId> {
        gctx.crates_io_source_id()
    }
266
267    /// Returns the `SourceId` corresponding to the main repository, using the
268    /// sparse HTTP index if allowed.
269    pub fn crates_io_maybe_sparse_http(gctx: &GlobalContext) -> CargoResult<SourceId> {
270        if Self::crates_io_is_sparse(gctx)? {
271            gctx.check_registry_index_not_set()?;
272            let url = CRATES_IO_HTTP_INDEX.into_url().unwrap();
273            let key = KeyOf::Registry(CRATES_IO_REGISTRY.into());
274            SourceId::new(SourceKind::SparseRegistry, url, Some(key))
275        } else {
276            Self::crates_io(gctx)
277        }
278    }
279
280    /// Returns whether to access crates.io over the sparse protocol.
281    pub fn crates_io_is_sparse(gctx: &GlobalContext) -> CargoResult<bool> {
282        let proto: Option<context::Value<String>> = gctx.get("registries.crates-io.protocol")?;
283        let is_sparse = match proto.as_ref().map(|v| v.val.as_str()) {
284            Some("sparse") => true,
285            Some("git") => false,
286            Some(unknown) => anyhow::bail!(
287                "unsupported registry protocol `{unknown}` (defined in {})",
288                proto.as_ref().unwrap().definition
289            ),
290            None => true,
291        };
292        Ok(is_sparse)
293    }
294
295    /// Gets the `SourceId` associated with given name of the remote registry.
296    pub fn alt_registry(gctx: &GlobalContext, key: &str) -> CargoResult<SourceId> {
297        if key == CRATES_IO_REGISTRY {
298            return Self::crates_io(gctx);
299        }
300        let url = gctx.get_registry_index(key)?;
301        Self::for_alt_registry(&url, key)
302    }
303
304    /// Gets this source URL.
305    pub fn url(&self) -> &Url {
306        &self.inner.url
307    }
308
309    /// Gets the canonical URL of this source, used for internal comparison
310    /// purposes.
311    pub fn canonical_url(&self) -> &CanonicalUrl {
312        &self.inner.canonical_url
313    }
314
315    /// Displays the text "crates.io index" for Cargo shell status output.
316    pub fn display_index(self) -> String {
317        if self.is_crates_io() {
318            format!("{} index", CRATES_IO_DOMAIN)
319        } else {
320            format!("`{}` index", self.display_registry_name())
321        }
322    }
323
324    /// Displays the name of a registry if it has one. Otherwise just the URL.
325    pub fn display_registry_name(self) -> String {
326        if let Some(key) = self.inner.registry_key.as_ref().map(|k| k.key()) {
327            key.into()
328        } else if self.has_precise() {
329            // We remove `precise` here to retrieve an permissive version of
330            // `SourceIdInner`, which may contain the registry name.
331            self.without_precise().display_registry_name()
332        } else {
333            url_display(self.url())
334        }
335    }
336
337    /// Gets the name of the remote registry as defined in the `[registries]` table,
338    /// or the built-in `crates-io` key.
339    pub fn alt_registry_key(&self) -> Option<&str> {
340        self.inner.registry_key.as_ref()?.alternative_registry()
341    }
342
343    /// Returns `true` if this source is from a filesystem path.
344    pub fn is_path(self) -> bool {
345        self.inner.kind == SourceKind::Path
346    }
347
348    /// Returns the local path if this is a path dependency.
349    pub fn local_path(self) -> Option<PathBuf> {
350        if self.inner.kind != SourceKind::Path {
351            return None;
352        }
353
354        Some(self.inner.url.to_file_path().unwrap())
355    }
356
357    pub fn kind(&self) -> &SourceKind {
358        &self.inner.kind
359    }
360
361    /// Returns `true` if this source is from a registry (either local or not).
362    pub fn is_registry(self) -> bool {
363        matches!(
364            self.inner.kind,
365            SourceKind::Registry | SourceKind::SparseRegistry | SourceKind::LocalRegistry
366        )
367    }
368
369    /// Returns `true` if this source is from a sparse registry.
370    pub fn is_sparse(self) -> bool {
371        matches!(self.inner.kind, SourceKind::SparseRegistry)
372    }
373
374    /// Returns `true` if this source is a "remote" registry.
375    ///
376    /// "remote" may also mean a file URL to a git index, so it is not
377    /// necessarily "remote". This just means it is not `local-registry`.
378    pub fn is_remote_registry(self) -> bool {
379        matches!(
380            self.inner.kind,
381            SourceKind::Registry | SourceKind::SparseRegistry
382        )
383    }
384
385    /// Returns `true` if this source from a Git repository.
386    pub fn is_git(self) -> bool {
387        matches!(self.inner.kind, SourceKind::Git(_))
388    }
389
390    /// Creates an implementation of `Source` corresponding to this ID.
391    ///
392    /// * `yanked_whitelist` --- Packages allowed to be used, even if they are yanked.
393    pub fn load<'a>(
394        self,
395        gctx: &'a GlobalContext,
396        yanked_whitelist: &HashSet<PackageId>,
397    ) -> CargoResult<Box<dyn Source + 'a>> {
398        trace!("loading SourceId; {}", self);
399        match self.inner.kind {
400            SourceKind::Git(..) => Ok(Box::new(GitSource::new(self, gctx)?)),
401            SourceKind::Path => {
402                let path = self
403                    .inner
404                    .url
405                    .to_file_path()
406                    .expect("path sources cannot be remote");
407                if crate::util::toml::is_embedded(&path) {
408                    anyhow::bail!("Single file packages cannot be used as dependencies")
409                }
410                Ok(Box::new(PathSource::new(&path, self, gctx)))
411            }
412            SourceKind::Registry | SourceKind::SparseRegistry => Ok(Box::new(
413                RegistrySource::remote(self, yanked_whitelist, gctx)?,
414            )),
415            SourceKind::LocalRegistry => {
416                let path = self
417                    .inner
418                    .url
419                    .to_file_path()
420                    .expect("path sources cannot be remote");
421                Ok(Box::new(RegistrySource::local(
422                    self,
423                    &path,
424                    yanked_whitelist,
425                    gctx,
426                )))
427            }
428            SourceKind::Directory => {
429                let path = self
430                    .inner
431                    .url
432                    .to_file_path()
433                    .expect("path sources cannot be remote");
434                Ok(Box::new(DirectorySource::new(&path, self, gctx)))
435            }
436        }
437    }
438
439    /// Gets the Git reference if this is a git source, otherwise `None`.
440    pub fn git_reference(self) -> Option<&'static GitReference> {
441        match self.inner.kind {
442            SourceKind::Git(ref s) => Some(s),
443            _ => None,
444        }
445    }
446
447    /// Check if the precise data field has bean set
448    pub fn has_precise(self) -> bool {
449        self.inner.precise.is_some()
450    }
451
452    /// Check if the precise data field has bean set to "locked"
453    pub fn has_locked_precise(self) -> bool {
454        self.inner.precise == Some(Precise::Locked)
455    }
456
457    /// Check if two sources have the same precise data field
458    pub fn has_same_precise_as(self, other: Self) -> bool {
459        self.inner.precise == other.inner.precise
460    }
461
462    /// Check if the precise data field stores information for this `name`
463    /// from a call to [`SourceId::with_precise_registry_version`].
464    ///
465    /// If so return the version currently in the lock file and the version to be updated to.
466    pub fn precise_registry_version(
467        self,
468        pkg: &str,
469    ) -> Option<(&semver::Version, &semver::Version)> {
470        match &self.inner.precise {
471            Some(Precise::Updated { name, from, to }) if name == pkg => Some((from, to)),
472            _ => None,
473        }
474    }
475
476    pub fn precise_git_fragment(self) -> Option<&'static str> {
477        match &self.inner.precise {
478            Some(Precise::GitUrlFragment(s)) => Some(&s),
479            _ => None,
480        }
481    }
482
483    /// Creates a new `SourceId` from this source with the given `precise`.
484    pub fn with_git_precise(self, fragment: Option<String>) -> SourceId {
485        self.with_precise(&fragment.map(|f| Precise::GitUrlFragment(f)))
486    }
487
488    /// Creates a new `SourceId` from this source without a `precise`.
489    pub fn without_precise(self) -> SourceId {
490        self.with_precise(&None)
491    }
492
493    /// Creates a new `SourceId` from this source without a `precise`.
494    pub fn with_locked_precise(self) -> SourceId {
495        self.with_precise(&Some(Precise::Locked))
496    }
497
498    /// Creates a new `SourceId` from this source with the `precise` from some other `SourceId`.
499    pub fn with_precise_from(self, v: Self) -> SourceId {
500        self.with_precise(&v.inner.precise)
501    }
502
503    fn with_precise(self, precise: &Option<Precise>) -> SourceId {
504        if &self.inner.precise == precise {
505            self
506        } else {
507            SourceId::wrap(SourceIdInner {
508                precise: precise.clone(),
509                ..(*self.inner).clone()
510            })
511        }
512    }
513
514    /// When updating a lock file on a version using `cargo update --precise`
515    /// the requested version is stored in the precise field.
516    /// On a registry dependency we also need to keep track of the package that
517    /// should be updated and even which of the versions should be updated.
518    /// All of this gets encoded in the precise field using this method.
519    /// The data can be read with [`SourceId::precise_registry_version`]
520    pub fn with_precise_registry_version(
521        self,
522        name: InternedString,
523        version: semver::Version,
524        precise: &str,
525    ) -> CargoResult<SourceId> {
526        let precise = semver::Version::parse(precise)
527            .with_context(|| format!("invalid version format for precise version `{precise}`"))?;
528
529        Ok(SourceId::wrap(SourceIdInner {
530            precise: Some(Precise::Updated {
531                name,
532                from: version,
533                to: precise,
534            }),
535            ..(*self.inner).clone()
536        }))
537    }
538
539    /// Returns `true` if the remote registry is the standard <https://crates.io>.
540    pub fn is_crates_io(self) -> bool {
541        match self.inner.kind {
542            SourceKind::Registry | SourceKind::SparseRegistry => {}
543            _ => return false,
544        }
545        let url = self.inner.url.as_str();
546        url == CRATES_IO_INDEX || url == CRATES_IO_HTTP_INDEX || is_overridden_crates_io_url(url)
547    }
548
549    /// Hashes `self` to be used in the name of some Cargo folders, so shouldn't vary.
550    ///
551    /// For git and url, `as_str` gives the serialisation of a url (which has a spec) and so
552    /// insulates against possible changes in how the url crate does hashing.
553    ///
554    /// For paths, remove the workspace prefix so the same source will give the
555    /// same hash in different locations, helping reproducible builds.
556    pub fn stable_hash<S: hash::Hasher>(self, workspace: &Path, into: &mut S) {
557        if self.is_path() {
558            if let Ok(p) = self
559                .inner
560                .url
561                .to_file_path()
562                .unwrap()
563                .strip_prefix(workspace)
564            {
565                self.inner.kind.hash(into);
566                p.to_str().unwrap().hash(into);
567                return;
568            }
569        }
570        self.inner.kind.hash(into);
571        match self.inner.kind {
572            SourceKind::Git(_) => (&self).inner.canonical_url.hash(into),
573            _ => (&self).inner.url.as_str().hash(into),
574        }
575    }
576
577    pub fn full_eq(self, other: SourceId) -> bool {
578        ptr::eq(self.inner, other.inner)
579    }
580
581    pub fn full_hash<S: hash::Hasher>(self, into: &mut S) {
582        ptr::NonNull::from(self.inner).hash(into)
583    }
584}
585
586impl PartialEq for SourceId {
587    fn eq(&self, other: &SourceId) -> bool {
588        self.cmp(other) == Ordering::Equal
589    }
590}
591
592impl PartialOrd for SourceId {
593    fn partial_cmp(&self, other: &SourceId) -> Option<Ordering> {
594        Some(self.cmp(other))
595    }
596}
597
598// Custom comparison defined as source kind and canonical URL equality,
599// ignoring the `precise` and `name` fields.
600impl Ord for SourceId {
601    fn cmp(&self, other: &SourceId) -> Ordering {
602        // If our interior pointers are to the exact same `SourceIdInner` then
603        // we're guaranteed to be equal.
604        if ptr::eq(self.inner, other.inner) {
605            return Ordering::Equal;
606        }
607
608        // Sort first based on `kind`, deferring to the URL comparison if
609        // the kinds are equal.
610        let ord_kind = self.inner.kind.cmp(&other.inner.kind);
611        ord_kind.then_with(|| self.inner.canonical_url.cmp(&other.inner.canonical_url))
612    }
613}
614
615impl ser::Serialize for SourceId {
616    fn serialize<S>(&self, s: S) -> Result<S::Ok, S::Error>
617    where
618        S: ser::Serializer,
619    {
620        if self.is_path() {
621            None::<String>.serialize(s)
622        } else {
623            s.collect_str(&self.as_url())
624        }
625    }
626}
627
628impl<'de> de::Deserialize<'de> for SourceId {
629    fn deserialize<D>(d: D) -> Result<SourceId, D::Error>
630    where
631        D: de::Deserializer<'de>,
632    {
633        let string = String::deserialize(d)?;
634        SourceId::from_url(&string).map_err(de::Error::custom)
635    }
636}
637
638fn url_display(url: &Url) -> String {
639    if url.scheme() == "file" {
640        if let Ok(path) = url.to_file_path() {
641            if let Some(path_str) = path.to_str() {
642                return path_str.to_string();
643            }
644        }
645    }
646
647    url.as_str().to_string()
648}
649
650impl fmt::Display for SourceId {
651    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
652        match self.inner.kind {
653            SourceKind::Git(ref reference) => {
654                // Don't replace the URL display for git references,
655                // because those are kind of expected to be URLs.
656                write!(f, "{}", self.inner.url)?;
657                if let Some(pretty) = reference.pretty_ref(true) {
658                    write!(f, "?{}", pretty)?;
659                }
660
661                if let Some(s) = &self.inner.precise {
662                    let s = s.to_string();
663                    let len = cmp::min(s.len(), 8);
664                    write!(f, "#{}", &s[..len])?;
665                }
666                Ok(())
667            }
668            SourceKind::Path => write!(f, "{}", url_display(&self.inner.url)),
669            SourceKind::Registry | SourceKind::SparseRegistry => {
670                write!(f, "registry `{}`", self.display_registry_name())
671            }
672            SourceKind::LocalRegistry => write!(f, "registry `{}`", url_display(&self.inner.url)),
673            SourceKind::Directory => write!(f, "dir {}", url_display(&self.inner.url)),
674        }
675    }
676}
677
678impl Hash for SourceId {
679    fn hash<S: hash::Hasher>(&self, into: &mut S) {
680        self.inner.kind.hash(into);
681        self.inner.canonical_url.hash(into);
682    }
683}
684
685/// The hash of `SourceIdInner` is used to retrieve its interned value from
686/// `SOURCE_ID_CACHE`. We only care about fields that make `SourceIdInner`
687/// unique. Optional fields not affecting the uniqueness must be excluded,
688/// such as [`registry_key`]. That's why this is not derived.
689///
690/// [`registry_key`]: SourceIdInner::registry_key
691impl Hash for SourceIdInner {
692    fn hash<S: hash::Hasher>(&self, into: &mut S) {
693        self.kind.hash(into);
694        self.precise.hash(into);
695        self.canonical_url.hash(into);
696    }
697}
698
699/// This implementation must be synced with [`SourceIdInner::hash`].
700impl PartialEq for SourceIdInner {
701    fn eq(&self, other: &Self) -> bool {
702        self.kind == other.kind
703            && self.precise == other.precise
704            && self.canonical_url == other.canonical_url
705    }
706}
707
/// A `Display`able view into a `SourceId` that will write it as a url.
///
/// Created by [`SourceId::as_url`] and [`SourceId::as_encoded_url`].
pub struct SourceIdAsUrl<'a> {
    /// The source data to render.
    inner: &'a SourceIdInner,
    /// Passed to `pretty_ref` when rendering a Git reference.
    encoded: bool,
}
713
714impl<'a> fmt::Display for SourceIdAsUrl<'a> {
715    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
716        if let Some(protocol) = self.inner.kind.protocol() {
717            write!(f, "{protocol}+")?;
718        }
719        write!(f, "{}", self.inner.url)?;
720        if let SourceIdInner {
721            kind: SourceKind::Git(ref reference),
722            ref precise,
723            ..
724        } = *self.inner
725        {
726            if let Some(pretty) = reference.pretty_ref(self.encoded) {
727                write!(f, "?{}", pretty)?;
728            }
729            if let Some(precise) = precise.as_ref() {
730                write!(f, "#{}", precise)?;
731            }
732        }
733        Ok(())
734    }
735}
736
737impl KeyOf {
738    /// Gets the underlying key.
739    fn key(&self) -> &str {
740        match self {
741            KeyOf::Registry(k) | KeyOf::Source(k) => k,
742        }
743    }
744
745    /// Gets the key if it's from an alternative registry.
746    fn alternative_registry(&self) -> Option<&str> {
747        match self {
748            KeyOf::Registry(k) => Some(k),
749            _ => None,
750        }
751    }
752}
753
754#[cfg(test)]
755mod tests {
756    use super::{GitReference, SourceId, SourceKind};
757    use crate::util::{GlobalContext, IntoUrl};
758
759    #[test]
760    fn github_sources_equal() {
761        let loc = "https://github.com/foo/bar".into_url().unwrap();
762        let default = SourceKind::Git(GitReference::DefaultBranch);
763        let s1 = SourceId::new(default.clone(), loc, None).unwrap();
764
765        let loc = "git://github.com/foo/bar".into_url().unwrap();
766        let s2 = SourceId::new(default, loc.clone(), None).unwrap();
767
768        assert_eq!(s1, s2);
769
770        let foo = SourceKind::Git(GitReference::Branch("foo".to_string()));
771        let s3 = SourceId::new(foo, loc, None).unwrap();
772        assert_ne!(s1, s3);
773    }
774
775    // This is a test that the hash of the `SourceId` for crates.io is a well-known
776    // value.
777    //
778    // Note that the hash value matches what the crates.io source id has hashed
779    // since Rust 1.84.0. We strive to keep this value the same across
780    // versions of Cargo because changing it means that users will need to
781    // redownload the index and all crates they use when using a new Cargo version.
782    //
783    // This isn't to say that this hash can *never* change, only that when changing
784    // this it should be explicitly done. If this hash changes accidentally and
785    // you're able to restore the hash to its original value, please do so!
786    // Otherwise please just leave a comment in your PR as to why the hash value is
787    // changing and why the old value can't be easily preserved.
788    // If it takes an ugly hack to restore it,
789    // then leave a link here so we can remove the hack next time we change the hash.
790    //
791    // Hacks to remove next time the hash changes:
792    // - (fill in your code here)
793    //
794    // The hash value should be stable across platforms, and doesn't depend on
795    // endianness and bit-width. One caveat is that absolute paths on Windows
796    // are inherently different than on Unix-like platforms. Unless we omit or
    // strip the prefix components (e.g. `C:`), there is no way to have a true
798    // cross-platform stable hash for absolute paths.
799    #[test]
800    fn test_stable_hash() {
801        use std::hash::Hasher;
802        use std::path::Path;
803
804        use snapbox::assert_data_eq;
805        use snapbox::str;
806        use snapbox::IntoData as _;
807
808        use crate::util::hex::short_hash;
809        use crate::util::StableHasher;
810
811        #[cfg(not(windows))]
812        let ws_root = Path::new("/tmp/ws");
813        #[cfg(windows)]
814        let ws_root = Path::new(r"C:\\tmp\ws");
815
816        let gen_hash = |source_id: SourceId| {
817            let mut hasher = StableHasher::new();
818            source_id.stable_hash(ws_root, &mut hasher);
819            Hasher::finish(&hasher).to_string()
820        };
821
822        let source_id = SourceId::crates_io(&GlobalContext::default().unwrap()).unwrap();
823        assert_data_eq!(gen_hash(source_id), str!["7062945687441624357"].raw());
824        assert_data_eq!(short_hash(&source_id), str!["25cdd57fae9f0462"].raw());
825
826        let url = "https://my-crates.io".into_url().unwrap();
827        let source_id = SourceId::for_registry(&url).unwrap();
828        assert_data_eq!(gen_hash(source_id), str!["8310250053664888498"].raw());
829        assert_data_eq!(short_hash(&source_id), str!["b2d65deb64f05373"].raw());
830
831        let url = "https://your-crates.io".into_url().unwrap();
832        let source_id = SourceId::for_alt_registry(&url, "alt").unwrap();
833        assert_data_eq!(gen_hash(source_id), str!["14149534903000258933"].raw());
834        assert_data_eq!(short_hash(&source_id), str!["755952de063f5dc4"].raw());
835
836        let url = "sparse+https://my-crates.io".into_url().unwrap();
837        let source_id = SourceId::for_registry(&url).unwrap();
838        assert_data_eq!(gen_hash(source_id), str!["16249512552851930162"].raw());
839        assert_data_eq!(short_hash(&source_id), str!["327cfdbd92dd81e1"].raw());
840
841        let url = "sparse+https://your-crates.io".into_url().unwrap();
842        let source_id = SourceId::for_alt_registry(&url, "alt").unwrap();
843        assert_data_eq!(gen_hash(source_id), str!["6156697384053352292"].raw());
844        assert_data_eq!(short_hash(&source_id), str!["64a713b6a6fb7055"].raw());
845
846        let url = "file:///tmp/ws/crate".into_url().unwrap();
847        let source_id = SourceId::for_git(&url, GitReference::DefaultBranch).unwrap();
848        assert_data_eq!(gen_hash(source_id), str!["473480029881867801"].raw());
849        assert_data_eq!(short_hash(&source_id), str!["199e591d94239206"].raw());
850
851        let path = &ws_root.join("crate");
852        let source_id = SourceId::for_local_registry(path).unwrap();
853        #[cfg(not(windows))]
854        {
855            assert_data_eq!(gen_hash(source_id), str!["11515846423845066584"].raw());
856            assert_data_eq!(short_hash(&source_id), str!["58d73c154f81d09f"].raw());
857        }
858        #[cfg(windows)]
859        {
860            assert_data_eq!(gen_hash(source_id), str!["6146331155906064276"].raw());
861            assert_data_eq!(short_hash(&source_id), str!["946fb2239f274c55"].raw());
862        }
863
864        let source_id = SourceId::for_path(path).unwrap();
865        assert_data_eq!(gen_hash(source_id), str!["215644081443634269"].raw());
866        #[cfg(not(windows))]
867        assert_data_eq!(short_hash(&source_id), str!["64bace89c92b101f"].raw());
868        #[cfg(windows)]
869        assert_data_eq!(short_hash(&source_id), str!["01e1e6c391813fb6"].raw());
870
871        let source_id = SourceId::for_directory(path).unwrap();
872        #[cfg(not(windows))]
873        {
874            assert_data_eq!(gen_hash(source_id), str!["6127590343904940368"].raw());
875            assert_data_eq!(short_hash(&source_id), str!["505191d1f3920955"].raw());
876        }
877        #[cfg(windows)]
878        {
879            assert_data_eq!(gen_hash(source_id), str!["10423446877655960172"].raw());
880            assert_data_eq!(short_hash(&source_id), str!["6c8ad69db585a790"].raw());
881        }
882    }
883
884    #[test]
885    fn serde_roundtrip() {
886        let url = "sparse+https://my-crates.io/".into_url().unwrap();
887        let source_id = SourceId::for_registry(&url).unwrap();
888        let formatted = format!("{}", source_id.as_url());
889        let deserialized = SourceId::from_url(&formatted).unwrap();
890        assert_eq!(formatted, "sparse+https://my-crates.io/");
891        assert_eq!(source_id, deserialized);
892    }
893
894    #[test]
895    fn gitrefs_roundtrip() {
896        let base = "https://host/path".into_url().unwrap();
897        let branch = GitReference::Branch("*-._+20%30 Z/z#foo=bar&zap[]?to\\()'\"".to_string());
898        let s1 = SourceId::for_git(&base, branch).unwrap();
899        let ser1 = format!("{}", s1.as_encoded_url());
900        let s2 = SourceId::from_url(&ser1).expect("Failed to deserialize");
901        let ser2 = format!("{}", s2.as_encoded_url());
902        // Serializing twice should yield the same result
903        assert_eq!(ser1, ser2, "Serialized forms don't match");
904        // SourceId serializing the same should have the same semantics
905        // This used to not be the case (# was ambiguous)
906        assert_eq!(s1, s2, "SourceId doesn't round-trip");
907        // Freeze the format to match an x-www-form-urlencoded query string
908        // https://url.spec.whatwg.org/#application/x-www-form-urlencoded
909        assert_eq!(
910            ser1,
911            "git+https://host/path?branch=*-._%2B20%2530+Z%2Fz%23foo%3Dbar%26zap%5B%5D%3Fto%5C%28%29%27%22"
912        );
913    }
914}
915
/// Checks whether `url` is equal to the overridden crates.io URL.
///
/// The override is read from the `__CARGO_TEST_CRATES_IO_URL_DO_NOT_USE_THIS`
/// environment variable and compared to `url` as an exact string match.
/// Returns `false` when the variable cannot be read, which `std::env::var`
/// reports both for an unset variable and for a value that is not valid
/// Unicode.
// ALLOWED: For testing Cargo itself only.
#[allow(clippy::disallowed_methods)]
fn is_overridden_crates_io_url(url: &str) -> bool {
    std::env::var("__CARGO_TEST_CRATES_IO_URL_DO_NOT_USE_THIS").is_ok_and(|v| v == url)
}