use crate::core::GitReference;
use crate::core::PackageId;
use crate::core::SourceKind;
use crate::sources::registry::CRATES_IO_HTTP_INDEX;
use crate::sources::source::Source;
use crate::sources::{DirectorySource, CRATES_IO_DOMAIN, CRATES_IO_INDEX, CRATES_IO_REGISTRY};
use crate::sources::{GitSource, PathSource, RegistrySource};
use crate::util::interning::InternedString;
use crate::util::{context, CanonicalUrl, CargoResult, GlobalContext, IntoUrl};
use anyhow::Context as _;
use serde::de;
use serde::ser;
use std::cmp::{self, Ordering};
use std::collections::HashSet;
use std::fmt::{self, Formatter};
use std::hash::{self, Hash};
use std::path::{Path, PathBuf};
use std::ptr;
use std::sync::Mutex;
use std::sync::OnceLock;
use tracing::trace;
use url::Url;
/// Process-wide interning table for [`SourceIdInner`] values.
///
/// `SourceId::wrap` consults this set and, on a miss, leaks a freshly boxed
/// inner value and stores the resulting `&'static` reference here. Nothing in
/// this file ever removes an entry.
static SOURCE_ID_CACHE: OnceLock<Mutex<HashSet<&'static SourceIdInner>>> = OnceLock::new();
/// Identifier for a source of packages (git repository, path, registry, ...).
///
/// This is a small `Copy` handle around an interned `&'static SourceIdInner`.
/// Only `Eq` is derived; `PartialEq`, `Ord` and `Hash` are implemented by hand
/// further down in this file and deliberately look at fewer fields than the
/// interning key does.
#[derive(Clone, Copy, Eq, Debug)]
pub struct SourceId {
/// Interned payload, produced by `SourceId::wrap`.
inner: &'static SourceIdInner,
}
/// The data behind a [`SourceId`].
///
/// `Hash` and `PartialEq` for this type are hand-written below and only
/// consider `kind`, `precise` and `canonical_url`; those impls are what the
/// interning cache keys on.
#[derive(Eq, Clone, Debug)]
struct SourceIdInner {
/// The URL this source id was constructed with.
url: Url,
/// `CanonicalUrl::new(&url)`, computed once in `SourceId::new`.
canonical_url: CanonicalUrl,
/// What kind of source this is (git, path, registry, ...).
kind: SourceKind,
/// Optional extra "precise" information; see [`Precise`].
precise: Option<Precise>,
/// Configuration key this source was created under, if any. Not part of
/// this type's `Hash`/`PartialEq`.
registry_key: Option<KeyOf>,
}
/// Extra "precise" information attached to a [`SourceId`].
#[derive(Eq, PartialEq, Clone, Debug, Hash)]
enum Precise {
/// Marker set by `SourceId::with_locked_precise`; `from_url` applies it to
/// `registry+` and `sparse+` sources.
Locked,
/// A single package being moved between versions; built by
/// `SourceId::with_precise_registry_version`.
Updated {
/// Package name the update applies to.
name: InternedString,
/// Version being updated from.
from: semver::Version,
/// Version being updated to.
to: semver::Version,
},
/// The `#fragment` of a git source URL; set through
/// `SourceId::with_git_precise`.
GitUrlFragment(String),
}
impl fmt::Display for Precise {
    /// Renders the precise value: `locked`, `name=from->to`, or the raw git
    /// URL fragment.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        match self {
            // Go through `Display` (not `write_str`) so formatter flags such
            // as width are still honoured for the plain-string variants.
            Precise::Locked => fmt::Display::fmt("locked", f),
            Precise::Updated { name, from, to } => write!(f, "{}={}->{}", name, from, to),
            Precise::GitUrlFragment(fragment) => fmt::Display::fmt(fragment, f),
        }
    }
}
/// Records which configuration key a [`SourceId`] was created under.
#[derive(Debug, Clone, PartialEq, Eq)]
enum KeyOf {
/// Key of a registry; used by `for_alt_registry` and
/// `crates_io_maybe_sparse_http`. Only this variant is reported by
/// `alternative_registry`.
Registry(String),
/// Key of a replacement source; used by `for_source_replacement_registry`.
Source(String),
}
impl SourceId {
/// Builds and interns a `SourceId` with no precise information.
///
/// Fails if `CanonicalUrl::new` rejects `url`.
///
/// # Panics
///
/// Panics if `kind` is `SourceKind::SparseRegistry` and `url` does not start
/// with `sparse+`.
fn new(kind: SourceKind, url: Url, key: Option<KeyOf>) -> CargoResult<SourceId> {
    if kind == SourceKind::SparseRegistry {
        assert!(url.as_str().starts_with("sparse+"));
    }
    let canonical_url = CanonicalUrl::new(&url)?;
    let inner = SourceIdInner {
        kind,
        canonical_url,
        url,
        precise: None,
        registry_key: key,
    };
    Ok(SourceId::wrap(inner))
}
/// Interns `inner` in `SOURCE_ID_CACHE` and returns the handle for it.
///
/// If an equal value (per `SourceIdInner`'s `Hash`/`PartialEq`) is already
/// cached, that existing reference is reused; otherwise `inner` is boxed,
/// leaked to obtain a `&'static` reference, and recorded in the cache.
///
/// # Panics
///
/// Panics if the cache mutex is poisoned.
fn wrap(inner: SourceIdInner) -> SourceId {
    let mut interned = SOURCE_ID_CACHE
        .get_or_init(Default::default)
        .lock()
        .unwrap();
    let hit = interned.get(&inner).copied();
    let inner = match hit {
        Some(existing) => existing,
        None => {
            let leaked: &'static SourceIdInner = Box::leak(Box::new(inner));
            interned.insert(leaked);
            leaked
        }
    };
    SourceId { inner }
}
/// Picks the registry kind for a remote index URL: `SparseRegistry` when the
/// URL text starts with `sparse+`, `Registry` otherwise.
fn remote_source_kind(url: &Url) -> SourceKind {
    match url.as_str().starts_with("sparse+") {
        true => SourceKind::SparseRegistry,
        false => SourceKind::Registry,
    }
}
pub fn from_url(string: &str) -> CargoResult<SourceId> {
let (kind, url) = string
.split_once('+')
.ok_or_else(|| anyhow::format_err!("invalid source `{}`", string))?;
match kind {
"git" => {
let mut url = url.into_url()?;
let reference = GitReference::from_query(url.query_pairs());
let precise = url.fragment().map(|s| s.to_owned());
url.set_fragment(None);
url.set_query(None);
Ok(SourceId::for_git(&url, reference)?.with_git_precise(precise))
}
"registry" => {
let url = url.into_url()?;
Ok(SourceId::new(SourceKind::Registry, url, None)?.with_locked_precise())
}
"sparse" => {
let url = string.into_url()?;
Ok(SourceId::new(SourceKind::SparseRegistry, url, None)?.with_locked_precise())
}
"path" => {
let url = url.into_url()?;
SourceId::new(SourceKind::Path, url, None)
}
kind => Err(anyhow::format_err!("unsupported source protocol: {}", kind)),
}
}
/// Returns a `Display`-able view of this id in URL form, with
/// `encoded` set to `false` (passed on to `GitReference::pretty_ref`).
pub fn as_url(&self) -> SourceIdAsUrl<'_> {
    let inner = self.inner;
    SourceIdAsUrl {
        inner,
        encoded: false,
    }
}
/// Like `as_url`, but with `encoded` set to `true` (passed on to
/// `GitReference::pretty_ref` when the git reference is rendered).
pub fn as_encoded_url(&self) -> SourceIdAsUrl<'_> {
    let inner = self.inner;
    SourceIdAsUrl {
        inner,
        encoded: true,
    }
}
/// Creates a `SourceKind::Path` source id from a filesystem path.
///
/// Fails if the path cannot be converted to a URL.
pub fn for_path(path: &Path) -> CargoResult<SourceId> {
    let kind = SourceKind::Path;
    SourceId::new(kind, path.into_url()?, None)
}
/// Creates a git source id for `url` at the given `reference`.
pub fn for_git(url: &Url, reference: GitReference) -> CargoResult<SourceId> {
    let kind = SourceKind::Git(reference);
    let url = url.clone();
    SourceId::new(kind, url, None)
}
/// Creates a remote registry source id for `url` without a registry key.
///
/// The kind (sparse or not) is chosen by `remote_source_kind`.
pub fn for_registry(url: &Url) -> CargoResult<SourceId> {
    SourceId::new(Self::remote_source_kind(url), url.clone(), None)
}
/// Creates a remote registry source id for `url`, remembering `key` as a
/// `KeyOf::Registry` key.
pub fn for_alt_registry(url: &Url, key: &str) -> CargoResult<SourceId> {
    let registry_key = KeyOf::Registry(String::from(key));
    SourceId::new(
        Self::remote_source_kind(url),
        url.clone(),
        Some(registry_key),
    )
}
/// Creates a remote registry source id for `url`, remembering `key` as a
/// `KeyOf::Source` key.
pub fn for_source_replacement_registry(url: &Url, key: &str) -> CargoResult<SourceId> {
    let source_key = KeyOf::Source(String::from(key));
    SourceId::new(Self::remote_source_kind(url), url.clone(), Some(source_key))
}
/// Creates a `SourceKind::LocalRegistry` source id from a filesystem path.
///
/// Fails if the path cannot be converted to a URL.
pub fn for_local_registry(path: &Path) -> CargoResult<SourceId> {
    let kind = SourceKind::LocalRegistry;
    SourceId::new(kind, path.into_url()?, None)
}
/// Creates a `SourceKind::Directory` source id from a filesystem path.
///
/// Fails if the path cannot be converted to a URL.
pub fn for_directory(path: &Path) -> CargoResult<SourceId> {
    let kind = SourceKind::Directory;
    SourceId::new(kind, path.into_url()?, None)
}
/// Returns the crates.io source id as provided by
/// `GlobalContext::crates_io_source_id`.
pub fn crates_io(gctx: &GlobalContext) -> CargoResult<SourceId> {
gctx.crates_io_source_id()
}
/// Returns the crates.io source id, using the sparse HTTP index when
/// `crates_io_is_sparse` says so and falling back to `crates_io` otherwise.
///
/// On the sparse path, `gctx.check_registry_index_not_set()` must succeed
/// first.
///
/// # Panics
///
/// Panics if `CRATES_IO_HTTP_INDEX` is not a valid URL.
pub fn crates_io_maybe_sparse_http(gctx: &GlobalContext) -> CargoResult<SourceId> {
    if !Self::crates_io_is_sparse(gctx)? {
        return Self::crates_io(gctx);
    }
    gctx.check_registry_index_not_set()?;
    let index = CRATES_IO_HTTP_INDEX.into_url().unwrap();
    let registry_key = KeyOf::Registry(CRATES_IO_REGISTRY.into());
    SourceId::new(SourceKind::SparseRegistry, index, Some(registry_key))
}
/// Reports whether crates.io should be accessed through the sparse protocol.
///
/// Reads the `registries.crates-io.protocol` config value: `"sparse"` gives
/// `true`, `"git"` gives `false`, and an unset value defaults to `true`.
///
/// # Errors
///
/// Fails if the config lookup fails or the value is any other string.
pub fn crates_io_is_sparse(gctx: &GlobalContext) -> CargoResult<bool> {
    let proto: Option<context::Value<String>> = gctx.get("registries.crates-io.protocol")?;
    let Some(value) = proto.as_ref() else {
        // Nothing configured: sparse is the default.
        return Ok(true);
    };
    match value.val.as_str() {
        "sparse" => Ok(true),
        "git" => Ok(false),
        unknown => anyhow::bail!(
            "unsupported registry protocol `{unknown}` (defined in {})",
            value.definition
        ),
    }
}
/// Looks up the source id of the registry configured under `key`.
///
/// The `CRATES_IO_REGISTRY` key maps to `crates_io`; any other key is
/// resolved through `gctx.get_registry_index` and `for_alt_registry`.
pub fn alt_registry(gctx: &GlobalContext, key: &str) -> CargoResult<SourceId> {
    if key == CRATES_IO_REGISTRY {
        Self::crates_io(gctx)
    } else {
        let index = gctx.get_registry_index(key)?;
        Self::for_alt_registry(&index, key)
    }
}
/// Returns the URL this source id was constructed with.
pub fn url(&self) -> &Url {
&self.inner.url
}
/// Returns the canonical form of the URL, as computed when the id was
/// created.
pub fn canonical_url(&self) -> &CanonicalUrl {
&self.inner.canonical_url
}
/// Human-readable name of this source's index: `<CRATES_IO_DOMAIN> index`
/// for crates.io, otherwise the registry name in backticks followed by
/// ` index`.
pub fn display_index(self) -> String {
    if !self.is_crates_io() {
        return format!("`{}` index", self.display_registry_name());
    }
    format!("{} index", CRATES_IO_DOMAIN)
}
/// Human-readable registry name.
///
/// Prefers the stored registry key. Without one, an id carrying precise
/// information retries on its precise-less counterpart (which is a separately
/// interned value and may carry a key); as a last resort the URL is shown
/// via `url_display`.
pub fn display_registry_name(self) -> String {
    match self.inner.registry_key.as_ref() {
        Some(key) => key.key().into(),
        None if self.has_precise() => self.without_precise().display_registry_name(),
        None => url_display(self.url()),
    }
}
/// Returns the registry key if this id was created with a
/// `KeyOf::Registry` key; `None` for a `KeyOf::Source` key or no key.
pub fn alt_registry_key(&self) -> Option<&str> {
    self.inner
        .registry_key
        .as_ref()
        .and_then(|key| key.alternative_registry())
}
/// Returns `true` if this is a `SourceKind::Path` source.
pub fn is_path(self) -> bool {
    matches!(self.inner.kind, SourceKind::Path)
}
/// Returns the filesystem path of a path source, or `None` for every other
/// kind.
///
/// # Panics
///
/// Panics if a path source's URL cannot be converted to a file path.
pub fn local_path(self) -> Option<PathBuf> {
    match self.inner.kind {
        SourceKind::Path => Some(self.inner.url.to_file_path().unwrap()),
        _ => None,
    }
}
/// Returns the kind of source this id refers to.
pub fn kind(&self) -> &SourceKind {
&self.inner.kind
}
/// Returns `true` for any registry kind: `Registry`, `SparseRegistry` or
/// `LocalRegistry`.
pub fn is_registry(self) -> bool {
    self.is_remote_registry() || matches!(self.inner.kind, SourceKind::LocalRegistry)
}
/// Returns `true` if this is a `SourceKind::SparseRegistry` source.
pub fn is_sparse(self) -> bool {
    self.inner.kind == SourceKind::SparseRegistry
}
/// Returns `true` for the remote registry kinds, `Registry` and
/// `SparseRegistry`.
pub fn is_remote_registry(self) -> bool {
    // Spelled out exhaustively so a new `SourceKind` variant forces a
    // decision here.
    match self.inner.kind {
        SourceKind::Registry | SourceKind::SparseRegistry => true,
        SourceKind::Git(_)
        | SourceKind::Path
        | SourceKind::LocalRegistry
        | SourceKind::Directory => false,
    }
}
pub fn is_git(self) -> bool {
matches!(self.inner.kind, SourceKind::Git(_))
}
/// Creates the `Source` implementation that matches this id's kind.
///
/// * git -> `GitSource`
/// * path -> `PathSource`
/// * `Registry` / `SparseRegistry` -> `RegistrySource::remote`
/// * `LocalRegistry` -> `RegistrySource::local`
/// * directory -> `DirectorySource`
///
/// `yanked_whitelist` is forwarded to the registry sources only.
///
/// # Panics
///
/// Panics for path, local-registry and directory sources whose URL cannot be
/// converted to a file path.
pub fn load<'a>(
    self,
    gctx: &'a GlobalContext,
    yanked_whitelist: &HashSet<PackageId>,
) -> CargoResult<Box<dyn Source + 'a>> {
    trace!("loading SourceId; {}", self);
    // Shared by every kind that is backed by a local directory.
    let file_path = || {
        self.inner
            .url
            .to_file_path()
            .expect("path sources cannot be remote")
    };
    let source: Box<dyn Source + 'a> = match self.inner.kind {
        SourceKind::Git(..) => Box::new(GitSource::new(self, gctx)?),
        SourceKind::Path => Box::new(PathSource::new(&file_path(), self, gctx)),
        SourceKind::Registry | SourceKind::SparseRegistry => {
            Box::new(RegistrySource::remote(self, yanked_whitelist, gctx)?)
        }
        SourceKind::LocalRegistry => Box::new(RegistrySource::local(
            self,
            &file_path(),
            yanked_whitelist,
            gctx,
        )),
        SourceKind::Directory => Box::new(DirectorySource::new(&file_path(), self, gctx)),
    };
    Ok(source)
}
/// Returns the git reference of a git source, or `None` for other kinds.
pub fn git_reference(self) -> Option<&'static GitReference> {
    if let SourceKind::Git(ref reference) = self.inner.kind {
        Some(reference)
    } else {
        None
    }
}
/// Returns `true` if any precise information is attached to this id.
pub fn has_precise(self) -> bool {
self.inner.precise.is_some()
}
/// Returns `true` if the precise information is exactly `Precise::Locked`.
pub fn has_locked_precise(self) -> bool {
    matches!(self.inner.precise, Some(Precise::Locked))
}
/// Returns `true` if `self` and `other` carry equal precise information
/// (including both having none).
pub fn has_same_precise_as(self, other: Self) -> bool {
self.inner.precise == other.inner.precise
}
/// Returns the `(from, to)` versions of a `Precise::Updated` entry, but only
/// if that entry is for the package named `pkg`.
pub fn precise_registry_version(
    self,
    pkg: &str,
) -> Option<(&semver::Version, &semver::Version)> {
    if let Some(Precise::Updated { name, from, to }) = &self.inner.precise {
        if name == pkg {
            return Some((from, to));
        }
    }
    None
}
/// Returns the stored git URL fragment when the precise information is a
/// `Precise::GitUrlFragment`, `None` otherwise.
pub fn precise_git_fragment(self) -> Option<&'static str> {
    if let Some(Precise::GitUrlFragment(fragment)) = &self.inner.precise {
        Some(fragment.as_str())
    } else {
        None
    }
}
/// Returns this id with its precise information replaced by the given git
/// URL fragment; `None` clears the precise information.
pub fn with_git_precise(self, fragment: Option<String>) -> SourceId {
    let precise = fragment.map(Precise::GitUrlFragment);
    self.with_precise(&precise)
}
/// Returns this id with any precise information removed.
pub fn without_precise(self) -> SourceId {
self.with_precise(&None)
}
/// Returns this id with its precise information set to `Precise::Locked`.
pub fn with_locked_precise(self) -> SourceId {
self.with_precise(&Some(Precise::Locked))
}
/// Returns this id with its precise information copied from `v`.
pub fn with_precise_from(self, v: Self) -> SourceId {
self.with_precise(&v.inner.precise)
}
/// Returns an id identical to `self` except for its precise information.
///
/// When `precise` already matches, `self` is returned as-is and the cache is
/// not touched; otherwise a modified copy of the inner value is interned.
fn with_precise(self, precise: &Option<Precise>) -> SourceId {
    if self.inner.precise == *precise {
        return self;
    }
    let mut inner = (*self.inner).clone();
    inner.precise = precise.clone();
    SourceId::wrap(inner)
}
/// Returns this id with a `Precise::Updated` entry recording that package
/// `name` moves from `version` to the version written in `precise`.
///
/// # Errors
///
/// Fails if `precise` is not a valid semver version.
pub fn with_precise_registry_version(
    self,
    name: InternedString,
    version: semver::Version,
    precise: &str,
) -> CargoResult<SourceId> {
    let to = semver::Version::parse(precise)
        .with_context(|| format!("invalid version format for precise version `{precise}`"))?;
    let mut inner = (*self.inner).clone();
    inner.precise = Some(Precise::Updated {
        name,
        from: version,
        to,
    });
    Ok(SourceId::wrap(inner))
}
/// Returns `true` if this is a remote registry whose URL is one of the
/// crates.io index URLs (`CRATES_IO_INDEX`, `CRATES_IO_HTTP_INDEX`) or the
/// URL accepted by `is_overridden_crates_io_url`.
pub fn is_crates_io(self) -> bool {
    if !matches!(
        self.inner.kind,
        SourceKind::Registry | SourceKind::SparseRegistry
    ) {
        return false;
    }
    let url = self.inner.url.as_str();
    if url == CRATES_IO_INDEX || url == CRATES_IO_HTTP_INDEX {
        return true;
    }
    is_overridden_crates_io_url(url)
}
/// Hashes this id into `into`.
///
/// A path source located underneath `workspace` contributes its kind and its
/// path *relative to the workspace* (presumably so the result does not depend
/// on where the workspace is checked out). Everything else falls back to
/// the regular `Hash` impl.
///
/// # Panics
///
/// Panics if a path source's URL is not a file path, or if the relative path
/// is not valid UTF-8.
pub fn stable_hash<S: hash::Hasher>(self, workspace: &Path, into: &mut S) {
    if self.is_path() {
        let absolute = self.inner.url.to_file_path().unwrap();
        if let Ok(relative) = absolute.strip_prefix(workspace) {
            self.inner.kind.hash(into);
            relative.to_str().unwrap().hash(into);
            return;
        }
    }
    self.hash(into)
}
/// Strict equality: `true` only when both ids point at the very same
/// interned inner value. This is narrower than `==`, which goes through the
/// hand-written `Ord` impl.
pub fn full_eq(self, other: SourceId) -> bool {
ptr::eq(self.inner, other.inner)
}
/// Hashes the address of the interned inner value, making it the hashing
/// counterpart of `full_eq`.
pub fn full_hash<S: hash::Hasher>(self, into: &mut S) {
ptr::NonNull::from(self.inner).hash(into)
}
}
impl PartialEq for SourceId {
    /// Two ids are equal exactly when the `Ord` impl says they compare equal.
    fn eq(&self, other: &SourceId) -> bool {
        matches!(self.cmp(other), Ordering::Equal)
    }
}
impl PartialOrd for SourceId {
    /// Always `Some`: the ordering is total and defined by `Ord::cmp`.
    fn partial_cmp(&self, other: &SourceId) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
impl Ord for SourceId {
fn cmp(&self, other: &SourceId) -> Ordering {
if ptr::eq(self.inner, other.inner) {
return Ordering::Equal;
}
match self.inner.kind.cmp(&other.inner.kind) {
Ordering::Equal => {}
other => return other,
}
match (&self.inner.kind, &other.inner.kind) {
(SourceKind::Git(_), SourceKind::Git(_)) => {
self.inner.canonical_url.cmp(&other.inner.canonical_url)
}
_ => self.inner.url.cmp(&other.inner.url),
}
}
}
impl ser::Serialize for SourceId {
    /// Serializes path sources as `None` and every other source as its
    /// `as_url()` string.
    fn serialize<S>(&self, s: S) -> Result<S::Ok, S::Error>
    where
        S: ser::Serializer,
    {
        if !self.is_path() {
            return s.collect_str(&self.as_url());
        }
        None::<String>.serialize(s)
    }
}
impl<'de> de::Deserialize<'de> for SourceId {
fn deserialize<D>(d: D) -> Result<SourceId, D::Error>
where
D: de::Deserializer<'de>,
{
let string = String::deserialize(d)?;
SourceId::from_url(&string).map_err(de::Error::custom)
}
}
/// Renders a URL for display: `file` URLs that convert to a UTF-8 filesystem
/// path are shown as that path, everything else as the URL text.
fn url_display(url: &Url) -> String {
    let local_path = (url.scheme() == "file")
        .then(|| url.to_file_path().ok())
        .flatten()
        .and_then(|path| path.to_str().map(str::to_owned));
    local_path.unwrap_or_else(|| url.as_str().to_string())
}
impl fmt::Display for SourceId {
    /// Human-readable rendering of the source id.
    ///
    /// * git: the URL, then `?<reference>` if the reference has a pretty
    ///   form, then `#` and at most the first 8 bytes of the precise value.
    /// * path: the path (or URL) as produced by `url_display`.
    /// * remote registries: ``registry `<name>` `` using
    ///   `display_registry_name`.
    /// * local registry: ``registry `<path>` ``.
    /// * directory: `dir <path>`.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        match self.inner.kind {
            SourceKind::Git(ref reference) => {
                write!(f, "{}", self.inner.url)?;
                if let Some(pretty) = reference.pretty_ref(true) {
                    write!(f, "?{}", pretty)?;
                }
                if let Some(precise) = &self.inner.precise {
                    let precise = precise.to_string();
                    let mut len = cmp::min(precise.len(), 8);
                    // Slicing in the middle of a multi-byte character would
                    // panic, so back off to the previous char boundary.
                    // Index 0 is always a boundary, so this terminates.
                    while !precise.is_char_boundary(len) {
                        len -= 1;
                    }
                    write!(f, "#{}", &precise[..len])?;
                }
                Ok(())
            }
            SourceKind::Path => write!(f, "{}", url_display(&self.inner.url)),
            SourceKind::Registry | SourceKind::SparseRegistry => {
                write!(f, "registry `{}`", self.display_registry_name())
            }
            SourceKind::LocalRegistry => write!(f, "registry `{}`", url_display(&self.inner.url)),
            SourceKind::Directory => write!(f, "dir {}", url_display(&self.inner.url)),
        }
    }
}
impl Hash for SourceId {
fn hash<S: hash::Hasher>(&self, into: &mut S) {
self.inner.kind.hash(into);
match self.inner.kind {
SourceKind::Git(_) => self.inner.canonical_url.hash(into),
_ => self.inner.url.as_str().hash(into),
}
}
}
impl Hash for SourceIdInner {
fn hash<S: hash::Hasher>(&self, into: &mut S) {
self.kind.hash(into);
self.precise.hash(into);
self.canonical_url.hash(into);
}
}
impl PartialEq for SourceIdInner {
    /// Compares kind, precise and canonical URL, in that order; the raw URL
    /// and the registry key do not take part.
    fn eq(&self, other: &Self) -> bool {
        let lhs = (&self.kind, &self.precise, &self.canonical_url);
        let rhs = (&other.kind, &other.precise, &other.canonical_url);
        lhs == rhs
    }
}
/// `Display` adapter that prints a source id in its URL form; created by
/// `SourceId::as_url` and `SourceId::as_encoded_url`.
pub struct SourceIdAsUrl<'a> {
/// The source id data being rendered.
inner: &'a SourceIdInner,
/// Forwarded to `GitReference::pretty_ref` when rendering a git reference.
encoded: bool,
}
impl<'a> fmt::Display for SourceIdAsUrl<'a> {
    /// Writes `<protocol>+` (when the kind has a protocol), then the URL.
    /// Git sources additionally get `?<reference>` (if the reference has a
    /// pretty form) and `#<precise>` (if precise information is present).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let inner = self.inner;
        if let Some(protocol) = inner.kind.protocol() {
            write!(f, "{protocol}+")?;
        }
        write!(f, "{}", inner.url)?;
        if let SourceKind::Git(reference) = &inner.kind {
            if let Some(pretty) = reference.pretty_ref(self.encoded) {
                write!(f, "?{}", pretty)?;
            }
            if let Some(precise) = &inner.precise {
                write!(f, "#{}", precise)?;
            }
        }
        Ok(())
    }
}
impl KeyOf {
    /// Returns the stored key, whichever variant holds it.
    fn key(&self) -> &str {
        let (KeyOf::Registry(name) | KeyOf::Source(name)) = self;
        name.as_str()
    }

    /// Returns the key only for the `Registry` variant; `Source` keys give
    /// `None`.
    fn alternative_registry(&self) -> Option<&str> {
        if let KeyOf::Registry(name) = self {
            Some(name.as_str())
        } else {
            None
        }
    }
}
#[cfg(test)]
mod tests {
use super::{GitReference, SourceId, SourceKind};
use crate::util::{GlobalContext, IntoUrl};
/// Git sources that differ only in URL scheme compare equal, while a
/// different git reference makes them unequal.
#[test]
fn github_sources_equal() {
    let default_branch = || SourceKind::Git(GitReference::DefaultBranch);

    let https_url = "https://github.com/foo/bar".into_url().unwrap();
    let s1 = SourceId::new(default_branch(), https_url, None).unwrap();

    let git_url = "git://github.com/foo/bar".into_url().unwrap();
    let s2 = SourceId::new(default_branch(), git_url.clone(), None).unwrap();
    assert_eq!(s1, s2);

    let foo_branch = SourceKind::Git(GitReference::Branch("foo".to_string()));
    let s3 = SourceId::new(foo_branch, git_url, None).unwrap();
    assert_ne!(s1, s3);
}
/// Pins the output of `SourceId::stable_hash` and `short_hash` for one source
/// id of each kind. The expected constants are fixed values; a change to any
/// of them means the hashing scheme changed.
#[test]
fn test_stable_hash() {
use std::hash::Hasher;
use std::path::Path;
use crate::util::StableHasher;
// Workspace root used for `stable_hash`; platform-specific spelling.
#[cfg(not(windows))]
let ws_root = Path::new("/tmp/ws");
#[cfg(windows)]
let ws_root = Path::new(r"C:\\tmp\ws");
// Helper: run `stable_hash` relative to `ws_root` and return the digest.
let gen_hash = |source_id: SourceId| {
let mut hasher = StableHasher::new();
source_id.stable_hash(ws_root, &mut hasher);
Hasher::finish(&hasher)
};
// crates.io as resolved from a default context.
let source_id = SourceId::crates_io(&GlobalContext::default().unwrap()).unwrap();
assert_eq!(gen_hash(source_id), 7062945687441624357);
assert_eq!(crate::util::hex::short_hash(&source_id), "25cdd57fae9f0462");
// Plain registry without a key.
let url = "https://my-crates.io".into_url().unwrap();
let source_id = SourceId::for_registry(&url).unwrap();
assert_eq!(gen_hash(source_id), 8310250053664888498);
assert_eq!(crate::util::hex::short_hash(&source_id), "b2d65deb64f05373");
// Alternative registry with a key.
let url = "https://your-crates.io".into_url().unwrap();
let source_id = SourceId::for_alt_registry(&url, "alt").unwrap();
assert_eq!(gen_hash(source_id), 14149534903000258933);
assert_eq!(crate::util::hex::short_hash(&source_id), "755952de063f5dc4");
// Sparse registry without a key.
let url = "sparse+https://my-crates.io".into_url().unwrap();
let source_id = SourceId::for_registry(&url).unwrap();
assert_eq!(gen_hash(source_id), 16249512552851930162);
assert_eq!(crate::util::hex::short_hash(&source_id), "327cfdbd92dd81e1");
// Sparse alternative registry with a key.
let url = "sparse+https://your-crates.io".into_url().unwrap();
let source_id = SourceId::for_alt_registry(&url, "alt").unwrap();
assert_eq!(gen_hash(source_id), 6156697384053352292);
assert_eq!(crate::util::hex::short_hash(&source_id), "64a713b6a6fb7055");
// Git source addressed through a `file://` URL.
let url = "file:///tmp/ws/crate".into_url().unwrap();
let source_id = SourceId::for_git(&url, GitReference::DefaultBranch).unwrap();
assert_eq!(gen_hash(source_id), 473480029881867801);
assert_eq!(crate::util::hex::short_hash(&source_id), "199e591d94239206");
// The remaining kinds are built from a filesystem path inside the workspace,
// so most expectations differ between Windows and other platforms.
let path = &ws_root.join("crate");
let source_id = SourceId::for_local_registry(path).unwrap();
#[cfg(not(windows))]
{
assert_eq!(gen_hash(source_id), 11515846423845066584);
assert_eq!(crate::util::hex::short_hash(&source_id), "58d73c154f81d09f");
}
#[cfg(windows)]
{
assert_eq!(gen_hash(source_id), 6146331155906064276);
assert_eq!(crate::util::hex::short_hash(&source_id), "946fb2239f274c55");
}
// Path source: `stable_hash` is asserted with one value for all platforms
// (it hashes the workspace-relative path), while `short_hash` is not.
let source_id = SourceId::for_path(path).unwrap();
assert_eq!(gen_hash(source_id), 215644081443634269);
#[cfg(not(windows))]
assert_eq!(crate::util::hex::short_hash(&source_id), "64bace89c92b101f");
#[cfg(windows)]
assert_eq!(crate::util::hex::short_hash(&source_id), "01e1e6c391813fb6");
// Directory source.
let source_id = SourceId::for_directory(path).unwrap();
#[cfg(not(windows))]
{
assert_eq!(gen_hash(source_id), 6127590343904940368);
assert_eq!(crate::util::hex::short_hash(&source_id), "505191d1f3920955");
}
#[cfg(windows)]
{
assert_eq!(gen_hash(source_id), 10423446877655960172);
assert_eq!(crate::util::hex::short_hash(&source_id), "6c8ad69db585a790");
}
}
/// A sparse registry id survives being rendered with `as_url` and parsed
/// back with `from_url`.
#[test]
fn serde_roundtrip() {
    let index = "sparse+https://my-crates.io/".into_url().unwrap();
    let original = SourceId::for_registry(&index).unwrap();

    let rendered = original.as_url().to_string();
    let parsed = SourceId::from_url(&rendered).unwrap();

    assert_eq!(rendered, "sparse+https://my-crates.io/");
    assert_eq!(original, parsed);
}
/// A git reference full of URL-special characters round-trips through the
/// encoded URL form, and the encoded form is pinned.
#[test]
fn gitrefs_roundtrip() {
    let repo = "https://host/path".into_url().unwrap();
    let tricky_branch =
        GitReference::Branch("*-._+20%30 Z/z#foo=bar&zap[]?to\\()'\"".to_string());

    let s1 = SourceId::for_git(&repo, tricky_branch).unwrap();
    let ser1 = s1.as_encoded_url().to_string();
    let s2 = SourceId::from_url(&ser1).expect("Failed to deserialize");
    let ser2 = s2.as_encoded_url().to_string();

    assert_eq!(ser1, ser2, "Serialized forms don't match");
    assert_eq!(s1, s2, "SourceId doesn't round-trip");
    assert_eq!(
        ser1,
        "git+https://host/path?branch=*-._%2B20%2530+Z%2Fz%23foo%3Dbar%26zap%5B%5D%3Fto%5C%28%29%27%22"
    );
}
}
/// Returns `true` when the `__CARGO_TEST_CRATES_IO_URL_DO_NOT_USE_THIS`
/// environment variable is set (and valid Unicode) and equals `url`.
// Reading the environment directly is intentional here, hence the allow.
#[allow(clippy::disallowed_methods)]
fn is_overridden_crates_io_url(url: &str) -> bool {
    match std::env::var("__CARGO_TEST_CRATES_IO_URL_DO_NOT_USE_THIS") {
        Ok(overridden) => overridden == url,
        Err(_) => false,
    }
}