cargo/util/canonical_url.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
use crate::util::errors::CargoResult;
use std::hash::{self, Hash};
use url::Url;
/// A `Url` newtype holding the "canonical" form of some original URL.
///
/// Canonical URLs exist purely so that Cargo can compare and hash URLs
/// internally while glossing over harmless spelling differences, for example
/// a dependency on `github.com/foo/bar` versus `github.com/foo/bar.git`.
///
/// The wrapped `Url` is deliberately kept private and this type is not meant
/// as a way to obtain a URL string for use elsewhere: every fetch must still
/// be performed against the original, un-canonicalized URL.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct CanonicalUrl(Url);
impl CanonicalUrl {
pub fn new(url: &Url) -> CargoResult<CanonicalUrl> {
let mut url = url.clone();
// cannot-be-a-base-urls (e.g., `github.com:rust-lang/rustfmt.git`)
// are not supported.
if url.cannot_be_a_base() {
anyhow::bail!(
"invalid url `{}`: cannot-be-a-base-URLs are not supported",
url
)
}
// Strip a trailing slash.
if url.path().ends_with('/') {
url.path_segments_mut().unwrap().pop_if_empty();
}
// For GitHub URLs specifically, just lower-case everything. GitHub
// treats both the same, but they hash differently, and we're gonna be
// hashing them. This wants a more general solution, and also we're
// almost certainly not using the same case conversion rules that GitHub
// does. (See issue #84)
if url.host_str() == Some("github.com") {
url = format!("https{}", &url[url::Position::AfterScheme..])
.parse()
.unwrap();
let path = url.path().to_lowercase();
url.set_path(&path);
}
// Repos can generally be accessed with or without `.git` extension.
let needs_chopping = url.path().ends_with(".git");
if needs_chopping {
let last = {
let last = url.path_segments().unwrap().next_back().unwrap();
last[..last.len() - 4].to_owned()
};
url.path_segments_mut().unwrap().pop().push(&last);
}
Ok(CanonicalUrl(url))
}
/// Returns the raw canonicalized URL, although beware that this should
/// never be used/displayed/etc, it should only be used for internal data
/// structures and hashes and such.
pub fn raw_canonicalized_url(&self) -> &Url {
&self.0
}
}
// See comment in `source_id.rs` for why we explicitly use `as_str()` here.
impl Hash for CanonicalUrl {
fn hash<S: hash::Hasher>(&self, into: &mut S) {
self.0.as_str().hash(into);
}
}