cargo/util/canonical_url.rs
1use crate::util::errors::CargoResult;
2use std::hash::{self, Hash};
3use url::Url;
4
/// A newtype wrapper around `Url` which represents a "canonical" version of an
/// original URL.
///
/// A "canonical" URL is only intended for internal comparison purposes in
/// Cargo. It helps paper over mistakes such as depending on
/// `github.com/foo/bar` vs `github.com/foo/bar.git`, so that both spellings
/// compare (and hash) as equal.
///
/// This is **only** for internal purposes within Cargo. The wrapped `Url` is
/// private and the type deliberately offers no way to display it; the single
/// accessor, `raw_canonicalized_url`, exists for internal data structures and
/// hashes only. This is intentional, because all fetching should still happen
/// within the context of the original URL.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub struct CanonicalUrl(Url);
17
18impl CanonicalUrl {
19 pub fn new(url: &Url) -> CargoResult<CanonicalUrl> {
20 let mut url = url.clone();
21
22 // cannot-be-a-base-urls (e.g., `github.com:rust-lang/rustfmt.git`)
23 // are not supported.
24 if url.cannot_be_a_base() {
25 anyhow::bail!(
26 "invalid url `{}`: cannot-be-a-base-URLs are not supported",
27 url
28 )
29 }
30
31 // Strip a trailing slash.
32 if url.path().ends_with('/') {
33 url.path_segments_mut().unwrap().pop_if_empty();
34 }
35
36 // For GitHub URLs specifically, just lower-case everything. GitHub
37 // treats both the same, but they hash differently, and we're gonna be
38 // hashing them. This wants a more general solution, and also we're
39 // almost certainly not using the same case conversion rules that GitHub
40 // does. (See issue #84)
41 if url.host_str() == Some("github.com") {
42 url = format!("https{}", &url[url::Position::AfterScheme..])
43 .parse()
44 .unwrap();
45 let path = url.path().to_lowercase();
46 url.set_path(&path);
47 }
48
49 // Repos can generally be accessed with or without `.git` extension.
50 let needs_chopping = url.path().ends_with(".git");
51 if needs_chopping {
52 let last = {
53 let last = url.path_segments().unwrap().next_back().unwrap();
54 last[..last.len() - 4].to_owned()
55 };
56 url.path_segments_mut().unwrap().pop().push(&last);
57 }
58
59 Ok(CanonicalUrl(url))
60 }
61
62 /// Returns the raw canonicalized URL, although beware that this should
63 /// never be used/displayed/etc, it should only be used for internal data
64 /// structures and hashes and such.
65 pub fn raw_canonicalized_url(&self) -> &Url {
66 &self.0
67 }
68}
69
70// See comment in `source_id.rs` for why we explicitly use `as_str()` here.
71impl Hash for CanonicalUrl {
72 fn hash<S: hash::Hasher>(&self, into: &mut S) {
73 self.0.as_str().hash(into);
74 }
75}