cargo/util/
canonical_url.rs

1use crate::util::errors::CargoResult;
2use std::hash::{self, Hash};
3use url::Url;
4
5/// A newtype wrapper around `Url` which represents a "canonical" version of an
6/// original URL.
7///
8/// A "canonical" url is only intended for internal comparison purposes in
9/// Cargo. It's to help paper over mistakes such as depending on
10/// `github.com/foo/bar` vs `github.com/foo/bar.git`. This is **only** for
11/// internal purposes within Cargo and provides no means to actually read the
12/// underlying string value of the `Url` it contains. This is intentional,
13/// because all fetching should still happen within the context of the original
14/// URL.
15#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
16pub struct CanonicalUrl(Url);
17
18impl CanonicalUrl {
19    pub fn new(url: &Url) -> CargoResult<CanonicalUrl> {
20        let mut url = url.clone();
21
22        // cannot-be-a-base-urls (e.g., `github.com:rust-lang/rustfmt.git`)
23        // are not supported.
24        if url.cannot_be_a_base() {
25            anyhow::bail!(
26                "invalid url `{}`: cannot-be-a-base-URLs are not supported",
27                url
28            )
29        }
30
31        // Strip a trailing slash.
32        if url.path().ends_with('/') {
33            url.path_segments_mut().unwrap().pop_if_empty();
34        }
35
36        // For GitHub URLs specifically, just lower-case everything. GitHub
37        // treats both the same, but they hash differently, and we're gonna be
38        // hashing them. This wants a more general solution, and also we're
39        // almost certainly not using the same case conversion rules that GitHub
40        // does. (See issue #84)
41        if url.host_str() == Some("github.com") {
42            url = format!("https{}", &url[url::Position::AfterScheme..])
43                .parse()
44                .unwrap();
45            let path = url.path().to_lowercase();
46            url.set_path(&path);
47        }
48
49        // Repos can generally be accessed with or without `.git` extension.
50        let needs_chopping = url.path().ends_with(".git");
51        if needs_chopping {
52            let last = {
53                let last = url.path_segments().unwrap().next_back().unwrap();
54                last[..last.len() - 4].to_owned()
55            };
56            url.path_segments_mut().unwrap().pop().push(&last);
57        }
58
59        Ok(CanonicalUrl(url))
60    }
61
62    /// Returns the raw canonicalized URL, although beware that this should
63    /// never be used/displayed/etc, it should only be used for internal data
64    /// structures and hashes and such.
65    pub fn raw_canonicalized_url(&self) -> &Url {
66        &self.0
67    }
68}
69
70// See comment in `source_id.rs` for why we explicitly use `as_str()` here.
71impl Hash for CanonicalUrl {
72    fn hash<S: hash::Hasher>(&self, into: &mut S) {
73        self.0.as_str().hash(into);
74    }
75}