Skip to main content

cargo/sources/git/
utils.rs

1//! Utilities for handling git repositories, mainly around
2//! authentication/cloning.
3
4use crate::core::{GitReference, SourceId, Verbosity};
5use crate::sources::git::fetch::RemoteKind;
6use crate::sources::git::oxide;
7use crate::sources::git::oxide::cargo_config_to_gitoxide_overrides;
8use crate::sources::git::source::GitSource;
9use crate::sources::source::Source as _;
10use crate::util::HumanBytes;
11use crate::util::errors::{CargoResult, GitCliError};
12use crate::util::{GlobalContext, IntoUrl, MetricsCounter, Progress, network};
13
14use anyhow::{Context as _, anyhow};
15use cargo_util::{ProcessBuilder, paths};
16use curl::easy::List;
17use git2::{ErrorClass, ObjectType, Oid};
18use tracing::{debug, info};
19use url::Url;
20
21use std::borrow::Cow;
22use std::path::{Path, PathBuf};
23use std::process::Command;
24use std::str;
25use std::sync::atomic::{AtomicBool, Ordering};
26use std::time::{Duration, Instant};
27
/// Name of the marker file which, when present in a checkout, indicates that
/// `git reset` has been done and the checkout is ready to go. See
/// [`GitCheckout::reset`] for why we need this.
const CHECKOUT_READY_LOCK: &str = ".cargo-ok";
31
/// A short abbreviated OID.
///
/// Wraps the [`git2::Buf`] returned by `short_id()` directly, which avoids an
/// extra allocation in [`GitDatabase::to_short_id`].
pub struct GitShortID(git2::Buf);
36
37impl GitShortID {
38    /// Views the short ID as a `str`.
39    pub fn as_str(&self) -> &str {
40        self.0.as_str().unwrap()
41    }
42}
43
/// A remote repository, identified by its URL. It gets cloned into a local
/// [`GitDatabase`].
#[derive(PartialEq, Clone, Debug)]
pub struct GitRemote {
    /// URL of the remote repository.
    url: Url,
}
50
/// A local clone of a remote repository's database. Multiple [`GitCheckout`]s
/// can be cloned from a single [`GitDatabase`].
pub struct GitDatabase {
    /// The remote repository this database is fetched from.
    remote: GitRemote,
    /// Path to the root of the underlying Git repository on the local
    /// filesystem. Used as the source URL for local clones.
    path: PathBuf,
    /// Underlying Git repository instance for this database.
    repo: git2::Repository,
}
61
/// A local checkout of a particular revision from a [`GitDatabase`].
pub struct GitCheckout<'a> {
    /// The git database this checkout is cloned from.
    database: &'a GitDatabase,
    /// Path to the root of the checkout on the local filesystem: the
    /// repository's working directory, or its git directory if it has none.
    path: PathBuf,
    /// The git revision this checkout is for.
    revision: git2::Oid,
    /// Underlying Git repository instance for this checkout.
    repo: git2::Repository,
}
73
74impl GitRemote {
75    /// Creates an instance for a remote repository URL.
76    pub fn new(url: &Url) -> GitRemote {
77        GitRemote { url: url.clone() }
78    }
79
80    /// Gets the remote repository URL.
81    pub fn url(&self) -> &Url {
82        &self.url
83    }
84
85    /// Fetches and checkouts to a reference or a revision from this remote
86    /// into a local path.
87    ///
88    /// This ensures that it gets the up-to-date commit when a named reference
89    /// is given (tag, branch, refs/*). Thus, network connection is involved.
90    ///
91    /// If we have a previous instance of [`GitDatabase`] then fetch into that
92    /// if we can. If that can successfully load our revision then we've
93    /// populated the database with the latest version of `reference`, so
94    /// return that database and the rev we resolve to.
95    pub fn checkout(
96        &self,
97        into: &Path,
98        db: Option<GitDatabase>,
99        reference: &GitReference,
100        gctx: &GlobalContext,
101    ) -> CargoResult<(GitDatabase, git2::Oid)> {
102        if let Some(mut db) = db {
103            fetch(
104                &mut db.repo,
105                self.url.as_str(),
106                reference,
107                gctx,
108                RemoteKind::GitDependency,
109            )
110            .with_context(|| format!("failed to fetch into: {}", into.display()))?;
111
112            if let Some(rev) = resolve_ref(reference, &db.repo).ok() {
113                return Ok((db, rev));
114            }
115        }
116
117        // Otherwise start from scratch to handle corrupt git repositories.
118        // After our fetch (which is interpreted as a clone now) we do the same
119        // resolution to figure out what we cloned.
120        if into.exists() {
121            paths::remove_dir_all(into)?;
122        }
123        paths::create_dir_all(into)?;
124        let mut repo = init(into, true)?;
125        fetch(
126            &mut repo,
127            self.url.as_str(),
128            reference,
129            gctx,
130            RemoteKind::GitDependency,
131        )
132        .with_context(|| format!("failed to clone into: {}", into.display()))?;
133        let rev = resolve_ref(reference, &repo)?;
134
135        Ok((
136            GitDatabase {
137                remote: self.clone(),
138                path: into.to_path_buf(),
139                repo,
140            },
141            rev,
142        ))
143    }
144
145    /// Creates a [`GitDatabase`] of this remote at `db_path`.
146    pub fn db_at(&self, db_path: &Path) -> CargoResult<GitDatabase> {
147        let repo = git2::Repository::open(db_path)?;
148        Ok(GitDatabase {
149            remote: self.clone(),
150            path: db_path.to_path_buf(),
151            repo,
152        })
153    }
154}
155
156impl GitDatabase {
157    /// Checkouts to a revision at `dest`ination from this database.
158    #[tracing::instrument(skip(self, gctx))]
159    pub fn copy_to(
160        &self,
161        rev: git2::Oid,
162        dest: &Path,
163        gctx: &GlobalContext,
164        quiet: bool,
165    ) -> CargoResult<GitCheckout<'_>> {
166        // If the existing checkout exists, and it is fresh, use it.
167        // A non-fresh checkout can happen if the checkout operation was
168        // interrupted. In that case, the checkout gets deleted and a new
169        // clone is created.
170        let checkout = match git2::Repository::open(dest)
171            .ok()
172            .map(|repo| GitCheckout::new(self, rev, repo))
173            .filter(|co| co.is_fresh())
174        {
175            Some(co) => co,
176            None => {
177                let (checkout, guard) = GitCheckout::clone_into(dest, self, rev, gctx)?;
178                checkout.update_submodules(gctx, quiet)?;
179                guard.mark_ok()?;
180                checkout
181            }
182        };
183
184        Ok(checkout)
185    }
186
187    /// Get a short OID for a `revision`, usually 7 chars or more if ambiguous.
188    pub fn to_short_id(&self, revision: git2::Oid) -> CargoResult<GitShortID> {
189        let obj = self.repo.find_object(revision, None)?;
190        Ok(GitShortID(obj.short_id()?))
191    }
192
193    /// Checks if the database contains the object of this `oid`..
194    pub fn contains(&self, oid: git2::Oid) -> bool {
195        self.repo.revparse_single(&oid.to_string()).is_ok()
196    }
197
198    /// [`resolve_ref`]s this reference with this database.
199    pub fn resolve(&self, r: &GitReference) -> CargoResult<git2::Oid> {
200        resolve_ref(r, &self.repo)
201    }
202}
203
204/// Resolves [`GitReference`] to an object ID with objects the `repo` currently has.
205pub fn resolve_ref(gitref: &GitReference, repo: &git2::Repository) -> CargoResult<git2::Oid> {
206    let id = match gitref {
207        // Note that we resolve the named tag here in sync with where it's
208        // fetched into via `fetch` below.
209        GitReference::Tag(s) => (|| -> CargoResult<git2::Oid> {
210            let refname = format!("refs/remotes/origin/tags/{}", s);
211            let id = repo.refname_to_id(&refname)?;
212            let obj = repo.find_object(id, None)?;
213            let obj = obj.peel(ObjectType::Commit)?;
214            Ok(obj.id())
215        })()
216        .with_context(|| format!("failed to find tag `{}`", s))?,
217
218        // Resolve the remote name since that's all we're configuring in
219        // `fetch` below.
220        GitReference::Branch(s) => {
221            let name = format!("origin/{}", s);
222            let b = repo
223                .find_branch(&name, git2::BranchType::Remote)
224                .with_context(|| format!("failed to find branch `{}`", s))?;
225            b.get()
226                .target()
227                .ok_or_else(|| anyhow::format_err!("branch `{}` did not have a target", s))?
228        }
229
230        // We'll be using the HEAD commit
231        GitReference::DefaultBranch => {
232            let head_id = repo.refname_to_id("refs/remotes/origin/HEAD")?;
233            let head = repo.find_object(head_id, None)?;
234            head.peel(ObjectType::Commit)?.id()
235        }
236
237        GitReference::Rev(s) => {
238            let obj = repo.revparse_single(s)?;
239            match obj.as_tag() {
240                Some(tag) => tag.target_id(),
241                None => obj.id(),
242            }
243        }
244    };
245    Ok(id)
246}
247
248impl<'a> GitCheckout<'a> {
249    /// Creates an instance of [`GitCheckout`]. This doesn't imply the checkout
250    /// is done. Use [`GitCheckout::is_fresh`] to check.
251    ///
252    /// * The `database` is where this checkout is from.
253    /// * The `repo` will be the checked out Git repository.
254    fn new(
255        database: &'a GitDatabase,
256        revision: git2::Oid,
257        repo: git2::Repository,
258    ) -> GitCheckout<'a> {
259        let path = repo.workdir().unwrap_or_else(|| repo.path());
260        GitCheckout {
261            path: path.to_path_buf(),
262            database,
263            revision,
264            repo,
265        }
266    }
267
268    /// Gets the remote repository URL.
269    fn remote_url(&self) -> &Url {
270        &self.database.remote.url()
271    }
272
273    /// Clone a repo for a `revision` into a local path from a `database`.
274    /// This is a filesystem-to-filesystem clone.
275    fn clone_into(
276        into: &Path,
277        database: &'a GitDatabase,
278        revision: git2::Oid,
279        gctx: &GlobalContext,
280    ) -> CargoResult<(GitCheckout<'a>, CheckoutGuard)> {
281        let dirname = into.parent().unwrap();
282        paths::create_dir_all(&dirname)?;
283        if into.exists() {
284            paths::remove_dir_all(into)?;
285        }
286
287        // we're doing a local filesystem-to-filesystem clone so there should
288        // be no need to respect global configuration options, so pass in
289        // an empty instance of `git2::Config` below.
290        let git_config = git2::Config::new()?;
291
292        // Clone the repository, but make sure we use the "local" option in
293        // libgit2 which will attempt to use hardlinks to set up the database.
294        // This should speed up the clone operation quite a bit if it works.
295        //
296        // Note that we still use the same fetch options because while we don't
297        // need authentication information we may want progress bars and such.
298        let url = database.path.into_url()?;
299        let mut repo = None;
300        with_fetch_options(&git_config, url.as_str(), gctx, &mut |fopts| {
301            let mut checkout = git2::build::CheckoutBuilder::new();
302            checkout.dry_run(); // we'll do this below during a `reset`
303
304            let r = git2::build::RepoBuilder::new()
305                // use hard links and/or copy the database, we're doing a
306                // filesystem clone so this'll speed things up quite a bit.
307                .clone_local(git2::build::CloneLocal::Local)
308                .with_checkout(checkout)
309                .fetch_options(fopts)
310                .clone(url.as_str(), into)?;
311            // `git2` doesn't seem to handle shallow repos correctly when doing
312            // a local clone. Fortunately all that's needed is the copy of the
313            // one file that defines the shallow boundary, the commits which
314            // have their parents omitted as part of the shallow clone.
315            //
316            // TODO(git2): remove this when git2 supports shallow clone correctly
317            if database.repo.is_shallow() {
318                std::fs::copy(
319                    database.repo.path().join("shallow"),
320                    r.path().join("shallow"),
321                )?;
322            }
323            repo = Some(r);
324            Ok(())
325        })?;
326        let repo = repo.unwrap();
327
328        let checkout = GitCheckout::new(database, revision, repo);
329        let guard = checkout.reset(gctx)?;
330        Ok((checkout, guard))
331    }
332
333    /// Checks if the `HEAD` of this checkout points to the expected revision.
334    fn is_fresh(&self) -> bool {
335        match self.repo.revparse_single("HEAD") {
336            Ok(ref head) if head.id() == self.revision => {
337                // See comments in reset() for why we check this
338                self.path.join(CHECKOUT_READY_LOCK).exists()
339            }
340            _ => false,
341        }
342    }
343
344    /// Similar to [`reset()`]. This roughly performs `git reset --hard` to the
345    /// revision of this checkout, with additional interrupt protection by a
346    /// dummy file [`CHECKOUT_READY_LOCK`].
347    ///
348    /// If we're interrupted while performing a `git reset` (e.g., we die
349    /// because of a signal) Cargo needs to be sure to try to check out this
350    /// repo again on the next go-round.
351    ///
352    /// To enable this we have a dummy file in our checkout, [`.cargo-ok`],
353    /// which if present means that the repo has been successfully reset and is
354    /// ready to go. Hence if we start to do a reset, we make sure this file
355    /// *doesn't* exist. The caller of [`reset`] has an option to perform additional operations
356    /// (e.g. submodule update) before marking the check-out as ready.
357    ///
358    /// [`.cargo-ok`]: CHECKOUT_READY_LOCK
359    fn reset(&self, gctx: &GlobalContext) -> CargoResult<CheckoutGuard> {
360        let guard = CheckoutGuard::guard(&self.path);
361        info!("reset {} to {}", self.repo.path().display(), self.revision);
362
363        // Ensure libgit2 won't mess with newlines when we vendor.
364        if let Ok(mut git_config) = self.repo.config() {
365            git_config.set_bool("core.autocrlf", false)?;
366        }
367
368        let object = self.repo.find_object(self.revision, None)?;
369        reset(&self.repo, &object, gctx)?;
370
371        Ok(guard)
372    }
373
374    /// Like `git submodule update --recursive` but for this git checkout.
375    ///
376    /// This function respects `submodule.<name>.update = none`[^1] git config.
377    /// Submodules set to `none` won't be fetched.
378    ///
379    /// [^1]: <https://git-scm.com/docs/git-submodule#Documentation/git-submodule.txt-none>
380    fn update_submodules(&self, gctx: &GlobalContext, quiet: bool) -> CargoResult<()> {
381        return update_submodules(&self.repo, gctx, quiet, self.remote_url().as_str());
382
383        /// Recursive helper for [`GitCheckout::update_submodules`].
384        fn update_submodules(
385            repo: &git2::Repository,
386            gctx: &GlobalContext,
387            quiet: bool,
388            parent_remote_url: &str,
389        ) -> CargoResult<()> {
390            debug!("update submodules for: {:?}", repo.workdir().unwrap());
391
392            for mut child in repo.submodules()? {
393                update_submodule(repo, &mut child, gctx, quiet, parent_remote_url).with_context(
394                    || {
395                        format!(
396                            "failed to update submodule `{}`",
397                            child.name().unwrap_or("")
398                        )
399                    },
400                )?;
401            }
402            Ok(())
403        }
404
405        /// Update a single Git submodule, and recurse into its submodules.
406        fn update_submodule(
407            parent: &git2::Repository,
408            child: &mut git2::Submodule<'_>,
409            gctx: &GlobalContext,
410            quiet: bool,
411            parent_remote_url: &str,
412        ) -> CargoResult<()> {
413            child.init(false)?;
414
415            let child_url_str = child.url().ok_or_else(|| {
416                anyhow::format_err!("non-utf8 url for submodule {:?}?", child.path())
417            })?;
418
419            // Skip the submodule if the config says not to update it.
420            if child.update_strategy() == git2::SubmoduleUpdate::None {
421                gctx.shell().status(
422                    "Skipping",
423                    format!(
424                        "git submodule `{}` due to update strategy in .gitmodules",
425                        child_url_str
426                    ),
427                )?;
428                return Ok(());
429            }
430
431            let child_remote_url = absolute_submodule_url(parent_remote_url, child_url_str)?;
432
433            // A submodule which is listed in .gitmodules but not actually
434            // checked out will not have a head id, so we should ignore it.
435            let Some(head) = child.head_id() else {
436                return Ok(());
437            };
438
439            // If the submodule hasn't been checked out yet, we need to
440            // clone it. If it has been checked out and the head is the same
441            // as the submodule's head, then we can skip an update and keep
442            // recursing.
443            let head_and_repo = child.open().and_then(|repo| {
444                let target = repo.head()?.target();
445                Ok((target, repo))
446            });
447            let repo = match head_and_repo {
448                Ok((head, repo)) => {
449                    if child.head_id() == head {
450                        return update_submodules(&repo, gctx, quiet, &child_remote_url);
451                    }
452                    repo
453                }
454                Err(..) => {
455                    let path = parent.workdir().unwrap().join(child.path());
456                    let _ = paths::remove_dir_all(&path);
457                    init(&path, false)?
458                }
459            };
460            // Fetch submodule database and checkout to target revision
461            let reference = GitReference::Rev(head.to_string());
462
463            // GitSource created from SourceId without git precise will result to
464            // locked_rev being Deferred and fetch_db always try to fetch if online
465            let source_id = SourceId::for_git(&child_remote_url.into_url()?, reference)?
466                .with_git_precise(Some(head.to_string()));
467
468            let mut source = GitSource::new(source_id, gctx)?;
469            source.set_quiet(quiet);
470
471            let (db, actual_rev) = source.fetch_db(true).with_context(|| {
472                let name = child.name().unwrap_or("");
473                format!("failed to fetch submodule `{name}` from {child_remote_url}",)
474            })?;
475            db.copy_to(actual_rev, repo.path(), gctx, quiet)?;
476            Ok(())
477        }
478    }
479}
480
/// Handle for the readiness marker file of a checkout.
///
/// See [`GitCheckout::reset`] for rationale on this type.
#[must_use]
struct CheckoutGuard {
    /// Path of the [`CHECKOUT_READY_LOCK`] marker file inside the checkout.
    ok_file: PathBuf,
}
486
487impl CheckoutGuard {
488    fn guard(path: &Path) -> Self {
489        let ok_file = path.join(CHECKOUT_READY_LOCK);
490        let _ = paths::remove_file(&ok_file);
491        Self { ok_file }
492    }
493
494    fn mark_ok(self) -> CargoResult<()> {
495        let _ = paths::create(self.ok_file)?;
496        Ok(())
497    }
498}
499
500/// Constructs an absolute URL for a child submodule URL with its parent base URL.
501///
502/// Git only assumes a submodule URL is a relative path if it starts with `./`
503/// or `../` [^1]. To fetch the correct repo, we need to construct an absolute
504/// submodule URL.
505///
506/// At this moment it comes with some limitations:
507///
508/// * GitHub doesn't accept non-normalized URLs with relative paths.
509///   (`ssh://git@github.com/rust-lang/cargo.git/relative/..` is invalid)
510/// * `url` crate cannot parse SCP-like URLs.
511///   (`git@github.com:rust-lang/cargo.git` is not a valid WHATWG URL)
512///
513/// To overcome these, this patch always tries [`Url::parse`] first to normalize
514/// the path. If it couldn't, append the relative path as the last resort and
515/// pray the remote git service supports non-normalized URLs.
516///
517/// See also rust-lang/cargo#12404 and rust-lang/cargo#12295.
518///
519/// [^1]: <https://git-scm.com/docs/git-submodule>
520fn absolute_submodule_url<'s>(base_url: &str, submodule_url: &'s str) -> CargoResult<Cow<'s, str>> {
521    let absolute_url = if ["./", "../"].iter().any(|p| submodule_url.starts_with(p)) {
522        match Url::parse(base_url) {
523            Ok(mut base_url) => {
524                let path = base_url.path();
525                if !path.ends_with('/') {
526                    base_url.set_path(&format!("{path}/"));
527                }
528                let absolute_url = base_url.join(submodule_url).with_context(|| {
529                    format!(
530                        "failed to parse relative child submodule url `{submodule_url}` \
531                        using parent base url `{base_url}`"
532                    )
533                })?;
534                Cow::from(absolute_url.to_string())
535            }
536            Err(_) => {
537                let mut absolute_url = base_url.to_string();
538                if !absolute_url.ends_with('/') {
539                    absolute_url.push('/');
540                }
541                absolute_url.push_str(submodule_url);
542                Cow::from(absolute_url)
543            }
544        }
545    } else {
546        Cow::from(submodule_url)
547    };
548
549    Ok(absolute_url)
550}
551
552/// Prepare the authentication callbacks for cloning a git repository.
553///
554/// The main purpose of this function is to construct the "authentication
555/// callback" which is used to clone a repository. This callback will attempt to
556/// find the right authentication on the system (without user input) and will
557/// guide libgit2 in doing so.
558///
559/// The callback is provided `allowed` types of credentials, and we try to do as
560/// much as possible based on that:
561///
562/// * Prioritize SSH keys from the local ssh agent as they're likely the most
563///   reliable. The username here is prioritized from the credential
564///   callback, then from whatever is configured in git itself, and finally
565///   we fall back to the generic user of `git`.
566///
567/// * If a username/password is allowed, then we fallback to git2-rs's
568///   implementation of the credential helper. This is what is configured
569///   with `credential.helper` in git, and is the interface for the macOS
570///   keychain, for example.
571///
572/// * After the above two have failed, we just kinda grapple attempting to
573///   return *something*.
574///
575/// If any form of authentication fails, libgit2 will repeatedly ask us for
576/// credentials until we give it a reason to not do so. To ensure we don't
577/// just sit here looping forever we keep track of authentications we've
578/// attempted and we don't try the same ones again.
579fn with_authentication<T, F>(
580    gctx: &GlobalContext,
581    url: &str,
582    cfg: &git2::Config,
583    mut f: F,
584) -> CargoResult<T>
585where
586    F: FnMut(&mut git2::Credentials<'_>) -> CargoResult<T>,
587{
588    let mut cred_helper = git2::CredentialHelper::new(url);
589    cred_helper.config(cfg);
590
591    let mut ssh_username_requested = false;
592    let mut cred_helper_bad = None;
593    let mut ssh_agent_attempts = Vec::new();
594    let mut any_attempts = false;
595    let mut tried_sshkey = false;
596    let mut url_attempt = None;
597
598    let orig_url = url;
599    let mut res = f(&mut |url, username, allowed| {
600        any_attempts = true;
601        if url != orig_url {
602            url_attempt = Some(url.to_string());
603        }
604        // libgit2's "USERNAME" authentication actually means that it's just
605        // asking us for a username to keep going. This is currently only really
606        // used for SSH authentication and isn't really an authentication type.
607        // The logic currently looks like:
608        //
609        //      let user = ...;
610        //      if (user.is_null())
611        //          user = callback(USERNAME, null, ...);
612        //
613        //      callback(SSH_KEY, user, ...)
614        //
615        // So if we're being called here then we know that (a) we're using ssh
616        // authentication and (b) no username was specified in the URL that
617        // we're trying to clone. We need to guess an appropriate username here,
618        // but that may involve a few attempts. Unfortunately we can't switch
619        // usernames during one authentication session with libgit2, so to
620        // handle this we bail out of this authentication session after setting
621        // the flag `ssh_username_requested`, and then we handle this below.
622        if allowed.contains(git2::CredentialType::USERNAME) {
623            debug_assert!(username.is_none());
624            ssh_username_requested = true;
625            return Err(git2::Error::from_str("gonna try usernames later"));
626        }
627
628        // An "SSH_KEY" authentication indicates that we need some sort of SSH
629        // authentication. This can currently either come from the ssh-agent
630        // process or from a raw in-memory SSH key. Cargo only supports using
631        // ssh-agent currently.
632        //
633        // If we get called with this then the only way that should be possible
634        // is if a username is specified in the URL itself (e.g., `username` is
635        // Some), hence the unwrap() here. We try custom usernames down below.
636        if allowed.contains(git2::CredentialType::SSH_KEY) && !tried_sshkey {
637            // If ssh-agent authentication fails, libgit2 will keep
638            // calling this callback asking for other authentication
639            // methods to try. Make sure we only try ssh-agent once,
640            // to avoid looping forever.
641            tried_sshkey = true;
642            let username = username.unwrap();
643            debug_assert!(!ssh_username_requested);
644            ssh_agent_attempts.push(username.to_string());
645            return git2::Cred::ssh_key_from_agent(username);
646        }
647
648        // Sometimes libgit2 will ask for a username/password in plaintext. This
649        // is where Cargo would have an interactive prompt if we supported it,
650        // but we currently don't! Right now the only way we support fetching a
651        // plaintext password is through the `credential.helper` support, so
652        // fetch that here.
653        //
654        // If ssh-agent authentication fails, libgit2 will keep calling this
655        // callback asking for other authentication methods to try. Check
656        // cred_helper_bad to make sure we only try the git credential helper
657        // once, to avoid looping forever.
658        if allowed.contains(git2::CredentialType::USER_PASS_PLAINTEXT) && cred_helper_bad.is_none()
659        {
660            let r = git2::Cred::credential_helper(cfg, url, username);
661            cred_helper_bad = Some(r.is_err());
662            return r;
663        }
664
665        // I'm... not sure what the DEFAULT kind of authentication is, but seems
666        // easy to support?
667        if allowed.contains(git2::CredentialType::DEFAULT) {
668            return git2::Cred::default();
669        }
670
671        // Whelp, we tried our best
672        Err(git2::Error::from_str("no authentication methods succeeded"))
673    });
674
675    // Ok, so if it looks like we're going to be doing ssh authentication, we
676    // want to try a few different usernames as one wasn't specified in the URL
677    // for us to use. In order, we'll try:
678    //
679    // * A credential helper's username for this URL, if available.
680    // * This account's username.
681    // * "git"
682    //
683    // We have to restart the authentication session each time (due to
684    // constraints in libssh2 I guess? maybe this is inherent to ssh?), so we
685    // call our callback, `f`, in a loop here.
686    if ssh_username_requested {
687        debug_assert!(res.is_err());
688        let mut attempts = vec![String::from("git")];
689        if let Ok(s) = gctx.get_env("USER").or_else(|_| gctx.get_env("USERNAME")) {
690            attempts.push(s.to_string());
691        }
692        if let Some(ref s) = cred_helper.username {
693            attempts.push(s.clone());
694        }
695
696        while let Some(s) = attempts.pop() {
697            // We should get `USERNAME` first, where we just return our attempt,
698            // and then after that we should get `SSH_KEY`. If the first attempt
699            // fails we'll get called again, but we don't have another option so
700            // we bail out.
701            let mut attempts = 0;
702            res = f(&mut |_url, username, allowed| {
703                if allowed.contains(git2::CredentialType::USERNAME) {
704                    return git2::Cred::username(&s);
705                }
706                if allowed.contains(git2::CredentialType::SSH_KEY) {
707                    debug_assert_eq!(Some(&s[..]), username);
708                    attempts += 1;
709                    if attempts == 1 {
710                        ssh_agent_attempts.push(s.to_string());
711                        return git2::Cred::ssh_key_from_agent(&s);
712                    }
713                }
714                Err(git2::Error::from_str("no authentication methods succeeded"))
715            });
716
717            // If we made two attempts then that means:
718            //
719            // 1. A username was requested, we returned `s`.
720            // 2. An ssh key was requested, we returned to look up `s` in the
721            //    ssh agent.
722            // 3. For whatever reason that lookup failed, so we were asked again
723            //    for another mode of authentication.
724            //
725            // Essentially, if `attempts == 2` then in theory the only error was
726            // that this username failed to authenticate (e.g., no other network
727            // errors happened). Otherwise something else is funny so we bail
728            // out.
729            if attempts != 2 {
730                break;
731            }
732        }
733    }
734    let mut err = match res {
735        Ok(e) => return Ok(e),
736        Err(e) => e,
737    };
738
739    // In the case of an authentication failure (where we tried something) then
740    // we try to give a more helpful error message about precisely what we
741    // tried.
742    if any_attempts {
743        let mut msg = "failed to authenticate when downloading \
744                       repository"
745            .to_string();
746
747        if let Some(attempt) = &url_attempt {
748            if url != attempt {
749                msg.push_str(": ");
750                msg.push_str(attempt);
751            }
752        }
753        msg.push('\n');
754        if !ssh_agent_attempts.is_empty() {
755            let names = ssh_agent_attempts
756                .iter()
757                .map(|s| format!("`{}`", s))
758                .collect::<Vec<_>>()
759                .join(", ");
760            msg.push_str(&format!(
761                "\n* attempted ssh-agent authentication, but \
762                 no usernames succeeded: {}",
763                names
764            ));
765        }
766        if let Some(failed_cred_helper) = cred_helper_bad {
767            if failed_cred_helper {
768                msg.push_str(
769                    "\n* attempted to find username/password via \
770                     git's `credential.helper` support, but failed",
771                );
772            } else {
773                msg.push_str(
774                    "\n* attempted to find username/password via \
775                     `credential.helper`, but maybe the found \
776                     credentials were incorrect",
777                );
778            }
779        }
780        msg.push_str("\n\n");
781        msg.push_str("if the git CLI succeeds then `net.git-fetch-with-cli` may help here\n");
782        msg.push_str("https://doc.rust-lang.org/cargo/reference/config.html#netgit-fetch-with-cli");
783        err = err.context(msg);
784
        // Otherwise, if we didn't even get to the authentication phase, then we
        // may have failed to set up a connection. In these cases hint on the
        // `net.git-fetch-with-cli` configuration option.
788    } else if let Some(e) = err.downcast_ref::<git2::Error>() {
789        match e.class() {
790            ErrorClass::Net
791            | ErrorClass::Ssl
792            | ErrorClass::Submodule
793            | ErrorClass::FetchHead
794            | ErrorClass::Ssh
795            | ErrorClass::Http => {
796                let msg = format!(
797                    concat!(
798                        "network failure seems to have happened\n",
799                        "if a proxy or similar is necessary `net.git-fetch-with-cli` may help here\n",
800                        "https://doc.rust-lang.org/cargo/reference/config.html#netgit-fetch-with-cli",
801                        "{}"
802                    ),
803                    note_github_pull_request(url).unwrap_or_default()
804                );
805                err = err.context(msg);
806            }
807            ErrorClass::Callback => {
808                // This unwraps the git2 error. We're using the callback error
809                // specifically to convey errors from Rust land through the C
810                // callback interface. We don't need the `; class=Callback
811                // (26)` that gets tacked on to the git2 error message.
812                err = anyhow::format_err!("{}", e.message());
813            }
814            _ => {}
815        }
816    }
817
818    Err(err)
819}
820
821/// `git reset --hard` to the given `obj` for the `repo`.
822///
823/// The `obj` is a commit-ish to which the head should be moved.
824fn reset(repo: &git2::Repository, obj: &git2::Object<'_>, gctx: &GlobalContext) -> CargoResult<()> {
825    let mut pb = Progress::new("Checkout", gctx);
826    let mut opts = git2::build::CheckoutBuilder::new();
827    opts.progress(|_, cur, max| {
828        drop(pb.tick(cur, max, ""));
829    });
830    debug!("doing reset");
831    repo.reset(obj, git2::ResetType::Hard, Some(&mut opts))?;
832    debug!("reset done");
833    Ok(())
834}
835
836/// Prepares the callbacks for fetching a git repository.
837///
838/// The main purpose of this function is to construct everything before a fetch.
839/// This will attempt to setup a progress bar, the authentication for git,
840/// ssh known hosts check, and the network retry mechanism.
841///
842/// The callback is provided a fetch options, which can be used by the actual
843/// git fetch.
844pub fn with_fetch_options(
845    git_config: &git2::Config,
846    url: &str,
847    gctx: &GlobalContext,
848    cb: &mut dyn FnMut(git2::FetchOptions<'_>) -> CargoResult<()>,
849) -> CargoResult<()> {
850    let mut progress = Progress::new("Fetch", gctx);
851    let ssh_config = gctx.net_config()?.ssh.as_ref();
852    let config_known_hosts = ssh_config.and_then(|ssh| ssh.known_hosts.as_ref());
853    let diagnostic_home_config = gctx.diagnostic_home_config();
854    network::retry::with_retry(gctx, || {
855        // Hack: libgit2 disallows overriding the error from check_cb since v1.8.0,
856        // so we store the error additionally and unwrap it later
857        let mut check_cb_result = Ok(());
858        let auth_result = with_authentication(gctx, url, git_config, |f| {
859            let port = Url::parse(url).ok().and_then(|url| url.port());
860            let mut last_update = Instant::now();
861            let mut rcb = git2::RemoteCallbacks::new();
862            // We choose `N=10` here to make a `300ms * 10slots ~= 3000ms`
863            // sliding window for tracking the data transfer rate (in bytes/s).
864            let mut counter = MetricsCounter::<10>::new(0, last_update);
865            rcb.credentials(f);
866            rcb.certificate_check(|cert, host| {
867                match super::known_hosts::certificate_check(
868                    gctx,
869                    cert,
870                    host,
871                    port,
872                    config_known_hosts,
873                    &diagnostic_home_config,
874                ) {
875                    Ok(status) => Ok(status),
876                    Err(e) => {
877                        check_cb_result = Err(e);
878                        // This is not really used because it'll be overridden by libgit2
879                        // See https://github.com/libgit2/libgit2/commit/9a9f220119d9647a352867b24b0556195cb26548
880                        Err(git2::Error::from_str(
881                            "invalid or unknown remote ssh hostkey",
882                        ))
883                    }
884                }
885            });
886            rcb.transfer_progress(|stats| {
887                let indexed_deltas = stats.indexed_deltas();
888                let msg = if indexed_deltas > 0 {
889                    // Resolving deltas.
890                    format!(
891                        ", ({}/{}) resolving deltas",
892                        indexed_deltas,
893                        stats.total_deltas()
894                    )
895                } else {
896                    // Receiving objects.
897                    //
898                    // # Caveat
899                    //
900                    // Progress bar relies on git2 calling `transfer_progress`
901                    // to update its transfer rate, but we cannot guarantee a
902                    // periodic call of that callback. Thus if we don't receive
903                    // any data for, say, 10 seconds, the rate will get stuck
904                    // and never go down to 0B/s.
905                    // In the future, we need to find away to update the rate
906                    // even when the callback is not called.
907                    let now = Instant::now();
908                    // Scrape a `received_bytes` to the counter every 300ms.
909                    if now - last_update > Duration::from_millis(300) {
910                        counter.add(stats.received_bytes(), now);
911                        last_update = now;
912                    }
913                    let rate = HumanBytes(counter.rate() as u64);
914                    format!(", {rate:.2}/s")
915                };
916                progress
917                    .tick(stats.indexed_objects(), stats.total_objects(), &msg)
918                    .is_ok()
919            });
920
921            // Create a local anonymous remote in the repository to fetch the
922            // url
923            let mut opts = git2::FetchOptions::new();
924            opts.remote_callbacks(rcb);
925            cb(opts)
926        });
927        if auth_result.is_err() {
928            check_cb_result?;
929        }
930        auth_result?;
931        Ok(())
932    })
933}
934
935/// Attempts to fetch the given git `reference` for a Git repository.
936///
937/// This is the main entry for git clone/fetch. It does the followings:
938///
939/// * Turns [`GitReference`] into refspecs accordingly.
940/// * Dispatches `git fetch` using libgit2, gitoxide, or git CLI.
941///
942/// The `remote_url` argument is the git remote URL where we want to fetch from.
943///
944/// The `remote_kind` argument is a thing for [`-Zgitoxide`] shallow clones
945/// at this time. It could be extended when libgit2 supports shallow clones.
946///
947/// [`-Zgitoxide`]: https://doc.rust-lang.org/nightly/cargo/reference/unstable.html#gitoxide
948pub fn fetch(
949    repo: &mut git2::Repository,
950    remote_url: &str,
951    reference: &GitReference,
952    gctx: &GlobalContext,
953    remote_kind: RemoteKind,
954) -> CargoResult<()> {
955    if let Some(offline_flag) = gctx.offline_flag() {
956        anyhow::bail!(
957            "attempting to update a git repository, but {offline_flag} \
958             was specified"
959        )
960    }
961
962    let shallow = remote_kind.to_shallow_setting(repo.is_shallow(), gctx);
963
964    // Flag to keep track if the rev is a full commit hash
965    let mut fast_path_rev: bool = false;
966
967    let oid_to_fetch = match github_fast_path(repo, remote_url, reference, gctx) {
968        Ok(FastPathRev::UpToDate) => return Ok(()),
969        Ok(FastPathRev::NeedsFetch(rev)) => Some(rev),
970        Ok(FastPathRev::Indeterminate) => None,
971        Err(e) => {
972            debug!("failed to check github {:?}", e);
973            None
974        }
975    };
976
977    maybe_gc_repo(repo, gctx)?;
978
979    clean_repo_temp_files(repo);
980
981    // Translate the reference desired here into an actual list of refspecs
982    // which need to get fetched. Additionally record if we're fetching tags.
983    let mut refspecs = Vec::new();
984    let mut tags = false;
985    // The `+` symbol on the refspec means to allow a forced (fast-forward)
986    // update which is needed if there is ever a force push that requires a
987    // fast-forward.
988    match reference {
989        // For branches and tags we can fetch simply one reference and copy it
990        // locally, no need to fetch other branches/tags.
991        GitReference::Branch(b) => {
992            refspecs.push(format!("+refs/heads/{0}:refs/remotes/origin/{0}", b));
993        }
994
995        GitReference::Tag(t) => {
996            refspecs.push(format!("+refs/tags/{0}:refs/remotes/origin/tags/{0}", t));
997        }
998
999        GitReference::DefaultBranch => {
1000            refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
1001        }
1002
1003        GitReference::Rev(rev) => {
1004            if rev.starts_with("refs/") {
1005                refspecs.push(format!("+{0}:{0}", rev));
1006            } else if let Some(oid_to_fetch) = oid_to_fetch {
1007                fast_path_rev = true;
1008                refspecs.push(format!("+{0}:refs/commit/{0}", oid_to_fetch));
1009            } else if !matches!(shallow, gix::remote::fetch::Shallow::NoChange)
1010                && rev_to_oid(rev).is_some()
1011            {
1012                // There is a specific commit to fetch and we will do so in shallow-mode only
1013                // to not disturb the previous logic.
1014                // Note that with typical settings for shallowing, we will just fetch a single `rev`
1015                // as single commit.
1016                // The reason we write to `refs/remotes/origin/HEAD` is that it's of special significance
1017                // when during `GitReference::resolve()`, but otherwise it shouldn't matter.
1018                refspecs.push(format!("+{0}:refs/remotes/origin/HEAD", rev));
1019            } else {
1020                // We don't know what the rev will point to. To handle this
1021                // situation we fetch all branches and tags, and then we pray
1022                // it's somewhere in there.
1023                refspecs.push(String::from("+refs/heads/*:refs/remotes/origin/*"));
1024                refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
1025                tags = true;
1026            }
1027        }
1028    }
1029
1030    debug!("doing a fetch for {remote_url}");
1031    let result = if let Some(true) = gctx.net_config()?.git_fetch_with_cli {
1032        fetch_with_cli(repo, remote_url, &refspecs, tags, shallow, gctx)
1033    } else if gctx.cli_unstable().gitoxide.map_or(false, |git| git.fetch) {
1034        fetch_with_gitoxide(repo, remote_url, refspecs, tags, shallow, gctx)
1035    } else {
1036        fetch_with_libgit2(repo, remote_url, refspecs, tags, shallow, gctx)
1037    };
1038
1039    if fast_path_rev {
1040        if let Some(oid) = oid_to_fetch {
1041            return result.with_context(|| format!("revision {} not found", oid));
1042        }
1043    }
1044    result
1045}
1046
1047/// `gitoxide` uses shallow locks to assure consistency when fetching to and to avoid races, and to write
1048/// files atomically.
1049/// Cargo has its own lock files and doesn't need that mechanism for race protection, so a stray lock means
1050/// a signal interrupted a previous shallow fetch and doesn't mean a race is happening.
1051fn has_shallow_lock_file(err: &crate::sources::git::fetch::Error) -> bool {
1052    matches!(
1053        err,
1054        gix::env::collate::fetch::Error::Fetch(gix::remote::fetch::Error::Fetch(
1055            gix::protocol::fetch::Error::LockShallowFile(_)
1056        ))
1057    )
1058}
1059
1060/// Attempts to use `git` CLI installed on the system to fetch a repository,
1061/// when the config value [`net.git-fetch-with-cli`][1] is set.
1062///
1063/// Unfortunately `libgit2` is notably lacking in the realm of authentication
1064/// when compared to the `git` command line. As a result, allow an escape
1065/// hatch for users that would prefer to use `git`-the-CLI for fetching
1066/// repositories instead of `libgit2`-the-library. This should make more
1067/// flavors of authentication possible while also still giving us all the
1068/// speed and portability of using `libgit2`.
1069///
1070/// [1]: https://doc.rust-lang.org/nightly/cargo/reference/config.html#netgit-fetch-with-cli
1071fn fetch_with_cli(
1072    repo: &mut git2::Repository,
1073    url: &str,
1074    refspecs: &[String],
1075    tags: bool,
1076    shallow: gix::remote::fetch::Shallow,
1077    gctx: &GlobalContext,
1078) -> CargoResult<()> {
1079    debug!(target: "git-fetch", backend = "git-cli");
1080
1081    let mut cmd = ProcessBuilder::new("git");
1082    cmd.arg("fetch");
1083    if tags {
1084        cmd.arg("--tags");
1085    } else {
1086        cmd.arg("--no-tags");
1087    }
1088    if let gix::remote::fetch::Shallow::DepthAtRemote(depth) = shallow {
1089        let depth = 0i32.saturating_add_unsigned(depth.get());
1090        cmd.arg(format!("--depth={depth}"));
1091    }
1092    match gctx.shell().verbosity() {
1093        Verbosity::Normal => {}
1094        Verbosity::Verbose => {
1095            cmd.arg("--verbose");
1096        }
1097        Verbosity::Quiet => {
1098            cmd.arg("--quiet");
1099        }
1100    }
1101    cmd.arg("--force") // handle force pushes
1102        .arg("--update-head-ok") // see discussion in #2078
1103        .arg(url)
1104        .args(refspecs)
1105        // If cargo is run by git (for example, the `exec` command in `git
1106        // rebase`), the GIT_DIR is set by git and will point to the wrong
1107        // location. This makes sure GIT_DIR is always the repository path.
1108        .env("GIT_DIR", repo.path())
1109        // The reset of these may not be necessary, but I'm including them
1110        // just to be extra paranoid and avoid any issues.
1111        .env_remove("GIT_WORK_TREE")
1112        .env_remove("GIT_INDEX_FILE")
1113        .env_remove("GIT_OBJECT_DIRECTORY")
1114        .env_remove("GIT_ALTERNATE_OBJECT_DIRECTORIES")
1115        .cwd(repo.path());
1116    gctx.shell()
1117        .verbose(|s| s.status("Running", &cmd.to_string()))?;
1118    network::retry::with_retry(gctx, || {
1119        cmd.exec()
1120            .map_err(|error| GitCliError::new(error, true).into())
1121    })?;
1122
1123    Ok(())
1124}
1125
1126fn fetch_with_gitoxide(
1127    repo: &mut git2::Repository,
1128    remote_url: &str,
1129    refspecs: Vec<String>,
1130    tags: bool,
1131    shallow: gix::remote::fetch::Shallow,
1132    gctx: &GlobalContext,
1133) -> CargoResult<()> {
1134    debug!(target: "git-fetch", backend = "gitoxide");
1135
1136    let git2_repo = repo;
1137    let config_overrides = cargo_config_to_gitoxide_overrides(gctx)?;
1138    let repo_reinitialized = AtomicBool::default();
1139    let res = oxide::with_retry_and_progress(
1140        git2_repo.path(),
1141        gctx,
1142        remote_url,
1143        &|repo_path,
1144          should_interrupt,
1145          mut progress,
1146          url_for_authentication: &mut dyn FnMut(&gix::bstr::BStr)| {
1147            // The `fetch` operation here may fail spuriously due to a corrupt
1148            // repository. It could also fail, however, for a whole slew of other
1149            // reasons (aka network related reasons). We want Cargo to automatically
1150            // recover from corrupt repositories, but we don't want Cargo to stomp
1151            // over other legitimate errors.
1152            //
1153            // Consequently we save off the error of the `fetch` operation and if it
1154            // looks like a "corrupt repo" error then we blow away the repo and try
1155            // again. If it looks like any other kind of error, or if we've already
1156            // blown away the repository, then we want to return the error as-is.
1157            loop {
1158                let res = oxide::open_repo(
1159                    repo_path,
1160                    config_overrides.clone(),
1161                    oxide::OpenMode::ForFetch,
1162                )
1163                .map_err(crate::sources::git::fetch::Error::from)
1164                .and_then(|repo| {
1165                    debug!("initiating fetch of {refspecs:?} from {remote_url}");
1166                    let url_for_authentication = &mut *url_for_authentication;
1167                    let remote = repo
1168                        .remote_at(remote_url)?
1169                        .with_fetch_tags(if tags {
1170                            gix::remote::fetch::Tags::All
1171                        } else {
1172                            gix::remote::fetch::Tags::Included
1173                        })
1174                        .with_refspecs(
1175                            refspecs.iter().map(|s| s.as_str()),
1176                            gix::remote::Direction::Fetch,
1177                        )
1178                        .map_err(crate::sources::git::fetch::Error::Other)?;
1179                    let url = remote
1180                        .url(gix::remote::Direction::Fetch)
1181                        .expect("set at init")
1182                        .to_owned();
1183                    let connection = remote.connect(gix::remote::Direction::Fetch)?;
1184                    let mut authenticate = connection.configured_credentials(url)?;
1185                    let connection = connection.with_credentials(
1186                        move |action: gix::protocol::credentials::helper::Action| {
1187                            if let Some(url) = action
1188                                .context()
1189                                .and_then(|gctx| gctx.url.as_ref().filter(|url| *url != remote_url))
1190                            {
1191                                url_for_authentication(url.as_ref());
1192                            }
1193                            authenticate(action)
1194                        },
1195                    );
1196                    let outcome = connection
1197                        .prepare_fetch(&mut progress, gix::remote::ref_map::Options::default())?
1198                        .with_shallow(shallow.clone())
1199                        .receive(&mut progress, should_interrupt)?;
1200                    Ok(outcome)
1201                });
1202                let err = match res {
1203                    Ok(_) => break,
1204                    Err(e) => e,
1205                };
1206                debug!("fetch failed: {}", err);
1207
1208                if !repo_reinitialized.load(Ordering::Relaxed)
1209                        // We check for errors that could occur if the configuration, refs or odb files are corrupted.
1210                        // We don't check for errors related to writing as `gitoxide` is expected to create missing leading
1211                        // folder before writing files into it, or else not even open a directory as git repository (which is
1212                        // also handled here).
1213                        && err.is_corrupted()
1214                    || has_shallow_lock_file(&err)
1215                {
1216                    repo_reinitialized.store(true, Ordering::Relaxed);
1217                    debug!(
1218                        "looks like this is a corrupt repository, reinitializing \
1219                     and trying again"
1220                    );
1221                    if oxide::reinitialize(repo_path).is_ok() {
1222                        continue;
1223                    }
1224                }
1225
1226                return Err(err.into());
1227            }
1228            Ok(())
1229        },
1230    );
1231    if repo_reinitialized.load(Ordering::Relaxed) {
1232        *git2_repo = git2::Repository::open(git2_repo.path())?;
1233    }
1234    res
1235}
1236
1237fn fetch_with_libgit2(
1238    repo: &mut git2::Repository,
1239    remote_url: &str,
1240    refspecs: Vec<String>,
1241    tags: bool,
1242    shallow: gix::remote::fetch::Shallow,
1243    gctx: &GlobalContext,
1244) -> CargoResult<()> {
1245    debug!(target: "git-fetch", backend = "libgit2");
1246
1247    let git_config = git2::Config::open_default()?;
1248    with_fetch_options(&git_config, remote_url, gctx, &mut |mut opts| {
1249        if tags {
1250            opts.download_tags(git2::AutotagOption::All);
1251        }
1252        if let gix::remote::fetch::Shallow::DepthAtRemote(depth) = shallow {
1253            opts.depth(0i32.saturating_add_unsigned(depth.get()));
1254        }
1255        // The `fetch` operation here may fail spuriously due to a corrupt
1256        // repository. It could also fail, however, for a whole slew of other
1257        // reasons (aka network related reasons). We want Cargo to automatically
1258        // recover from corrupt repositories, but we don't want Cargo to stomp
1259        // over other legitimate errors.
1260        //
1261        // Consequently we save off the error of the `fetch` operation and if it
1262        // looks like a "corrupt repo" error then we blow away the repo and try
1263        // again. If it looks like any other kind of error, or if we've already
1264        // blown away the repository, then we want to return the error as-is.
1265        let mut repo_reinitialized = false;
1266        loop {
1267            debug!("initiating fetch of {refspecs:?} from {remote_url}");
1268            let res = repo
1269                .remote_anonymous(remote_url)?
1270                .fetch(&refspecs, Some(&mut opts), None);
1271            let err = match res {
1272                Ok(()) => break,
1273                Err(e) => e,
1274            };
1275            debug!("fetch failed: {}", err);
1276
1277            if !repo_reinitialized && matches!(err.class(), ErrorClass::Reference | ErrorClass::Odb)
1278            {
1279                repo_reinitialized = true;
1280                debug!(
1281                    "looks like this is a corrupt repository, reinitializing \
1282                     and trying again"
1283                );
1284                if reinitialize(repo).is_ok() {
1285                    continue;
1286                }
1287            }
1288
1289            return Err(err.into());
1290        }
1291        Ok(())
1292    })
1293}
1294
1295/// Attempts to `git gc` a repository.
1296///
1297/// Cargo has a bunch of long-lived git repositories in its global cache and
1298/// some, like the index, are updated very frequently. Right now each update
1299/// creates a new "pack file" inside the git database, and over time this can
1300/// cause bad performance and bad current behavior in libgit2.
1301///
1302/// One pathological use case today is where libgit2 opens hundreds of file
1303/// descriptors, getting us dangerously close to blowing out the OS limits of
1304/// how many fds we can have open. This is detailed in [#4403].
1305///
1306/// To try to combat this problem we attempt a `git gc` here. Note, though, that
1307/// we may not even have `git` installed on the system! As a result we
1308/// opportunistically try a `git gc` when the pack directory looks too big, and
1309/// failing that we just blow away the repository and start over.
1310///
1311/// In theory this shouldn't be too expensive compared to the network request
1312/// we're about to issue.
1313///
1314/// [#4403]: https://github.com/rust-lang/cargo/issues/4403
1315fn maybe_gc_repo(repo: &mut git2::Repository, gctx: &GlobalContext) -> CargoResult<()> {
1316    // Here we arbitrarily declare that if you have more than 100 files in your
1317    // `pack` folder that we need to do a gc.
1318    let entries = match repo.path().join("objects/pack").read_dir() {
1319        Ok(e) => e.count(),
1320        Err(_) => {
1321            debug!("skipping gc as pack dir appears gone");
1322            return Ok(());
1323        }
1324    };
1325    let max = gctx
1326        .get_env("__CARGO_PACKFILE_LIMIT")
1327        .ok()
1328        .and_then(|s| s.parse::<usize>().ok())
1329        .unwrap_or(100);
1330    if entries < max {
1331        debug!("skipping gc as there's only {} pack files", entries);
1332        return Ok(());
1333    }
1334
1335    // First up, try a literal `git gc` by shelling out to git. This is pretty
1336    // likely to fail though as we may not have `git` installed. Note that
1337    // libgit2 doesn't currently implement the gc operation, so there's no
1338    // equivalent there.
1339    match Command::new("git")
1340        .arg("gc")
1341        .current_dir(repo.path())
1342        .output()
1343    {
1344        Ok(out) => {
1345            debug!(
1346                "git-gc status: {}\n\nstdout ---\n{}\nstderr ---\n{}",
1347                out.status,
1348                String::from_utf8_lossy(&out.stdout),
1349                String::from_utf8_lossy(&out.stderr)
1350            );
1351            if out.status.success() {
1352                let new = git2::Repository::open(repo.path())?;
1353                *repo = new;
1354                return Ok(());
1355            }
1356        }
1357        Err(e) => debug!("git-gc failed to spawn: {}", e),
1358    }
1359
1360    // Alright all else failed, let's start over.
1361    reinitialize(repo)
1362}
1363
1364/// Removes temporary files left from previous activity.
1365///
1366/// If libgit2 is interrupted while indexing pack files, it will leave behind
1367/// some temporary files that it doesn't clean up. These can be quite large in
1368/// size, so this tries to clean things up.
1369///
1370/// This intentionally ignores errors. This is only an opportunistic cleaning,
1371/// and we don't really care if there are issues (there's unlikely anything
1372/// that can be done).
1373///
1374/// The git CLI has similar behavior (its temp files look like
1375/// `objects/pack/tmp_pack_9kUSA8`). Those files are normally deleted via `git
1376/// prune` which is run by `git gc`. However, it doesn't know about libgit2's
1377/// filenames, so they never get cleaned up.
1378fn clean_repo_temp_files(repo: &git2::Repository) {
1379    let path = repo.path().join("objects/pack/pack_git2_*");
1380    let Some(pattern) = path.to_str() else {
1381        tracing::warn!("cannot convert {path:?} to a string");
1382        return;
1383    };
1384    let Ok(paths) = glob::glob(pattern) else {
1385        return;
1386    };
1387    for path in paths {
1388        if let Ok(path) = path {
1389            match paths::remove_file(&path) {
1390                Ok(_) => tracing::debug!("removed stale temp git file {path:?}"),
1391                Err(e) => {
1392                    tracing::warn!("failed to remove {path:?} while cleaning temp files: {e}")
1393                }
1394            }
1395        }
1396    }
1397}
1398
1399/// Reinitializes a given Git repository. This is useful when a Git repository
1400/// seems corrupted and we want to start over.
1401fn reinitialize(repo: &mut git2::Repository) -> CargoResult<()> {
1402    // Here we want to drop the current repository object pointed to by `repo`,
1403    // so we initialize temporary repository in a sub-folder, blow away the
1404    // existing git folder, and then recreate the git repo. Finally we blow away
1405    // the `tmp` folder we allocated.
1406    let path = repo.path().to_path_buf();
1407    debug!("reinitializing git repo at {:?}", path);
1408    let tmp = path.join("tmp");
1409    let bare = !repo.path().ends_with(".git");
1410    *repo = init(&tmp, false)?;
1411    for entry in path.read_dir()? {
1412        let entry = entry?;
1413        if entry.file_name().to_str() == Some("tmp") {
1414            continue;
1415        }
1416        let path = entry.path();
1417        drop(paths::remove_file(&path).or_else(|_| paths::remove_dir_all(&path)));
1418    }
1419    *repo = init(&path, bare)?;
1420    paths::remove_dir_all(&tmp)?;
1421    Ok(())
1422}
1423
1424/// Initializes a Git repository at `path`.
1425fn init(path: &Path, bare: bool) -> CargoResult<git2::Repository> {
1426    let mut opts = git2::RepositoryInitOptions::new();
1427    // Skip anything related to templates, they just call all sorts of issues as
1428    // we really don't want to use them yet they insist on being used. See #6240
1429    // for an example issue that comes up.
1430    opts.external_template(false);
1431    opts.bare(bare);
1432    Ok(git2::Repository::init_opts(&path, &opts)?)
1433}
1434
/// The result of the GitHub fast path check. See [`github_fast_path`] for more.
///
/// It tells the caller whether a fetch can be skipped, narrowed down to a
/// single commit, or has to go through the regular path.
enum FastPathRev {
    /// The local rev (determined by `reference.resolve(repo)`) is already up to
    /// date with what this rev resolves to on GitHub's server. No fetch is
    /// needed.
    UpToDate,
    /// The following SHA must be fetched in order for the local rev to become
    /// up to date.
    NeedsFetch(Oid),
    /// Don't know whether the local rev is up to date. The caller falls back
    /// to a regular fetch from the server and sees what happens.
    Indeterminate,
}
1447
1448/// Attempts GitHub's special fast path for testing if we've already got an
1449/// up-to-date copy of the repository.
1450///
1451/// Updating the index is done pretty regularly so we want it to be as fast as
1452/// possible. For registries hosted on GitHub (like the crates.io index) there's
1453/// a fast path available to use[^1] to tell us that there's no updates to be
1454/// made.
1455///
1456/// Note that this function should never cause an actual failure because it's
1457/// just a fast path. As a result, a caller should ignore `Err` returned from
1458/// this function and move forward on the normal path.
1459///
1460/// [^1]: <https://developer.github.com/v3/repos/commits/#get-the-sha-1-of-a-commit-reference>
1461fn github_fast_path(
1462    repo: &mut git2::Repository,
1463    url: &str,
1464    reference: &GitReference,
1465    gctx: &GlobalContext,
1466) -> CargoResult<FastPathRev> {
1467    let url = Url::parse(url)?;
1468    if !is_github(&url) {
1469        return Ok(FastPathRev::Indeterminate);
1470    }
1471
1472    let local_object = resolve_ref(reference, repo).ok();
1473
1474    let github_branch_name = match reference {
1475        GitReference::Branch(branch) => branch,
1476        GitReference::Tag(tag) => tag,
1477        GitReference::DefaultBranch => "HEAD",
1478        GitReference::Rev(rev) => {
1479            if rev.starts_with("refs/") {
1480                rev
1481            } else if looks_like_commit_hash(rev) {
1482                // `revparse_single` (used by `resolve`) is the only way to turn
1483                // short hash -> long hash, but it also parses other things,
1484                // like branch and tag names, which might coincidentally be
1485                // valid hex.
1486                //
1487                // We only return early if `rev` is a prefix of the object found
1488                // by `revparse_single`. Don't bother talking to GitHub in that
1489                // case, since commit hashes are permanent. If a commit with the
1490                // requested hash is already present in the local clone, its
1491                // contents must be the same as what is on the server for that
1492                // hash.
1493                //
1494                // If `rev` is not found locally by `revparse_single`, we'll
1495                // need GitHub to resolve it and get a hash. If `rev` is found
1496                // but is not a short hash of the found object, it's probably a
1497                // branch and we also need to get a hash from GitHub, in case
1498                // the branch has moved.
1499                if let Some(local_object) = local_object {
1500                    if is_short_hash_of(rev, local_object) {
1501                        debug!("github fast path already has {local_object}");
1502                        return Ok(FastPathRev::UpToDate);
1503                    }
1504                }
1505                // If `rev` is a full commit hash, the only thing it can resolve
1506                // to is itself. Don't bother talking to GitHub in that case
1507                // either. (This ensures that we always attempt to fetch the
1508                // commit directly even if we can't reach the GitHub API.)
1509                if let Some(oid) = rev_to_oid(rev) {
1510                    debug!("github fast path is already a full commit hash {rev}");
1511                    return Ok(FastPathRev::NeedsFetch(oid));
1512                }
1513                rev
1514            } else {
1515                debug!("can't use github fast path with `rev = \"{}\"`", rev);
1516                return Ok(FastPathRev::Indeterminate);
1517            }
1518        }
1519    };
1520
1521    // This expects GitHub urls in the form `github.com/user/repo` and nothing
1522    // else
1523    let mut pieces = url
1524        .path_segments()
1525        .ok_or_else(|| anyhow!("no path segments on url"))?;
1526    let username = pieces
1527        .next()
1528        .ok_or_else(|| anyhow!("couldn't find username"))?;
1529    let repository = pieces
1530        .next()
1531        .ok_or_else(|| anyhow!("couldn't find repository name"))?;
1532    if pieces.next().is_some() {
1533        anyhow::bail!("too many segments on URL");
1534    }
1535
1536    // Trim off the `.git` from the repository, if present, since that's
1537    // optional for GitHub and won't work when we try to use the API as well.
1538    let repository = repository.strip_suffix(".git").unwrap_or(repository);
1539
1540    let url = format!(
1541        "https://api.github.com/repos/{}/{}/commits/{}",
1542        username, repository, github_branch_name,
1543    );
1544    let mut handle = gctx.http()?.lock().unwrap();
1545    debug!("attempting GitHub fast path for {}", url);
1546    handle.get(true)?;
1547    handle.url(&url)?;
1548    handle.useragent("cargo")?;
1549    handle.follow_location(true)?; // follow redirects
1550    handle.http_headers({
1551        let mut headers = List::new();
1552        headers.append("Accept: application/vnd.github.3.sha")?;
1553        if let Some(local_object) = local_object {
1554            headers.append(&format!("If-None-Match: \"{}\"", local_object))?;
1555        }
1556        headers
1557    })?;
1558
1559    let mut response_body = Vec::new();
1560    let mut transfer = handle.transfer();
1561    transfer.write_function(|data| {
1562        response_body.extend_from_slice(data);
1563        Ok(data.len())
1564    })?;
1565    transfer.perform()?;
1566    drop(transfer); // end borrow of handle so that response_code can be called
1567
1568    let response_code = handle.response_code()?;
1569    if response_code == 304 {
1570        debug!("github fast path up-to-date");
1571        Ok(FastPathRev::UpToDate)
1572    } else if response_code == 200
1573        && let Some(oid_to_fetch) = rev_to_oid(str::from_utf8(&response_body)?)
1574    {
1575        // response expected to be a full hash hexstring (40 or 64 chars)
1576        debug!("github fast path fetch {oid_to_fetch}");
1577        Ok(FastPathRev::NeedsFetch(oid_to_fetch))
1578    } else {
1579        // Usually response_code == 404 if the repository does not exist, and
1580        // response_code == 422 if exists but GitHub is unable to resolve the
1581        // requested rev.
1582        debug!("github fast path bad response code {response_code}");
1583        Ok(FastPathRev::Indeterminate)
1584    }
1585}
1586
1587/// Whether a `url` is one from GitHub.
1588fn is_github(url: &Url) -> bool {
1589    url.host_str() == Some("github.com")
1590}
1591
1592// Give some messages on GitHub PR URL given as is
1593pub(crate) fn note_github_pull_request(url: &str) -> Option<String> {
1594    if let Ok(url) = url.parse::<Url>()
1595        && is_github(&url)
1596    {
1597        let path_segments = url
1598            .path_segments()
1599            .map(|p| p.into_iter().collect::<Vec<_>>())
1600            .unwrap_or_default();
1601        if let [owner, repo, "pull", pr_number, ..] = path_segments[..] {
1602            let repo_url = format!("https://github.com/{owner}/{repo}.git");
1603            let rev = format!("refs/pull/{pr_number}/head");
1604            return Some(format!(
1605                concat!(
1606                    "\n\nnote: GitHub url {} is not a repository. \n",
1607                    "help: Replace the dependency with \n",
1608                    "       `git = \"{}\" rev = \"{}\"` \n",
1609                    "   to specify pull requests as dependencies' revision."
1610                ),
1611                url, repo_url, rev
1612            ));
1613        }
1614    }
1615
1616    None
1617}
1618
/// Whether a `rev` looks like a commit hash: at least 7 bytes long and made
/// up solely of ASCII hex digits.
fn looks_like_commit_hash(rev: &str) -> bool {
    const MIN_HASH_LEN: usize = 7;
    if rev.len() < MIN_HASH_LEN {
        return false;
    }
    // Every byte of a non-ASCII character fails the hex-digit check, so
    // scanning bytes gives the same answer as scanning chars.
    rev.bytes().all(|byte| byte.is_ascii_hexdigit())
}
1623
1624/// Whether `rev` is a shorter hash of `oid`.
1625fn is_short_hash_of(rev: &str, oid: Oid) -> bool {
1626    let long_hash = oid.to_string();
1627    match long_hash.get(..rev.len()) {
1628        Some(truncated_long_hash) => truncated_long_hash.eq_ignore_ascii_case(rev),
1629        None => false,
1630    }
1631}
1632
#[cfg(test)]
mod tests {
    use super::absolute_submodule_url;

    /// Checks `absolute_submodule_url` against a table of
    /// `(base_url, submodule_url, expected)` triples.
    #[test]
    fn test_absolute_submodule_url() {
        // Bases written as `ssh://` URLs. Per the expectations below, an
        // absolute submodule URL is returned untouched, while relative ones
        // are resolved against the base (including `..` components).
        let url_style_bases = [
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "git@github.com:rust-lang/cargo.git",
                "git@github.com:rust-lang/cargo.git",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "./",
                "ssh://git@gitub.com/rust-lang/cargo/",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "../",
                "ssh://git@gitub.com/rust-lang/",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "./foo",
                "ssh://git@gitub.com/rust-lang/cargo/foo",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo/",
                "./foo",
                "ssh://git@gitub.com/rust-lang/cargo/foo",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo/",
                "../foo",
                "ssh://git@gitub.com/rust-lang/foo",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "../foo",
                "ssh://git@gitub.com/rust-lang/foo",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "../foo/bar/../baz",
                "ssh://git@gitub.com/rust-lang/foo/baz",
            ),
        ];

        // Bases written in scp-like `user@host:path` form. Per the
        // expectations below, an absolute submodule URL still wins, while
        // relative ones are appended verbatim after a single `/`.
        let scp_style_bases = [
            (
                "git@github.com:rust-lang/cargo.git",
                "ssh://git@gitub.com/rust-lang/cargo",
                "ssh://git@gitub.com/rust-lang/cargo",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "./",
                "git@github.com:rust-lang/cargo.git/./",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "../",
                "git@github.com:rust-lang/cargo.git/../",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "./foo",
                "git@github.com:rust-lang/cargo.git/./foo",
            ),
            (
                "git@github.com:rust-lang/cargo.git/",
                "./foo",
                "git@github.com:rust-lang/cargo.git/./foo",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "../foo",
                "git@github.com:rust-lang/cargo.git/../foo",
            ),
            (
                "git@github.com:rust-lang/cargo.git/",
                "../foo",
                "git@github.com:rust-lang/cargo.git/../foo",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "../foo/bar/../baz",
                "git@github.com:rust-lang/cargo.git/../foo/bar/../baz",
            ),
        ];

        let all_cases = url_style_bases.iter().chain(scp_style_bases.iter());
        for &(base_url, submodule_url, expected) in all_cases {
            let url = absolute_submodule_url(base_url, submodule_url).unwrap();
            assert_eq!(
                expected, url,
                "base `{base_url}`; submodule `{submodule_url}`"
            );
        }
    }
}
1731
1732/// Turns a full commit hash revision into an oid.
1733///
1734/// Git object ID is supposed to be a hex string of 20 (SHA1) or 32 (SHA256) bytes.
1735/// Its length must be double to the underlying bytes (40 or 64),
1736/// otherwise libgit2 would happily zero-pad the returned oid.
1737///
1738/// See:
1739///
1740/// * <https://github.com/rust-lang/cargo/issues/13188>
1741/// * <https://github.com/rust-lang/cargo/issues/13968>
1742pub(super) fn rev_to_oid(rev: &str) -> Option<Oid> {
1743    Oid::from_str(rev)
1744        .ok()
1745        .filter(|oid| oid.as_bytes().len() * 2 == rev.len())
1746}