Skip to main content

cargo/sources/git/
utils.rs

1//! Utilities for handling git repositories, mainly around
2//! authentication/cloning.
3
4use crate::core::{GitReference, SourceId};
5use crate::sources::git::fetch::RemoteKind;
6use crate::sources::git::oxide;
7use crate::sources::git::oxide::cargo_config_to_gitoxide_overrides;
8use crate::sources::git::source::GitSource;
9use crate::sources::source::Source as _;
10use crate::util::HumanBytes;
11use crate::util::errors::{CargoResult, GitCliError};
12use crate::util::{GlobalContext, IntoUrl, MetricsCounter, Progress, network};
13
14use anyhow::{Context as _, anyhow};
15use cargo_util::{ProcessBuilder, paths};
16use cargo_util_terminal::Verbosity;
17use git2::{ErrorClass, ObjectType, Oid};
18use http::{Request, StatusCode};
19use tracing::{debug, info};
20use url::Url;
21
22use std::borrow::Cow;
23use std::path::{Path, PathBuf};
24use std::process::Command;
25use std::str;
26use std::sync::atomic::{AtomicBool, Ordering};
27use std::time::{Duration, Instant};
28
/// Name of the marker file whose presence indicates that `git reset` has
/// completed and the repo checkout is ready to go. See
/// [`GitCheckout::reset`] for why we need this.
const CHECKOUT_READY_LOCK: &str = ".cargo-ok";
32
/// A short abbreviated OID.
///
/// Holds the [`git2::Buf`] produced by `short_id()` as-is, which avoids
/// extra allocations in [`GitDatabase::to_short_id`].
pub struct GitShortID(git2::Buf);
37
38impl GitShortID {
39    /// Views the short ID as a `str`.
40    pub fn as_str(&self) -> &str {
41        self.0.as_str().unwrap()
42    }
43}
44
/// A remote repository. It gets cloned into a local [`GitDatabase`].
#[derive(PartialEq, Clone, Debug)]
pub struct GitRemote {
    /// URL to a remote repository, kept as a plain string rather than a
    /// parsed [`Url`].
    ///
    /// This may differ from the [`SourceId`] URL when the original URL
    /// can't be represented as a WHATWG [`Url`], for example SCP-like URLs.
    /// See <https://github.com/rust-lang/cargo/issues/16740>.
    url: String,
}
55
/// A local clone of a remote repository's database. Multiple [`GitCheckout`]s
/// can be cloned from a single [`GitDatabase`].
pub struct GitDatabase {
    /// The remote repository where this database is fetched from.
    remote: GitRemote,
    /// Path to the root of the underlying Git repository on the local filesystem.
    ///
    /// Checkouts are cloned from this path (see `GitCheckout::clone_into`).
    path: PathBuf,
    /// Underlying Git repository instance for this database.
    repo: git2::Repository,
}
66
/// A local checkout of a particular revision from a [`GitDatabase`].
///
/// Holding a value of this type does not by itself mean the working tree is
/// ready to use; `GitCheckout::is_fresh` performs that check.
pub struct GitCheckout<'a> {
    /// The git database where this checkout is cloned from.
    database: &'a GitDatabase,
    /// Path to the root of the underlying Git repository on the local filesystem.
    ///
    /// This is the repository's working directory when it has one, otherwise
    /// the repository path itself.
    path: PathBuf,
    /// The git revision this checkout is for.
    revision: git2::Oid,
    /// Underlying Git repository instance for this checkout.
    repo: git2::Repository,
}
78
79impl GitRemote {
80    /// Creates an instance for a remote repository URL.
81    pub fn new(url: &Url) -> GitRemote {
82        GitRemote {
83            url: url.as_str().to_owned(),
84        }
85    }
86
87    /// Creates an instance with an URL that may not be a valid WHATWG URL.
88    ///
89    /// This is needed because [`SourceId`] hasn't yet supported SCP-like URLs.
90    pub(super) fn new_from_str(url: String) -> GitRemote {
91        GitRemote { url }
92    }
93
94    /// Gets the remote repository URL.
95    pub fn url(&self) -> &str {
96        &self.url
97    }
98
99    /// Fetches and checkouts to a reference or a revision from this remote
100    /// into a local path.
101    ///
102    /// This ensures that it gets the up-to-date commit when a named reference
103    /// is given (tag, branch, refs/*). Thus, network connection is involved.
104    ///
105    /// If we have a previous instance of [`GitDatabase`] then fetch into that
106    /// if we can. If that can successfully load our revision then we've
107    /// populated the database with the latest version of `reference`, so
108    /// return that database and the rev we resolve to.
109    pub fn checkout(
110        &self,
111        into: &Path,
112        db: Option<GitDatabase>,
113        manifest_reference: &GitReference,
114        reference: &GitReference,
115        gctx: &GlobalContext,
116    ) -> CargoResult<(GitDatabase, git2::Oid)> {
117        if let Some(mut db) = db {
118            fetch(
119                &mut db.repo,
120                self.url(),
121                manifest_reference,
122                reference,
123                gctx,
124                RemoteKind::GitDependency,
125            )
126            .with_context(|| format!("failed to fetch into: {}", into.display()))?;
127
128            if let Some(rev) = resolve_ref(reference, &db.repo).ok() {
129                return Ok((db, rev));
130            }
131        }
132
133        // Otherwise start from scratch to handle corrupt git repositories.
134        // After our fetch (which is interpreted as a clone now) we do the same
135        // resolution to figure out what we cloned.
136        if into.exists() {
137            paths::remove_dir_all(into)?;
138        }
139        paths::create_dir_all(into)?;
140        let mut repo = init(into, true)?;
141        fetch(
142            &mut repo,
143            self.url(),
144            manifest_reference,
145            reference,
146            gctx,
147            RemoteKind::GitDependency,
148        )
149        .with_context(|| format!("failed to clone into: {}", into.display()))?;
150        let rev = resolve_ref(reference, &repo)?;
151
152        Ok((
153            GitDatabase {
154                remote: self.clone(),
155                path: into.to_path_buf(),
156                repo,
157            },
158            rev,
159        ))
160    }
161
162    /// Creates a [`GitDatabase`] of this remote at `db_path`.
163    pub fn db_at(&self, db_path: &Path) -> CargoResult<GitDatabase> {
164        let repo = git2::Repository::open(db_path)?;
165        Ok(GitDatabase {
166            remote: self.clone(),
167            path: db_path.to_path_buf(),
168            repo,
169        })
170    }
171}
172
173impl GitDatabase {
174    /// Checkouts to a revision at `dest`ination from this database.
175    #[tracing::instrument(skip(self, gctx))]
176    pub fn copy_to(
177        &self,
178        rev: git2::Oid,
179        dest: &Path,
180        gctx: &GlobalContext,
181        quiet: bool,
182    ) -> CargoResult<GitCheckout<'_>> {
183        // If the existing checkout exists, and it is fresh, use it.
184        // A non-fresh checkout can happen if the checkout operation was
185        // interrupted. In that case, the checkout gets deleted and a new
186        // clone is created.
187        let checkout = match git2::Repository::open(dest)
188            .ok()
189            .map(|repo| GitCheckout::new(self, rev, repo))
190            .filter(|co| co.is_fresh())
191        {
192            Some(co) => co,
193            None => {
194                let (checkout, guard) = GitCheckout::clone_into(dest, self, rev, gctx)?;
195                checkout.update_submodules(gctx, quiet)?;
196                guard.mark_ok()?;
197                checkout
198            }
199        };
200
201        Ok(checkout)
202    }
203
204    /// Get a short OID for a `revision`, usually 7 chars or more if ambiguous.
205    pub fn to_short_id(&self, revision: git2::Oid) -> CargoResult<GitShortID> {
206        let obj = self.repo.find_object(revision, None)?;
207        Ok(GitShortID(obj.short_id()?))
208    }
209
210    /// Checks if the database contains the object of this `oid`..
211    pub fn contains(&self, oid: git2::Oid) -> bool {
212        self.repo.revparse_single(&oid.to_string()).is_ok()
213    }
214
215    /// [`resolve_ref`]s this reference with this database.
216    pub fn resolve(&self, r: &GitReference) -> CargoResult<git2::Oid> {
217        resolve_ref(r, &self.repo)
218    }
219}
220
221/// Resolves [`GitReference`] to an object ID with objects the `repo` currently has.
222pub fn resolve_ref(gitref: &GitReference, repo: &git2::Repository) -> CargoResult<git2::Oid> {
223    let id = match gitref {
224        // Note that we resolve the named tag here in sync with where it's
225        // fetched into via `fetch` below.
226        GitReference::Tag(s) => (|| -> CargoResult<git2::Oid> {
227            let refname = format!("refs/remotes/origin/tags/{}", s);
228            let id = repo.refname_to_id(&refname)?;
229            let obj = repo.find_object(id, None)?;
230            let obj = obj.peel(ObjectType::Commit)?;
231            Ok(obj.id())
232        })()
233        .with_context(|| format!("failed to find tag `{}`", s))?,
234
235        // Resolve the remote name since that's all we're configuring in
236        // `fetch` below.
237        GitReference::Branch(s) => {
238            let name = format!("origin/{}", s);
239            let b = repo
240                .find_branch(&name, git2::BranchType::Remote)
241                .with_context(|| format!("failed to find branch `{}`", s))?;
242            b.get()
243                .target()
244                .ok_or_else(|| anyhow::format_err!("branch `{}` did not have a target", s))?
245        }
246
247        // We'll be using the HEAD commit
248        GitReference::DefaultBranch => {
249            let head_id = repo.refname_to_id("refs/remotes/origin/HEAD")?;
250            let head = repo.find_object(head_id, None)?;
251            head.peel(ObjectType::Commit)?.id()
252        }
253
254        GitReference::Rev(s) => {
255            let obj = repo.revparse_single(s)?;
256            match obj.as_tag() {
257                Some(tag) => tag.target_id(),
258                None => obj.id(),
259            }
260        }
261    };
262    Ok(id)
263}
264
265impl<'a> GitCheckout<'a> {
266    /// Creates an instance of [`GitCheckout`]. This doesn't imply the checkout
267    /// is done. Use [`GitCheckout::is_fresh`] to check.
268    ///
269    /// * The `database` is where this checkout is from.
270    /// * The `repo` will be the checked out Git repository.
271    fn new(
272        database: &'a GitDatabase,
273        revision: git2::Oid,
274        repo: git2::Repository,
275    ) -> GitCheckout<'a> {
276        let path = repo.workdir().unwrap_or_else(|| repo.path());
277        GitCheckout {
278            path: path.to_path_buf(),
279            database,
280            revision,
281            repo,
282        }
283    }
284
285    /// Gets the remote repository URL.
286    fn remote_url(&self) -> &str {
287        self.database.remote.url()
288    }
289
290    /// Clone a repo for a `revision` into a local path from a `database`.
291    /// This is a filesystem-to-filesystem clone.
292    fn clone_into(
293        into: &Path,
294        database: &'a GitDatabase,
295        revision: git2::Oid,
296        gctx: &GlobalContext,
297    ) -> CargoResult<(GitCheckout<'a>, CheckoutGuard)> {
298        let dirname = into.parent().unwrap();
299        paths::create_dir_all(&dirname)?;
300        if into.exists() {
301            paths::remove_dir_all(into)?;
302        }
303
304        // we're doing a local filesystem-to-filesystem clone so there should
305        // be no need to respect global configuration options, so pass in
306        // an empty instance of `git2::Config` below.
307        let git_config = git2::Config::new()?;
308
309        // Clone the repository, but make sure we use the "local" option in
310        // libgit2 which will attempt to use hardlinks to set up the database.
311        // This should speed up the clone operation quite a bit if it works.
312        //
313        // Note that we still use the same fetch options because while we don't
314        // need authentication information we may want progress bars and such.
315        let url = database.path.into_url()?;
316        let mut repo = None;
317        with_fetch_options(&git_config, url.as_str(), gctx, &mut |fopts| {
318            let mut checkout = git2::build::CheckoutBuilder::new();
319            checkout.dry_run(); // we'll do this below during a `reset`
320
321            let r = git2::build::RepoBuilder::new()
322                // use hard links and/or copy the database, we're doing a
323                // filesystem clone so this'll speed things up quite a bit.
324                .clone_local(git2::build::CloneLocal::Local)
325                .with_checkout(checkout)
326                .fetch_options(fopts)
327                .clone(url.as_str(), into)?;
328            // `git2` doesn't seem to handle shallow repos correctly when doing
329            // a local clone. Fortunately all that's needed is the copy of the
330            // one file that defines the shallow boundary, the commits which
331            // have their parents omitted as part of the shallow clone.
332            //
333            // TODO(git2): remove this when git2 supports shallow clone correctly
334            if database.repo.is_shallow() {
335                std::fs::copy(
336                    database.repo.path().join("shallow"),
337                    r.path().join("shallow"),
338                )?;
339            }
340            repo = Some(r);
341            Ok(())
342        })?;
343        let repo = repo.unwrap();
344
345        let checkout = GitCheckout::new(database, revision, repo);
346        let guard = checkout.reset(gctx)?;
347        Ok((checkout, guard))
348    }
349
350    /// Checks if the `HEAD` of this checkout points to the expected revision.
351    fn is_fresh(&self) -> bool {
352        match self.repo.revparse_single("HEAD") {
353            Ok(ref head) if head.id() == self.revision => {
354                // See comments in reset() for why we check this
355                self.path.join(CHECKOUT_READY_LOCK).exists()
356            }
357            _ => false,
358        }
359    }
360
361    /// Similar to [`reset()`]. This roughly performs `git reset --hard` to the
362    /// revision of this checkout, with additional interrupt protection by a
363    /// dummy file [`CHECKOUT_READY_LOCK`].
364    ///
365    /// If we're interrupted while performing a `git reset` (e.g., we die
366    /// because of a signal) Cargo needs to be sure to try to check out this
367    /// repo again on the next go-round.
368    ///
369    /// To enable this we have a dummy file in our checkout, [`.cargo-ok`],
370    /// which if present means that the repo has been successfully reset and is
371    /// ready to go. Hence if we start to do a reset, we make sure this file
372    /// *doesn't* exist. The caller of [`reset`] has an option to perform additional operations
373    /// (e.g. submodule update) before marking the check-out as ready.
374    ///
375    /// [`.cargo-ok`]: CHECKOUT_READY_LOCK
376    fn reset(&self, gctx: &GlobalContext) -> CargoResult<CheckoutGuard> {
377        let guard = CheckoutGuard::guard(&self.path);
378        info!("reset {} to {}", self.repo.path().display(), self.revision);
379
380        // Ensure libgit2 won't mess with newlines when we vendor.
381        if let Ok(mut git_config) = self.repo.config() {
382            git_config.set_bool("core.autocrlf", false)?;
383        }
384
385        let object = self.repo.find_object(self.revision, None)?;
386        reset(&self.repo, &object, gctx)?;
387
388        Ok(guard)
389    }
390
    /// Like `git submodule update --recursive` but for this git checkout.
    ///
    /// This function respects `submodule.<name>.update = none`[^1] git config.
    /// Submodules set to `none` won't be fetched.
    ///
    /// The work is done by two nested helpers that call each other:
    /// `update_submodules` iterates over the submodules of one repository and
    /// `update_submodule` handles a single one and then recurses.
    ///
    /// [^1]: <https://git-scm.com/docs/git-submodule#Documentation/git-submodule.txt-none>
    fn update_submodules(&self, gctx: &GlobalContext, quiet: bool) -> CargoResult<()> {
        // The top-level parent URL is the remote URL of the backing database.
        return update_submodules(&self.repo, gctx, quiet, self.remote_url());

        /// Recursive helper for [`GitCheckout::update_submodules`].
        ///
        /// `parent_remote_url` is the remote URL of `repo`; relative
        /// submodule URLs are made absolute against it.
        fn update_submodules(
            repo: &git2::Repository,
            gctx: &GlobalContext,
            quiet: bool,
            parent_remote_url: &str,
        ) -> CargoResult<()> {
            debug!("update submodules for: {:?}", repo.workdir().unwrap());

            // Stops at the first submodule that fails, naming it in the error.
            for mut child in repo.submodules()? {
                update_submodule(repo, &mut child, gctx, quiet, parent_remote_url).with_context(
                    || {
                        format!(
                            "failed to update submodule `{}`",
                            child.name().unwrap_or("")
                        )
                    },
                )?;
            }
            Ok(())
        }

        /// Update a single Git submodule, and recurse into its submodules.
        ///
        /// Returns early without fetching when the submodule's update
        /// strategy is `None`, when it has no head id, or when the already
        /// checked-out HEAD matches the recorded head id (in which case only
        /// the recursion happens).
        fn update_submodule(
            parent: &git2::Repository,
            child: &mut git2::Submodule<'_>,
            gctx: &GlobalContext,
            quiet: bool,
            parent_remote_url: &str,
        ) -> CargoResult<()> {
            child.init(false)?;

            let child_url_str = child
                .url()
                .with_context(|| {
                    format!("failed to update submodule `{}`", child.path().display())
                })?
                .ok_or_else(|| {
                    anyhow::format_err!(
                        "unable to update submodule `{}` without a path",
                        child.name().unwrap_or("")
                    )
                })?;

            // Skip the submodule if the config says not to update it.
            if child.update_strategy() == git2::SubmoduleUpdate::None {
                gctx.shell().status(
                    "Skipping",
                    format!(
                        "git submodule `{}` due to update strategy in .gitmodules",
                        child_url_str
                    ),
                )?;
                return Ok(());
            }

            // Relative URLs (`./`, `../`) are resolved against the parent's remote.
            let child_remote_url = absolute_submodule_url(parent_remote_url, child_url_str)?;

            // A submodule which is listed in .gitmodules but not actually
            // checked out will not have a head id, so we should ignore it.
            let Some(head) = child.head_id() else {
                return Ok(());
            };

            // If the submodule hasn't been checked out yet, we need to
            // clone it. If it has been checked out and the head is the same
            // as the submodule's head, then we can skip an update and keep
            // recursing.
            let head_and_repo = child.open().and_then(|repo| {
                let target = repo.head()?.target();
                Ok((target, repo))
            });
            let repo = match head_and_repo {
                // NOTE: this `head` shadows the outer one inside this arm; it
                // is the HEAD target of the repository that is checked out on
                // disk, compared against the head id recorded for the submodule.
                Ok((head, repo)) => {
                    if child.head_id() == head {
                        return update_submodules(&repo, gctx, quiet, &child_remote_url);
                    }
                    repo
                }
                Err(..) => {
                    let path = parent.workdir().unwrap().join(child.path());
                    // Best-effort cleanup: a removal failure is ignored here.
                    let _ = paths::remove_dir_all(&path);
                    init(&path, false)?
                }
            };
            // Fetch submodule database and checkout to target revision
            let reference = GitReference::Rev(head.to_string());

            // SCP-like URL is not a WHATWG Standard URL.
            // `url` crate can't parse SCP-like URLs.
            // We convert to `ssh://` for SourceId,
            // but preserve the original URL for fetch to maintain correct semantics
            // See <https://github.com/rust-lang/cargo/issues/16740>
            //
            // `fetch_url` is `Some` only when that conversion was necessary.
            let (source_url, fetch_url) = match child_remote_url.as_ref().into_url() {
                Ok(url) => (url, None),
                Err(_) => {
                    let ssh_url = scp_to_ssh(&child_remote_url)
                        .ok_or_else(|| anyhow::format_err!("invalid url `{child_remote_url}`"))?
                        .as_str()
                        .into_url()?;
                    (ssh_url, Some(child_remote_url.into_owned()))
                }
            };

            // GitSource created from SourceId without git precise will result to
            // locked_rev being Deferred and fetch_db always try to fetch if online
            let source_id =
                SourceId::for_git(&source_url, reference)?.with_git_precise(Some(head.to_string()));

            let mut source = match &fetch_url {
                Some(url) => GitSource::new_for_submodule(source_id, url.to_owned(), gctx)?,
                None => GitSource::new(source_id, gctx)?,
            };
            source.set_quiet(quiet);

            // NOTE(review): the meaning of the `true` argument is defined by
            // `GitSource::fetch_db`, which is not visible here — confirm there.
            let (db, actual_rev) = source.fetch_db(true).with_context(|| {
                let name = child.name().unwrap_or("");
                // Report the URL that was actually used for fetching.
                let url = fetch_url.unwrap_or_else(|| source_url.to_string());
                format!("failed to fetch submodule `{name}` from {url}")
            })?;
            // `copy_to` recurses into the submodule's own submodules when it
            // has to create a new checkout.
            db.copy_to(actual_rev, repo.path(), gctx, quiet)?;
            Ok(())
        }
    }
524}
525
/// See [`GitCheckout::reset`] for rationale on this type.
///
/// Creating a guard removes the ready marker file (best effort); consuming it
/// with `mark_ok` creates the file again.
#[must_use]
struct CheckoutGuard {
    /// Path of the [`CHECKOUT_READY_LOCK`] marker file inside the checkout.
    ok_file: PathBuf,
}
531
532impl CheckoutGuard {
533    fn guard(path: &Path) -> Self {
534        let ok_file = path.join(CHECKOUT_READY_LOCK);
535        let _ = paths::remove_file(&ok_file);
536        Self { ok_file }
537    }
538
539    fn mark_ok(self) -> CargoResult<()> {
540        let _ = paths::create(self.ok_file)?;
541        Ok(())
542    }
543}
544
545/// Constructs an absolute URL for a child submodule URL with its parent base URL.
546///
547/// Git only assumes a submodule URL is a relative path if it starts with `./`
548/// or `../` [^1]. To fetch the correct repo, we need to construct an absolute
549/// submodule URL.
550///
551/// At this moment it comes with some limitations:
552///
553/// * GitHub doesn't accept non-normalized URLs with relative paths.
554///   (`ssh://git@github.com/rust-lang/cargo.git/relative/..` is invalid)
555/// * `url` crate cannot parse SCP-like URLs.
556///   (`git@github.com:rust-lang/cargo.git` is not a valid WHATWG URL)
557///
558/// To overcome these, this patch always tries [`Url::parse`] first to normalize
559/// the path. If it couldn't, append the relative path and/or convert SCP-like URLs
560/// to ssh:// format as the last resorts and pray the remote git service supports
561/// non-normalized URLs.
562///
563/// See also rust-lang/cargo#12404 and rust-lang/cargo#12295.
564///
565/// [^1]: <https://git-scm.com/docs/git-submodule>
566fn absolute_submodule_url<'s>(base_url: &str, submodule_url: &'s str) -> CargoResult<Cow<'s, str>> {
567    let absolute_url = if ["./", "../"].iter().any(|p| submodule_url.starts_with(p)) {
568        match Url::parse(base_url) {
569            Ok(mut base_url) => {
570                let path = base_url.path();
571                if !path.ends_with('/') {
572                    base_url.set_path(&format!("{path}/"));
573                }
574                let absolute_url = base_url.join(submodule_url).with_context(|| {
575                    format!(
576                        "failed to parse relative child submodule url `{submodule_url}` \
577                        using parent base url `{base_url}`"
578                    )
579                })?;
580                Cow::from(absolute_url.to_string())
581            }
582            Err(_) => {
583                let mut absolute_url = base_url.to_string();
584                if !absolute_url.ends_with('/') {
585                    absolute_url.push('/');
586                }
587                absolute_url.push_str(submodule_url);
588                Cow::from(absolute_url)
589            }
590        }
591    } else {
592        Cow::from(submodule_url)
593    };
594
595    Ok(absolute_url)
596}
597
598/// Converts an SCP-like URL to `ssh://` format.
599fn scp_to_ssh(url: &str) -> Option<String> {
600    let mut gix_url = gix::url::parse(gix::bstr::BStr::new(url.as_bytes())).ok()?;
601    if gix_url.serialize_alternative_form && gix_url.scheme == gix::url::Scheme::Ssh {
602        gix_url.serialize_alternative_form = false;
603        Some(gix_url.to_bstring().to_string())
604    } else {
605        None
606    }
607}
608
609/// Prepare the authentication callbacks for cloning a git repository.
610///
611/// The main purpose of this function is to construct the "authentication
612/// callback" which is used to clone a repository. This callback will attempt to
613/// find the right authentication on the system (without user input) and will
614/// guide libgit2 in doing so.
615///
616/// The callback is provided `allowed` types of credentials, and we try to do as
617/// much as possible based on that:
618///
619/// * Prioritize SSH keys from the local ssh agent as they're likely the most
620///   reliable. The username here is prioritized from the credential
621///   callback, then from whatever is configured in git itself, and finally
622///   we fall back to the generic user of `git`.
623///
624/// * If a username/password is allowed, then we fallback to git2-rs's
625///   implementation of the credential helper. This is what is configured
626///   with `credential.helper` in git, and is the interface for the macOS
627///   keychain, for example.
628///
629/// * After the above two have failed, we just kinda grapple attempting to
630///   return *something*.
631///
632/// If any form of authentication fails, libgit2 will repeatedly ask us for
633/// credentials until we give it a reason to not do so. To ensure we don't
634/// just sit here looping forever we keep track of authentications we've
635/// attempted and we don't try the same ones again.
636fn with_authentication<T, F>(
637    gctx: &GlobalContext,
638    url: &str,
639    cfg: &git2::Config,
640    mut f: F,
641) -> CargoResult<T>
642where
643    F: FnMut(&mut git2::Credentials<'_>) -> CargoResult<T>,
644{
645    let mut cred_helper = git2::CredentialHelper::new(url);
646    cred_helper.config(cfg);
647
648    let mut ssh_username_requested = false;
649    let mut cred_helper_bad = None;
650    let mut ssh_agent_attempts = Vec::new();
651    let mut any_attempts = false;
652    let mut tried_sshkey = false;
653    let mut url_attempt = None;
654
655    let orig_url = url;
656    let mut res = f(&mut |url, username, allowed| {
657        any_attempts = true;
658        if url != orig_url {
659            url_attempt = Some(url.to_string());
660        }
661        // libgit2's "USERNAME" authentication actually means that it's just
662        // asking us for a username to keep going. This is currently only really
663        // used for SSH authentication and isn't really an authentication type.
664        // The logic currently looks like:
665        //
666        //      let user = ...;
667        //      if (user.is_null())
668        //          user = callback(USERNAME, null, ...);
669        //
670        //      callback(SSH_KEY, user, ...)
671        //
672        // So if we're being called here then we know that (a) we're using ssh
673        // authentication and (b) no username was specified in the URL that
674        // we're trying to clone. We need to guess an appropriate username here,
675        // but that may involve a few attempts. Unfortunately we can't switch
676        // usernames during one authentication session with libgit2, so to
677        // handle this we bail out of this authentication session after setting
678        // the flag `ssh_username_requested`, and then we handle this below.
679        if allowed.contains(git2::CredentialType::USERNAME) {
680            debug_assert!(username.is_none());
681            ssh_username_requested = true;
682            return Err(git2::Error::from_str("gonna try usernames later"));
683        }
684
685        // An "SSH_KEY" authentication indicates that we need some sort of SSH
686        // authentication. This can currently either come from the ssh-agent
687        // process or from a raw in-memory SSH key. Cargo only supports using
688        // ssh-agent currently.
689        //
690        // If we get called with this then the only way that should be possible
691        // is if a username is specified in the URL itself (e.g., `username` is
692        // Some), hence the unwrap() here. We try custom usernames down below.
693        if allowed.contains(git2::CredentialType::SSH_KEY) && !tried_sshkey {
694            // If ssh-agent authentication fails, libgit2 will keep
695            // calling this callback asking for other authentication
696            // methods to try. Make sure we only try ssh-agent once,
697            // to avoid looping forever.
698            tried_sshkey = true;
699            let username = username.unwrap();
700            debug_assert!(!ssh_username_requested);
701            ssh_agent_attempts.push(username.to_string());
702            return git2::Cred::ssh_key_from_agent(username);
703        }
704
705        // Sometimes libgit2 will ask for a username/password in plaintext. This
706        // is where Cargo would have an interactive prompt if we supported it,
707        // but we currently don't! Right now the only way we support fetching a
708        // plaintext password is through the `credential.helper` support, so
709        // fetch that here.
710        //
711        // If ssh-agent authentication fails, libgit2 will keep calling this
712        // callback asking for other authentication methods to try. Check
713        // cred_helper_bad to make sure we only try the git credential helper
714        // once, to avoid looping forever.
715        if allowed.contains(git2::CredentialType::USER_PASS_PLAINTEXT) && cred_helper_bad.is_none()
716        {
717            let r = git2::Cred::credential_helper(cfg, url, username);
718            cred_helper_bad = Some(r.is_err());
719            return r;
720        }
721
722        // I'm... not sure what the DEFAULT kind of authentication is, but seems
723        // easy to support?
724        if allowed.contains(git2::CredentialType::DEFAULT) {
725            return git2::Cred::default();
726        }
727
728        // Whelp, we tried our best
729        Err(git2::Error::from_str("no authentication methods succeeded"))
730    });
731
732    // Ok, so if it looks like we're going to be doing ssh authentication, we
733    // want to try a few different usernames as one wasn't specified in the URL
734    // for us to use. In order, we'll try:
735    //
736    // * A credential helper's username for this URL, if available.
737    // * This account's username.
738    // * "git"
739    //
740    // We have to restart the authentication session each time (due to
741    // constraints in libssh2 I guess? maybe this is inherent to ssh?), so we
742    // call our callback, `f`, in a loop here.
743    if ssh_username_requested {
744        debug_assert!(res.is_err());
745        let mut attempts = vec![String::from("git")];
746        if let Ok(s) = gctx.get_env("USER").or_else(|_| gctx.get_env("USERNAME")) {
747            attempts.push(s.to_string());
748        }
749        if let Some(ref s) = cred_helper.username {
750            attempts.push(s.clone());
751        }
752
753        while let Some(s) = attempts.pop() {
754            // We should get `USERNAME` first, where we just return our attempt,
755            // and then after that we should get `SSH_KEY`. If the first attempt
756            // fails we'll get called again, but we don't have another option so
757            // we bail out.
758            let mut attempts = 0;
759            res = f(&mut |_url, username, allowed| {
760                if allowed.contains(git2::CredentialType::USERNAME) {
761                    return git2::Cred::username(&s);
762                }
763                if allowed.contains(git2::CredentialType::SSH_KEY) {
764                    debug_assert_eq!(Some(&s[..]), username);
765                    attempts += 1;
766                    if attempts == 1 {
767                        ssh_agent_attempts.push(s.to_string());
768                        return git2::Cred::ssh_key_from_agent(&s);
769                    }
770                }
771                Err(git2::Error::from_str("no authentication methods succeeded"))
772            });
773
774            // If we made two attempts then that means:
775            //
776            // 1. A username was requested, we returned `s`.
777            // 2. An ssh key was requested, we returned to look up `s` in the
778            //    ssh agent.
779            // 3. For whatever reason that lookup failed, so we were asked again
780            //    for another mode of authentication.
781            //
782            // Essentially, if `attempts == 2` then in theory the only error was
783            // that this username failed to authenticate (e.g., no other network
784            // errors happened). Otherwise something else is funny so we bail
785            // out.
786            if attempts != 2 {
787                break;
788            }
789        }
790    }
791    let mut err = match res {
792        Ok(e) => return Ok(e),
793        Err(e) => e,
794    };
795
796    // In the case of an authentication failure (where we tried something) then
797    // we try to give a more helpful error message about precisely what we
798    // tried.
799    if any_attempts {
800        let mut msg = "failed to authenticate when downloading \
801                       repository"
802            .to_string();
803
804        if let Some(attempt) = &url_attempt {
805            if url != attempt {
806                msg.push_str(": ");
807                msg.push_str(attempt);
808            }
809        }
810        msg.push('\n');
811        if !ssh_agent_attempts.is_empty() {
812            let names = ssh_agent_attempts
813                .iter()
814                .map(|s| format!("`{}`", s))
815                .collect::<Vec<_>>()
816                .join(", ");
817            msg.push_str(&format!(
818                "\n* attempted ssh-agent authentication, but \
819                 no usernames succeeded: {}",
820                names
821            ));
822        }
823        if let Some(failed_cred_helper) = cred_helper_bad {
824            if failed_cred_helper {
825                msg.push_str(
826                    "\n* attempted to find username/password via \
827                     git's `credential.helper` support, but failed",
828                );
829            } else {
830                msg.push_str(
831                    "\n* attempted to find username/password via \
832                     `credential.helper`, but maybe the found \
833                     credentials were incorrect",
834                );
835            }
836        }
837        msg.push_str("\n\n");
838        msg.push_str("if the git CLI succeeds then `net.git-fetch-with-cli` may help here\n");
839        msg.push_str("https://doc.rust-lang.org/cargo/reference/config.html#netgit-fetch-with-cli");
840        err = err.context(msg);
841
        // Otherwise if we didn't even get to the authentication phase then we may
        // have failed to set up a connection, in these cases hint on the
        // `net.git-fetch-with-cli` configuration option.
845    } else if let Some(e) = err.downcast_ref::<git2::Error>() {
846        match e.class() {
847            ErrorClass::Net
848            | ErrorClass::Ssl
849            | ErrorClass::Submodule
850            | ErrorClass::FetchHead
851            | ErrorClass::Ssh
852            | ErrorClass::Http => {
853                let msg = format!(
854                    concat!(
855                        "network failure seems to have happened\n",
856                        "if a proxy or similar is necessary `net.git-fetch-with-cli` may help here\n",
857                        "https://doc.rust-lang.org/cargo/reference/config.html#netgit-fetch-with-cli",
858                        "{}"
859                    ),
860                    note_github_pull_request(url).unwrap_or_default()
861                );
862                err = err.context(msg);
863            }
864            ErrorClass::Callback => {
865                // This unwraps the git2 error. We're using the callback error
866                // specifically to convey errors from Rust land through the C
867                // callback interface. We don't need the `; class=Callback
868                // (26)` that gets tacked on to the git2 error message.
869                err = anyhow::format_err!("{}", e.message());
870            }
871            _ => {}
872        }
873    }
874
875    Err(err)
876}
877
878/// `git reset --hard` to the given `obj` for the `repo`.
879///
880/// The `obj` is a commit-ish to which the head should be moved.
881fn reset(repo: &git2::Repository, obj: &git2::Object<'_>, gctx: &GlobalContext) -> CargoResult<()> {
882    let mut pb = Progress::new("Checkout", gctx);
883    let mut opts = git2::build::CheckoutBuilder::new();
884    opts.progress(|_, cur, max| {
885        drop(pb.tick(cur, max, ""));
886    });
887    debug!("doing reset");
888    repo.reset(obj, git2::ResetType::Hard, Some(&mut opts))?;
889    debug!("reset done");
890    Ok(())
891}
892
893/// Prepares the callbacks for fetching a git repository.
894///
895/// The main purpose of this function is to construct everything before a fetch.
896/// This will attempt to setup a progress bar, the authentication for git,
897/// ssh known hosts check, and the network retry mechanism.
898///
899/// The callback is provided a fetch options, which can be used by the actual
900/// git fetch.
901pub fn with_fetch_options(
902    git_config: &git2::Config,
903    url: &str,
904    gctx: &GlobalContext,
905    cb: &mut dyn FnMut(git2::FetchOptions<'_>) -> CargoResult<()>,
906) -> CargoResult<()> {
907    let mut progress = Progress::new("Fetch", gctx);
908    let ssh_config = gctx.net_config()?.ssh.as_ref();
909    let config_known_hosts = ssh_config.and_then(|ssh| ssh.known_hosts.as_ref());
910    let diagnostic_home_config = gctx.diagnostic_home_config();
911    network::retry::with_retry(gctx, || {
912        // Hack: libgit2 disallows overriding the error from check_cb since v1.8.0,
913        // so we store the error additionally and unwrap it later
914        let mut check_cb_result = Ok(());
915        let auth_result = with_authentication(gctx, url, git_config, |f| {
916            let port = Url::parse(url).ok().and_then(|url| url.port());
917            let mut last_update = Instant::now();
918            let mut rcb = git2::RemoteCallbacks::new();
919            // We choose `N=10` here to make a `300ms * 10slots ~= 3000ms`
920            // sliding window for tracking the data transfer rate (in bytes/s).
921            let mut counter = MetricsCounter::<10>::new(0, last_update);
922            rcb.credentials(f);
923            rcb.certificate_check(|cert, host| {
924                match super::known_hosts::certificate_check(
925                    gctx,
926                    cert,
927                    host,
928                    port,
929                    config_known_hosts,
930                    &diagnostic_home_config,
931                ) {
932                    Ok(status) => Ok(status),
933                    Err(e) => {
934                        check_cb_result = Err(e);
935                        // This is not really used because it'll be overridden by libgit2
936                        // See https://github.com/libgit2/libgit2/commit/9a9f220119d9647a352867b24b0556195cb26548
937                        Err(git2::Error::from_str(
938                            "invalid or unknown remote ssh hostkey",
939                        ))
940                    }
941                }
942            });
943            rcb.transfer_progress(|stats| {
944                let indexed_deltas = stats.indexed_deltas();
945                let msg = if indexed_deltas > 0 {
946                    // Resolving deltas.
947                    format!(
948                        ", ({}/{}) resolving deltas",
949                        indexed_deltas,
950                        stats.total_deltas()
951                    )
952                } else {
953                    // Receiving objects.
954                    //
955                    // # Caveat
956                    //
957                    // Progress bar relies on git2 calling `transfer_progress`
958                    // to update its transfer rate, but we cannot guarantee a
959                    // periodic call of that callback. Thus if we don't receive
960                    // any data for, say, 10 seconds, the rate will get stuck
961                    // and never go down to 0B/s.
962                    // In the future, we need to find away to update the rate
963                    // even when the callback is not called.
964                    let now = Instant::now();
965                    // Scrape a `received_bytes` to the counter every 300ms.
966                    if now - last_update > Duration::from_millis(300) {
967                        counter.add(stats.received_bytes(), now);
968                        last_update = now;
969                    }
970                    let rate = HumanBytes(counter.rate() as u64);
971                    format!(", {rate:.2}/s")
972                };
973                progress
974                    .tick(stats.indexed_objects(), stats.total_objects(), &msg)
975                    .is_ok()
976            });
977
978            // Create a local anonymous remote in the repository to fetch the
979            // url
980            let mut opts = git2::FetchOptions::new();
981            opts.remote_callbacks(rcb);
982            cb(opts)
983        });
984        if auth_result.is_err() {
985            check_cb_result?;
986        }
987        auth_result?;
988        Ok(())
989    })
990}
991
992/// Attempts to fetch the given git `reference` for a Git repository.
993///
994/// This is the main entry for git clone/fetch. It does the followings:
995///
996/// * Turns [`GitReference`] into refspecs accordingly.
997/// * Dispatches `git fetch` using libgit2, gitoxide, or git CLI.
998///
999/// The `remote_url` argument is the git remote URL where we want to fetch from.
1000///
1001/// The `remote_kind` argument is a thing for [`-Zgitoxide`] shallow clones
1002/// at this time. It could be extended when libgit2 supports shallow clones.
1003///
1004/// [`-Zgitoxide`]: https://doc.rust-lang.org/nightly/cargo/reference/unstable.html#gitoxide
1005pub fn fetch(
1006    repo: &mut git2::Repository,
1007    remote_url: &str,
1008    manifest_reference: &GitReference,
1009    locked_reference: &GitReference,
1010    gctx: &GlobalContext,
1011    remote_kind: RemoteKind,
1012) -> CargoResult<()> {
1013    if let Some(offline_flag) = gctx.offline_flag() {
1014        anyhow::bail!(
1015            "attempting to update a git repository, but {offline_flag} \
1016             was specified"
1017        )
1018    }
1019
1020    let shallow = remote_kind.to_shallow_setting(repo.is_shallow(), gctx);
1021
1022    // Flag to keep track if the rev is a full commit hash
1023    let mut fast_path_rev: bool = false;
1024
1025    let oid_to_fetch = match github_fast_path(repo, remote_url, locked_reference, gctx) {
1026        Ok(FastPathRev::UpToDate) => return Ok(()),
1027        Ok(FastPathRev::NeedsFetch(rev)) => Some(rev),
1028        Ok(FastPathRev::Indeterminate) => None,
1029        Err(e) => {
1030            debug!("failed to check github {:?}", e);
1031            None
1032        }
1033    };
1034
1035    maybe_gc_repo(repo, gctx)?;
1036
1037    clean_repo_temp_files(repo);
1038
1039    // Translate the reference desired here into an actual list of refspecs
1040    // which need to get fetched. Additionally record if we're fetching tags.
1041    let mut refspecs = Vec::new();
1042    let mut tags = false;
1043    // The `+` symbol on the refspec means to allow a forced (fast-forward)
1044    // update which is needed if there is ever a force push that requires a
1045    // fast-forward.
1046    match locked_reference {
1047        // For branches and tags we can fetch simply one reference and copy it
1048        // locally, no need to fetch other branches/tags.
1049        GitReference::Branch(b) => {
1050            refspecs.push(format!("+refs/heads/{0}:refs/remotes/origin/{0}", b));
1051        }
1052
1053        GitReference::Tag(t) => {
1054            refspecs.push(format!("+refs/tags/{0}:refs/remotes/origin/tags/{0}", t));
1055        }
1056
1057        GitReference::DefaultBranch => {
1058            refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
1059        }
1060
1061        GitReference::Rev(rev) => {
1062            if rev.starts_with("refs/") {
1063                refspecs.push(format!("+{0}:{0}", rev));
1064            } else if let Some(oid_to_fetch) = oid_to_fetch {
1065                fast_path_rev = true;
1066                refspecs.push(format!("+{0}:refs/commit/{0}", oid_to_fetch));
1067            } else if !matches!(shallow, gix::remote::fetch::Shallow::NoChange)
1068                && rev_to_oid(rev).is_some()
1069            {
1070                // There is a specific commit to fetch and we will do so in shallow-mode only
1071                // to not disturb the previous logic.
1072                // Note that with typical settings for shallowing, we will just fetch a single `rev`
1073                // as single commit.
1074                // The reason we write to `refs/remotes/origin/HEAD` is that it's of special significance
1075                // when during `GitReference::resolve()`, but otherwise it shouldn't matter.
1076                refspecs.push(format!("+{0}:refs/remotes/origin/HEAD", rev));
1077            } else if let GitReference::Rev(rev) = manifest_reference
1078                && rev.starts_with("refs/")
1079            {
1080                // If the lockfile has a commit. we can't directly fetch it (unless we're talking
1081                // to GitHub), so we fetch the ref associated with it from the manifest.
1082                refspecs.push(format!("+{0}:{0}", rev));
1083            } else {
1084                // We don't know what the rev will point to. To handle this
1085                // situation we fetch all branches and tags, and then we pray
1086                // it's somewhere in there.
1087                refspecs.push(String::from("+refs/heads/*:refs/remotes/origin/*"));
1088                refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
1089                tags = true;
1090            }
1091        }
1092    }
1093
1094    debug!("doing a fetch for {remote_url}");
1095    let result = if let Some(true) = gctx.net_config()?.git_fetch_with_cli {
1096        fetch_with_cli(repo, remote_url, &refspecs, tags, shallow, gctx)
1097    } else if gctx.cli_unstable().gitoxide.map_or(false, |git| git.fetch) {
1098        fetch_with_gitoxide(repo, remote_url, refspecs, tags, shallow, gctx)
1099    } else {
1100        fetch_with_libgit2(repo, remote_url, refspecs, tags, shallow, gctx)
1101    };
1102
1103    if fast_path_rev {
1104        if let Some(oid) = oid_to_fetch {
1105            return result.with_context(|| format!("revision {} not found", oid));
1106        }
1107    }
1108    result
1109}
1110
1111/// `gitoxide` uses shallow locks to assure consistency when fetching to and to avoid races, and to write
1112/// files atomically.
1113/// Cargo has its own lock files and doesn't need that mechanism for race protection, so a stray lock means
1114/// a signal interrupted a previous shallow fetch and doesn't mean a race is happening.
1115fn has_shallow_lock_file(err: &crate::sources::git::fetch::Error) -> bool {
1116    matches!(
1117        err,
1118        gix::env::collate::fetch::Error::Fetch(gix::remote::fetch::Error::Fetch(
1119            gix::protocol::fetch::Error::LockShallowFile(_)
1120        ))
1121    )
1122}
1123
1124/// Attempts to use `git` CLI installed on the system to fetch a repository,
1125/// when the config value [`net.git-fetch-with-cli`][1] is set.
1126///
1127/// Unfortunately `libgit2` is notably lacking in the realm of authentication
1128/// when compared to the `git` command line. As a result, allow an escape
1129/// hatch for users that would prefer to use `git`-the-CLI for fetching
1130/// repositories instead of `libgit2`-the-library. This should make more
1131/// flavors of authentication possible while also still giving us all the
1132/// speed and portability of using `libgit2`.
1133///
1134/// [1]: https://doc.rust-lang.org/nightly/cargo/reference/config.html#netgit-fetch-with-cli
1135fn fetch_with_cli(
1136    repo: &mut git2::Repository,
1137    url: &str,
1138    refspecs: &[String],
1139    tags: bool,
1140    shallow: gix::remote::fetch::Shallow,
1141    gctx: &GlobalContext,
1142) -> CargoResult<()> {
1143    debug!(target: "git-fetch", backend = "git-cli");
1144
1145    let mut cmd = ProcessBuilder::new("git");
1146    cmd.arg("fetch");
1147    if tags {
1148        cmd.arg("--tags");
1149    } else {
1150        cmd.arg("--no-tags");
1151    }
1152    if let gix::remote::fetch::Shallow::DepthAtRemote(depth) = shallow {
1153        let depth = 0i32.saturating_add_unsigned(depth.get());
1154        cmd.arg(format!("--depth={depth}"));
1155    }
1156    match gctx.shell().verbosity() {
1157        Verbosity::Normal => {}
1158        Verbosity::Verbose => {
1159            cmd.arg("--verbose");
1160        }
1161        Verbosity::Quiet => {
1162            cmd.arg("--quiet");
1163        }
1164    }
1165    cmd.arg("--force") // handle force pushes
1166        .arg("--update-head-ok") // see discussion in #2078
1167        .arg(url)
1168        .args(refspecs)
1169        // If cargo is run by git (for example, the `exec` command in `git
1170        // rebase`), the GIT_DIR is set by git and will point to the wrong
1171        // location. This makes sure GIT_DIR is always the repository path.
1172        .env("GIT_DIR", repo.path())
1173        // The reset of these may not be necessary, but I'm including them
1174        // just to be extra paranoid and avoid any issues.
1175        .env_remove("GIT_WORK_TREE")
1176        .env_remove("GIT_INDEX_FILE")
1177        .env_remove("GIT_OBJECT_DIRECTORY")
1178        .env_remove("GIT_ALTERNATE_OBJECT_DIRECTORIES")
1179        .cwd(repo.path());
1180    gctx.shell()
1181        .verbose(|s| s.status("Running", &cmd.to_string()))?;
1182    network::retry::with_retry(gctx, || {
1183        cmd.exec()
1184            .map_err(|error| GitCliError::new(error, true).into())
1185    })?;
1186
1187    Ok(())
1188}
1189
/// Fetches `refspecs` from `remote_url` using `gitoxide`.
///
/// The fetch runs inside [`oxide::with_retry_and_progress`]. All tags are
/// requested when `tags` is set (otherwise only the included ones), and
/// `shallow` is forwarded to the fetch as-is.
///
/// If the on-disk repository had to be reinitialized along the way, `repo` is
/// replaced with a freshly opened `git2` handle for the same path before
/// returning.
fn fetch_with_gitoxide(
    repo: &mut git2::Repository,
    remote_url: &str,
    refspecs: Vec<String>,
    tags: bool,
    shallow: gix::remote::fetch::Shallow,
    gctx: &GlobalContext,
) -> CargoResult<()> {
    debug!(target: "git-fetch", backend = "gitoxide");

    // Renamed so that `repo` can refer to the gitoxide repository inside the
    // fetch closure below.
    let git2_repo = repo;
    let config_overrides = cargo_config_to_gitoxide_overrides(gctx)?;
    // The closure below is handed over by shared reference (`&|...|`), so it
    // records the reinitialization through an atomic instead of a `mut` local.
    let repo_reinitialized = AtomicBool::default();
    let res = oxide::with_retry_and_progress(
        git2_repo.path(),
        gctx,
        remote_url,
        &|repo_path,
          should_interrupt,
          mut progress,
          url_for_authentication: &mut dyn FnMut(&gix::bstr::BStr)| {
            // The `fetch` operation here may fail spuriously due to a corrupt
            // repository. It could also fail, however, for a whole slew of other
            // reasons (aka network related reasons). We want Cargo to automatically
            // recover from corrupt repositories, but we don't want Cargo to stomp
            // over other legitimate errors.
            //
            // Consequently we save off the error of the `fetch` operation and if it
            // looks like a "corrupt repo" error then we blow away the repo and try
            // again. If it looks like any other kind of error, or if we've already
            // blown away the repository, then we want to return the error as-is.
            loop {
                let res = oxide::open_repo(
                    repo_path,
                    config_overrides.clone(),
                    oxide::OpenMode::ForFetch,
                )
                .map_err(crate::sources::git::fetch::Error::from)
                .and_then(|repo| {
                    debug!("initiating fetch of {refspecs:?} from {remote_url}");
                    // Reborrow so the `move` credentials closure below does
                    // not consume the callback; the loop may run again.
                    let url_for_authentication = &mut *url_for_authentication;
                    let remote = repo
                        .remote_at(remote_url)?
                        .with_fetch_tags(if tags {
                            gix::remote::fetch::Tags::All
                        } else {
                            gix::remote::fetch::Tags::Included
                        })
                        .with_refspecs(
                            refspecs.iter().map(|s| s.as_str()),
                            gix::remote::Direction::Fetch,
                        )
                        .map_err(crate::sources::git::fetch::Error::Other)?;
                    let url = remote
                        .url(gix::remote::Direction::Fetch)
                        .expect("set at init")
                        .to_owned();
                    let connection = remote.connect(gix::remote::Direction::Fetch)?;
                    let mut authenticate = connection.configured_credentials(url)?;
                    // Wrap the configured credential handling: whenever the
                    // credential action carries a URL that differs from
                    // `remote_url`, report it through `url_for_authentication`
                    // before delegating to `authenticate`.
                    let connection = connection.with_credentials(
                        move |action: gix::protocol::credentials::helper::Action| {
                            if let Some(url) = action
                                .context()
                                .and_then(|gctx| gctx.url.as_ref().filter(|url| *url != remote_url))
                            {
                                url_for_authentication(url.as_ref());
                            }
                            authenticate(action)
                        },
                    );
                    let outcome = connection
                        .prepare_fetch(&mut progress, gix::remote::ref_map::Options::default())?
                        .with_shallow(shallow.clone())
                        .receive(&mut progress, should_interrupt)?;
                    Ok(outcome)
                });
                let err = match res {
                    Ok(_) => break,
                    Err(e) => e,
                };
                debug!("fetch failed: {}", err);

                // NOTE(review): `&&` binds tighter than `||`, so this reads as
                // `(!reinitialized && corrupted) || shallow_lock`. A stray
                // shallow lock therefore triggers a reinitialization even if
                // one already happened -- confirm that this is intended.
                if !repo_reinitialized.load(Ordering::Relaxed)
                        // We check for errors that could occur if the configuration, refs or odb files are corrupted.
                        // We don't check for errors related to writing as `gitoxide` is expected to create missing leading
                        // folder before writing files into it, or else not even open a directory as git repository (which is
                        // also handled here).
                        && err.is_corrupted()
                    || has_shallow_lock_file(&err)
                {
                    repo_reinitialized.store(true, Ordering::Relaxed);
                    debug!(
                        "looks like this is a corrupt repository, reinitializing \
                     and trying again"
                    );
                    if oxide::reinitialize(repo_path).is_ok() {
                        continue;
                    }
                }

                return Err(err.into());
            }
            Ok(())
        },
    );
    // The repository was recreated on disk, so reopen it to replace the old
    // `git2` handle.
    if repo_reinitialized.load(Ordering::Relaxed) {
        *git2_repo = git2::Repository::open(git2_repo.path())?;
    }
    res
}
1300
1301fn fetch_with_libgit2(
1302    repo: &mut git2::Repository,
1303    remote_url: &str,
1304    refspecs: Vec<String>,
1305    tags: bool,
1306    shallow: gix::remote::fetch::Shallow,
1307    gctx: &GlobalContext,
1308) -> CargoResult<()> {
1309    debug!(target: "git-fetch", backend = "libgit2");
1310
1311    let git_config = git2::Config::open_default()?;
1312    with_fetch_options(&git_config, remote_url, gctx, &mut |mut opts| {
1313        if tags {
1314            opts.download_tags(git2::AutotagOption::All);
1315        }
1316        if let gix::remote::fetch::Shallow::DepthAtRemote(depth) = shallow {
1317            opts.depth(0i32.saturating_add_unsigned(depth.get()));
1318        }
1319        // The `fetch` operation here may fail spuriously due to a corrupt
1320        // repository. It could also fail, however, for a whole slew of other
1321        // reasons (aka network related reasons). We want Cargo to automatically
1322        // recover from corrupt repositories, but we don't want Cargo to stomp
1323        // over other legitimate errors.
1324        //
1325        // Consequently we save off the error of the `fetch` operation and if it
1326        // looks like a "corrupt repo" error then we blow away the repo and try
1327        // again. If it looks like any other kind of error, or if we've already
1328        // blown away the repository, then we want to return the error as-is.
1329        let mut repo_reinitialized = false;
1330        loop {
1331            debug!("initiating fetch of {refspecs:?} from {remote_url}");
1332            let res = repo
1333                .remote_anonymous(remote_url)?
1334                .fetch(&refspecs, Some(&mut opts), None);
1335            let err = match res {
1336                Ok(()) => break,
1337                Err(e) => e,
1338            };
1339            debug!("fetch failed: {}", err);
1340
1341            if !repo_reinitialized && matches!(err.class(), ErrorClass::Reference | ErrorClass::Odb)
1342            {
1343                repo_reinitialized = true;
1344                debug!(
1345                    "looks like this is a corrupt repository, reinitializing \
1346                     and trying again"
1347                );
1348                if reinitialize(repo).is_ok() {
1349                    continue;
1350                }
1351            }
1352
1353            return Err(err.into());
1354        }
1355        Ok(())
1356    })
1357}
1358
1359/// Attempts to `git gc` a repository.
1360///
1361/// Cargo has a bunch of long-lived git repositories in its global cache and
1362/// some, like the index, are updated very frequently. Right now each update
1363/// creates a new "pack file" inside the git database, and over time this can
1364/// cause bad performance and bad current behavior in libgit2.
1365///
1366/// One pathological use case today is where libgit2 opens hundreds of file
1367/// descriptors, getting us dangerously close to blowing out the OS limits of
1368/// how many fds we can have open. This is detailed in [#4403].
1369///
1370/// Instead of trying to be clever about when gc is needed, we just run
1371/// `git gc --auto` and let git figure it out. It checks its own thresholds
1372/// (gc.auto, gc.autoPackLimit) and either does the work or exits quickly.
1373/// If git isn't installed, no worries - we skip it.
1374///
1375/// [#4403]: https://github.com/rust-lang/cargo/issues/4403
1376fn maybe_gc_repo(repo: &mut git2::Repository, gctx: &GlobalContext) -> CargoResult<()> {
1377    // Let git decide whether gc is actually needed based on its own thresholds
1378    // (gc.auto, gc.autoPackLimit). This avoids duplicating git's internal logic
1379    // for deciding when housekeeping is needed.
1380    //
1381    // For testing purposes, __CARGO_PACKFILE_LIMIT can be set to override
1382    // gc.autoPackLimit, which has the same meaning. This lets tests force gc
1383    // to run by setting a low threshold without depending on git's defaults.
1384    let mut cmd = Command::new("git");
1385    if let Ok(limit) = gctx.get_env("__CARGO_PACKFILE_LIMIT") {
1386        cmd.arg(format!("-c gc.autoPackLimit={}", limit));
1387    }
1388    cmd.arg("gc").arg("--auto").current_dir(repo.path());
1389
1390    match cmd.output() {
1391        Ok(out) => {
1392            debug!(
1393                "git-gc --auto status: {}\n\nstdout ---\n{}\nstderr ---\n{}",
1394                out.status,
1395                String::from_utf8_lossy(&out.stdout),
1396                String::from_utf8_lossy(&out.stderr)
1397            );
1398            if out.status.success() {
1399                let new = git2::Repository::open(repo.path())?;
1400                *repo = new;
1401                return Ok(());
1402            }
1403        }
1404        Err(e) => debug!("git-gc --auto failed to spawn: {}", e),
1405    }
1406
1407    // Alright all else failed, let's start over.
1408    reinitialize(repo)
1409}
1410
1411/// Removes temporary files left from previous activity.
1412///
1413/// If libgit2 is interrupted while indexing pack files, it will leave behind
1414/// some temporary files that it doesn't clean up. These can be quite large in
1415/// size, so this tries to clean things up.
1416///
1417/// This intentionally ignores errors. This is only an opportunistic cleaning,
1418/// and we don't really care if there are issues (there's unlikely anything
1419/// that can be done).
1420///
1421/// The git CLI has similar behavior (its temp files look like
1422/// `objects/pack/tmp_pack_9kUSA8`). Those files are normally deleted via `git
1423/// prune` which is run by `git gc`. However, it doesn't know about libgit2's
1424/// filenames, so they never get cleaned up.
1425fn clean_repo_temp_files(repo: &git2::Repository) {
1426    let path = repo.path().join("objects/pack/pack_git2_*");
1427    let Some(pattern) = path.to_str() else {
1428        tracing::warn!("cannot convert {path:?} to a string");
1429        return;
1430    };
1431    let Ok(paths) = glob::glob(pattern) else {
1432        return;
1433    };
1434    for path in paths {
1435        if let Ok(path) = path {
1436            match paths::remove_file(&path) {
1437                Ok(_) => tracing::debug!("removed stale temp git file {path:?}"),
1438                Err(e) => {
1439                    tracing::warn!("failed to remove {path:?} while cleaning temp files: {e}")
1440                }
1441            }
1442        }
1443    }
1444}
1445
1446/// Reinitializes a given Git repository. This is useful when a Git repository
1447/// seems corrupted and we want to start over.
1448fn reinitialize(repo: &mut git2::Repository) -> CargoResult<()> {
1449    // Here we want to drop the current repository object pointed to by `repo`,
1450    // so we initialize temporary repository in a sub-folder, blow away the
1451    // existing git folder, and then recreate the git repo. Finally we blow away
1452    // the `tmp` folder we allocated.
1453    let path = repo.path().to_path_buf();
1454    debug!("reinitializing git repo at {:?}", path);
1455    let tmp = path.join("tmp");
1456    let bare = !repo.path().ends_with(".git");
1457    *repo = init(&tmp, false)?;
1458    for entry in path.read_dir()? {
1459        let entry = entry?;
1460        if entry.file_name().to_str() == Some("tmp") {
1461            continue;
1462        }
1463        let path = entry.path();
1464        drop(paths::remove_file(&path).or_else(|_| paths::remove_dir_all(&path)));
1465    }
1466    *repo = init(&path, bare)?;
1467    paths::remove_dir_all(&tmp)?;
1468    Ok(())
1469}
1470
1471/// Initializes a Git repository at `path`.
1472fn init(path: &Path, bare: bool) -> CargoResult<git2::Repository> {
1473    let mut opts = git2::RepositoryInitOptions::new();
1474    // Skip anything related to templates, they just call all sorts of issues as
1475    // we really don't want to use them yet they insist on being used. See #6240
1476    // for an example issue that comes up.
1477    opts.external_template(false);
1478    opts.bare(bare);
1479    Ok(git2::Repository::init_opts(&path, &opts)?)
1480}
1481
/// The outcome of the GitHub fast path check. See [`github_fast_path`] for
/// how each variant is produced.
enum FastPathRev {
    /// The local rev (determined by `resolve_ref(reference, repo)`) is already
    /// up to date with what this rev resolves to on GitHub's server.
    UpToDate,
    /// The following SHA must be fetched in order for the local rev to become
    /// up to date.
    NeedsFetch(Oid),
    /// Don't know whether the local rev is up to date. We'll fetch _all_
    /// branches and tags from the server and see what happens.
    Indeterminate,
}
1494
1495/// Attempts GitHub's special fast path for testing if we've already got an
1496/// up-to-date copy of the repository.
1497///
1498/// Updating the index is done pretty regularly so we want it to be as fast as
1499/// possible. For registries hosted on GitHub (like the crates.io index) there's
1500/// a fast path available to use[^1] to tell us that there's no updates to be
1501/// made.
1502///
1503/// Note that this function should never cause an actual failure because it's
1504/// just a fast path. As a result, a caller should ignore `Err` returned from
1505/// this function and move forward on the normal path.
1506///
1507/// [^1]: <https://developer.github.com/v3/repos/commits/#get-the-sha-1-of-a-commit-reference>
1508fn github_fast_path(
1509    repo: &mut git2::Repository,
1510    url: &str,
1511    reference: &GitReference,
1512    gctx: &GlobalContext,
1513) -> CargoResult<FastPathRev> {
1514    let url = Url::parse(url)?;
1515    if !is_github(&url) {
1516        return Ok(FastPathRev::Indeterminate);
1517    }
1518
1519    let local_object = resolve_ref(reference, repo).ok();
1520
1521    let github_branch_name = match reference {
1522        GitReference::Branch(branch) => branch,
1523        GitReference::Tag(tag) => tag,
1524        GitReference::DefaultBranch => "HEAD",
1525        GitReference::Rev(rev) => {
1526            if rev.starts_with("refs/") {
1527                rev
1528            } else if looks_like_commit_hash(rev) {
1529                // `revparse_single` (used by `resolve`) is the only way to turn
1530                // short hash -> long hash, but it also parses other things,
1531                // like branch and tag names, which might coincidentally be
1532                // valid hex.
1533                //
1534                // We only return early if `rev` is a prefix of the object found
1535                // by `revparse_single`. Don't bother talking to GitHub in that
1536                // case, since commit hashes are permanent. If a commit with the
1537                // requested hash is already present in the local clone, its
1538                // contents must be the same as what is on the server for that
1539                // hash.
1540                //
1541                // If `rev` is not found locally by `revparse_single`, we'll
1542                // need GitHub to resolve it and get a hash. If `rev` is found
1543                // but is not a short hash of the found object, it's probably a
1544                // branch and we also need to get a hash from GitHub, in case
1545                // the branch has moved.
1546                if let Some(local_object) = local_object {
1547                    if is_short_hash_of(rev, local_object) {
1548                        debug!("github fast path already has {local_object}");
1549                        return Ok(FastPathRev::UpToDate);
1550                    }
1551                }
1552                // If `rev` is a full commit hash, the only thing it can resolve
1553                // to is itself. Don't bother talking to GitHub in that case
1554                // either. (This ensures that we always attempt to fetch the
1555                // commit directly even if we can't reach the GitHub API.)
1556                if let Some(oid) = rev_to_oid(rev) {
1557                    debug!("github fast path is already a full commit hash {rev}");
1558                    return Ok(FastPathRev::NeedsFetch(oid));
1559                }
1560                rev
1561            } else {
1562                debug!("can't use github fast path with `rev = \"{}\"`", rev);
1563                return Ok(FastPathRev::Indeterminate);
1564            }
1565        }
1566    };
1567
1568    // This expects GitHub urls in the form `github.com/user/repo` and nothing
1569    // else
1570    let mut pieces = url
1571        .path_segments()
1572        .ok_or_else(|| anyhow!("no path segments on url"))?;
1573    let username = pieces
1574        .next()
1575        .ok_or_else(|| anyhow!("couldn't find username"))?;
1576    let repository = pieces
1577        .next()
1578        .ok_or_else(|| anyhow!("couldn't find repository name"))?;
1579    if pieces.next().is_some() {
1580        anyhow::bail!("too many segments on URL");
1581    }
1582
1583    // Trim off the `.git` from the repository, if present, since that's
1584    // optional for GitHub and won't work when we try to use the API as well.
1585    let repository = repository.strip_suffix(".git").unwrap_or(repository);
1586
1587    let url = format!(
1588        "https://api.github.com/repos/{}/{}/commits/{}",
1589        username, repository, github_branch_name,
1590    );
1591    debug!("attempting GitHub fast path for {}", url);
1592    let mut request =
1593        Request::get(url).header(http::header::ACCEPT, "application/vnd.github.3.sha");
1594    if let Some(local_object) = local_object {
1595        request = request.header(http::header::IF_NONE_MATCH, &format!("\"{local_object}\""));
1596    }
1597    let response = gctx
1598        .http_async()?
1599        .request_blocking(request.body(Vec::new())?)?;
1600    let response_code = response.status();
1601    if response_code == StatusCode::NOT_MODIFIED {
1602        debug!("github fast path up-to-date");
1603        Ok(FastPathRev::UpToDate)
1604    } else if response_code == StatusCode::OK
1605        && let Some(oid_to_fetch) = rev_to_oid(str::from_utf8(&response.body())?)
1606    {
1607        // response expected to be a full hash hexstring (40 or 64 chars)
1608        debug!("github fast path fetch {oid_to_fetch}");
1609        Ok(FastPathRev::NeedsFetch(oid_to_fetch))
1610    } else {
1611        // Usually response_code == 404 if the repository does not exist, and
1612        // response_code == 422 if exists but GitHub is unable to resolve the
1613        // requested rev.
1614        debug!("github fast path bad response code {response_code}");
1615        Ok(FastPathRev::Indeterminate)
1616    }
1617}
1618
1619/// Whether a `url` is one from GitHub.
1620fn is_github(url: &Url) -> bool {
1621    url.host_str() == Some("github.com")
1622}
1623
1624// Give some messages on GitHub PR URL given as is
1625pub(crate) fn note_github_pull_request(url: &str) -> Option<String> {
1626    if let Ok(url) = url.parse::<Url>()
1627        && is_github(&url)
1628    {
1629        let path_segments = url
1630            .path_segments()
1631            .map(|p| p.into_iter().collect::<Vec<_>>())
1632            .unwrap_or_default();
1633        if let [owner, repo, "pull", pr_number, ..] = path_segments[..] {
1634            let repo_url = format!("https://github.com/{owner}/{repo}.git");
1635            let rev = format!("refs/pull/{pr_number}/head");
1636            return Some(format!(
1637                concat!(
1638                    "\n\nnote: GitHub url {} is not a repository. \n",
1639                    "help: Replace the dependency with \n",
1640                    "       `git = \"{}\" rev = \"{}\"` \n",
1641                    "   to specify pull requests as dependencies' revision."
1642                ),
1643                url, repo_url, rev
1644            ));
1645        }
1646    }
1647
1648    None
1649}
1650
/// Tells whether `rev` could be a (possibly abbreviated) commit hash: at
/// least 7 characters long and made up of ASCII hex digits only.
fn looks_like_commit_hash(rev: &str) -> bool {
    if rev.len() < 7 {
        return false;
    }
    // Any non-ASCII character consists solely of bytes that are not ASCII hex
    // digits, so checking bytes gives the same answer as checking chars.
    rev.bytes().all(|byte| byte.is_ascii_hexdigit())
}
1655
1656/// Whether `rev` is a shorter hash of `oid`.
1657fn is_short_hash_of(rev: &str, oid: Oid) -> bool {
1658    let long_hash = oid.to_string();
1659    match long_hash.get(..rev.len()) {
1660        Some(truncated_long_hash) => truncated_long_hash.eq_ignore_ascii_case(rev),
1661        None => false,
1662    }
1663}
1664
/// Unit tests for the GitHub fast path and for submodule URL resolution.
#[cfg(test)]
mod tests {
    use super::*;

    /// A rev that is a full commit hash must make `github_fast_path` answer
    /// `NeedsFetch` with exactly that hash.
    #[test]
    fn github_fast_path_full_hash_returns_needs_fetch() {
        // The repository is freshly initialized, so presumably the hash below
        // cannot be resolved locally.
        let temp_dir = tempfile::TempDir::new().unwrap();
        let mut repo = git2::Repository::init_bare(temp_dir.path()).unwrap();
        let full_hash = "c9040898c9183ddbb9402dcbf749ed06d6ea90ad";
        let reference = GitReference::Rev(full_hash.to_string());
        let gctx = GlobalContext::default().unwrap();
        let expected_oid = rev_to_oid(full_hash).unwrap();

        let result =
            github_fast_path(&mut repo, "https://github.com/user/repo", &reference, &gctx).unwrap();

        assert!(matches!(result, FastPathRev::NeedsFetch(oid) if oid == expected_oid));
    }

    /// Checks `absolute_submodule_url` against a table of
    /// `(base URL, submodule URL, expected result)` cases.
    #[test]
    fn test_absolute_submodule_url() {
        // NOTE(review): the host `gitub.com` looks like a typo for
        // `github.com`. The expectations use the same spelling, so the cases
        // are self-consistent -- confirm before changing.
        let cases = [
            // `ssh://` base URLs: an absolute submodule URL is expected back
            // as is, while relative ones are expected to be resolved against
            // the base.
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "git@github.com:rust-lang/cargo.git",
                "git@github.com:rust-lang/cargo.git",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "./",
                "ssh://git@gitub.com/rust-lang/cargo/",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "../",
                "ssh://git@gitub.com/rust-lang/",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "./foo",
                "ssh://git@gitub.com/rust-lang/cargo/foo",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo/",
                "./foo",
                "ssh://git@gitub.com/rust-lang/cargo/foo",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo/",
                "../foo",
                "ssh://git@gitub.com/rust-lang/foo",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "../foo",
                "ssh://git@gitub.com/rust-lang/foo",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "../foo/bar/../baz",
                "ssh://git@gitub.com/rust-lang/foo/baz",
            ),
            // SCP-like base URLs: an absolute submodule URL is again expected
            // back as is, while relative ones are expected to be appended
            // after a single `/` without any normalization.
            (
                "git@github.com:rust-lang/cargo.git",
                "ssh://git@gitub.com/rust-lang/cargo",
                "ssh://git@gitub.com/rust-lang/cargo",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "./",
                "git@github.com:rust-lang/cargo.git/./",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "../",
                "git@github.com:rust-lang/cargo.git/../",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "./foo",
                "git@github.com:rust-lang/cargo.git/./foo",
            ),
            (
                "git@github.com:rust-lang/cargo.git/",
                "./foo",
                "git@github.com:rust-lang/cargo.git/./foo",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "../foo",
                "git@github.com:rust-lang/cargo.git/../foo",
            ),
            (
                "git@github.com:rust-lang/cargo.git/",
                "../foo",
                "git@github.com:rust-lang/cargo.git/../foo",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "../foo/bar/../baz",
                "git@github.com:rust-lang/cargo.git/../foo/bar/../baz",
            ),
        ];

        // The message names the offending inputs when a case fails.
        for (base_url, submodule_url, expected) in cases {
            let url = absolute_submodule_url(base_url, submodule_url).unwrap();
            assert_eq!(
                expected, url,
                "base `{base_url}`; submodule `{submodule_url}`"
            );
        }
    }
}
1778
1779/// Turns a full commit hash revision into an oid.
1780///
1781/// Git object ID is supposed to be a hex string of 20 (SHA1) or 32 (SHA256) bytes.
1782/// Its length must be double to the underlying bytes (40 or 64),
1783/// otherwise libgit2 would happily zero-pad the returned oid.
1784///
1785/// See:
1786///
1787/// * <https://github.com/rust-lang/cargo/issues/13188>
1788/// * <https://github.com/rust-lang/cargo/issues/13968>
1789pub(super) fn rev_to_oid(rev: &str) -> Option<Oid> {
1790    Oid::from_str(rev)
1791        .ok()
1792        .filter(|oid| oid.as_bytes().len() * 2 == rev.len())
1793}