Skip to main content

cargo/sources/git/
utils.rs

1//! Utilities for handling git repositories, mainly around
2//! authentication/cloning.
3
4use crate::core::{GitReference, SourceId, Verbosity};
5use crate::sources::git::fetch::RemoteKind;
6use crate::sources::git::oxide;
7use crate::sources::git::oxide::cargo_config_to_gitoxide_overrides;
8use crate::sources::git::source::GitSource;
9use crate::sources::source::Source as _;
10use crate::util::HumanBytes;
11use crate::util::errors::{CargoResult, GitCliError};
12use crate::util::{GlobalContext, IntoUrl, MetricsCounter, Progress, network};
13
14use anyhow::{Context as _, anyhow};
15use cargo_util::{ProcessBuilder, paths};
16use curl::easy::List;
17use git2::{ErrorClass, ObjectType, Oid};
18use tracing::{debug, info};
19use url::Url;
20
21use std::borrow::Cow;
22use std::path::{Path, PathBuf};
23use std::process::Command;
24use std::str;
25use std::sync::atomic::{AtomicBool, Ordering};
26use std::time::{Duration, Instant};
27
/// Name of a marker file inside a checkout. If the file is present, `git reset`
/// has been done and the repo checkout is ready to go.
/// See [`GitCheckout::reset`] for why we need this.
const CHECKOUT_READY_LOCK: &str = ".cargo-ok";
31
/// A short abbreviated OID.
///
/// Wraps the [`git2::Buf`] obtained from `short_id()` as-is, which avoids
/// extra allocations in [`GitDatabase::to_short_id`].
pub struct GitShortID(git2::Buf);

impl GitShortID {
    /// Views the short ID as a `str`.
    ///
    /// # Panics
    ///
    /// Panics if the wrapped buffer cannot be viewed as a `str`, i.e. when
    /// `git2::Buf::as_str` returns `None`.
    pub fn as_str(&self) -> &str {
        self.0.as_str().unwrap()
    }
}
43
/// A remote repository. It gets cloned into a local [`GitDatabase`].
///
/// Construct one with [`GitRemote::new`] from a parsed [`Url`], or with
/// `GitRemote::new_from_str` when the URL is only available as a raw string.
#[derive(PartialEq, Clone, Debug)]
pub struct GitRemote {
    /// URL to a remote repository.
    ///
    /// This may differ from the [`SourceId`] URL when the original URL
    /// can't be represented as a WHATWG [`Url`], for example SCP-like URLs.
    /// See <https://github.com/rust-lang/cargo/issues/16740>.
    url: String,
}
54
/// A local clone of a remote repository's database. Multiple [`GitCheckout`]s
/// can be cloned from a single [`GitDatabase`].
///
/// Instances are produced by [`GitRemote::checkout`] (after fetching from the
/// remote) or by [`GitRemote::db_at`] (opening a repository already on disk).
pub struct GitDatabase {
    /// The remote repository where this database is fetched from.
    remote: GitRemote,
    /// Path to the root of the underlying Git repository on the local filesystem.
    path: PathBuf,
    /// Underlying Git repository instance for this database.
    repo: git2::Repository,
}
65
/// A local checkout of a particular revision from a [`GitDatabase`].
///
/// Obtained through [`GitDatabase::copy_to`]. The lifetime `'a` is the borrow
/// of the database this checkout was created from.
pub struct GitCheckout<'a> {
    /// The git database where this checkout is cloned from.
    database: &'a GitDatabase,
    /// Path to the root of the underlying Git repository on the local filesystem.
    path: PathBuf,
    /// The git revision this checkout is for.
    revision: git2::Oid,
    /// Underlying Git repository instance for this checkout.
    repo: git2::Repository,
}
77
78impl GitRemote {
79    /// Creates an instance for a remote repository URL.
80    pub fn new(url: &Url) -> GitRemote {
81        GitRemote {
82            url: url.as_str().to_owned(),
83        }
84    }
85
86    /// Creates an instance with an URL that may not be a valid WHATWG URL.
87    ///
88    /// This is needed because [`SourceId`] hasn't yet supported SCP-like URLs.
89    pub(super) fn new_from_str(url: String) -> GitRemote {
90        GitRemote { url }
91    }
92
93    /// Gets the remote repository URL.
94    pub fn url(&self) -> &str {
95        &self.url
96    }
97
98    /// Fetches and checkouts to a reference or a revision from this remote
99    /// into a local path.
100    ///
101    /// This ensures that it gets the up-to-date commit when a named reference
102    /// is given (tag, branch, refs/*). Thus, network connection is involved.
103    ///
104    /// If we have a previous instance of [`GitDatabase`] then fetch into that
105    /// if we can. If that can successfully load our revision then we've
106    /// populated the database with the latest version of `reference`, so
107    /// return that database and the rev we resolve to.
108    pub fn checkout(
109        &self,
110        into: &Path,
111        db: Option<GitDatabase>,
112        manifest_reference: &GitReference,
113        reference: &GitReference,
114        gctx: &GlobalContext,
115    ) -> CargoResult<(GitDatabase, git2::Oid)> {
116        if let Some(mut db) = db {
117            fetch(
118                &mut db.repo,
119                self.url(),
120                manifest_reference,
121                reference,
122                gctx,
123                RemoteKind::GitDependency,
124            )
125            .with_context(|| format!("failed to fetch into: {}", into.display()))?;
126
127            if let Some(rev) = resolve_ref(reference, &db.repo).ok() {
128                return Ok((db, rev));
129            }
130        }
131
132        // Otherwise start from scratch to handle corrupt git repositories.
133        // After our fetch (which is interpreted as a clone now) we do the same
134        // resolution to figure out what we cloned.
135        if into.exists() {
136            paths::remove_dir_all(into)?;
137        }
138        paths::create_dir_all(into)?;
139        let mut repo = init(into, true)?;
140        fetch(
141            &mut repo,
142            self.url(),
143            manifest_reference,
144            reference,
145            gctx,
146            RemoteKind::GitDependency,
147        )
148        .with_context(|| format!("failed to clone into: {}", into.display()))?;
149        let rev = resolve_ref(reference, &repo)?;
150
151        Ok((
152            GitDatabase {
153                remote: self.clone(),
154                path: into.to_path_buf(),
155                repo,
156            },
157            rev,
158        ))
159    }
160
161    /// Creates a [`GitDatabase`] of this remote at `db_path`.
162    pub fn db_at(&self, db_path: &Path) -> CargoResult<GitDatabase> {
163        let repo = git2::Repository::open(db_path)?;
164        Ok(GitDatabase {
165            remote: self.clone(),
166            path: db_path.to_path_buf(),
167            repo,
168        })
169    }
170}
171
172impl GitDatabase {
173    /// Checkouts to a revision at `dest`ination from this database.
174    #[tracing::instrument(skip(self, gctx))]
175    pub fn copy_to(
176        &self,
177        rev: git2::Oid,
178        dest: &Path,
179        gctx: &GlobalContext,
180        quiet: bool,
181    ) -> CargoResult<GitCheckout<'_>> {
182        // If the existing checkout exists, and it is fresh, use it.
183        // A non-fresh checkout can happen if the checkout operation was
184        // interrupted. In that case, the checkout gets deleted and a new
185        // clone is created.
186        let checkout = match git2::Repository::open(dest)
187            .ok()
188            .map(|repo| GitCheckout::new(self, rev, repo))
189            .filter(|co| co.is_fresh())
190        {
191            Some(co) => co,
192            None => {
193                let (checkout, guard) = GitCheckout::clone_into(dest, self, rev, gctx)?;
194                checkout.update_submodules(gctx, quiet)?;
195                guard.mark_ok()?;
196                checkout
197            }
198        };
199
200        Ok(checkout)
201    }
202
203    /// Get a short OID for a `revision`, usually 7 chars or more if ambiguous.
204    pub fn to_short_id(&self, revision: git2::Oid) -> CargoResult<GitShortID> {
205        let obj = self.repo.find_object(revision, None)?;
206        Ok(GitShortID(obj.short_id()?))
207    }
208
209    /// Checks if the database contains the object of this `oid`..
210    pub fn contains(&self, oid: git2::Oid) -> bool {
211        self.repo.revparse_single(&oid.to_string()).is_ok()
212    }
213
214    /// [`resolve_ref`]s this reference with this database.
215    pub fn resolve(&self, r: &GitReference) -> CargoResult<git2::Oid> {
216        resolve_ref(r, &self.repo)
217    }
218}
219
220/// Resolves [`GitReference`] to an object ID with objects the `repo` currently has.
221pub fn resolve_ref(gitref: &GitReference, repo: &git2::Repository) -> CargoResult<git2::Oid> {
222    let id = match gitref {
223        // Note that we resolve the named tag here in sync with where it's
224        // fetched into via `fetch` below.
225        GitReference::Tag(s) => (|| -> CargoResult<git2::Oid> {
226            let refname = format!("refs/remotes/origin/tags/{}", s);
227            let id = repo.refname_to_id(&refname)?;
228            let obj = repo.find_object(id, None)?;
229            let obj = obj.peel(ObjectType::Commit)?;
230            Ok(obj.id())
231        })()
232        .with_context(|| format!("failed to find tag `{}`", s))?,
233
234        // Resolve the remote name since that's all we're configuring in
235        // `fetch` below.
236        GitReference::Branch(s) => {
237            let name = format!("origin/{}", s);
238            let b = repo
239                .find_branch(&name, git2::BranchType::Remote)
240                .with_context(|| format!("failed to find branch `{}`", s))?;
241            b.get()
242                .target()
243                .ok_or_else(|| anyhow::format_err!("branch `{}` did not have a target", s))?
244        }
245
246        // We'll be using the HEAD commit
247        GitReference::DefaultBranch => {
248            let head_id = repo.refname_to_id("refs/remotes/origin/HEAD")?;
249            let head = repo.find_object(head_id, None)?;
250            head.peel(ObjectType::Commit)?.id()
251        }
252
253        GitReference::Rev(s) => {
254            let obj = repo.revparse_single(s)?;
255            match obj.as_tag() {
256                Some(tag) => tag.target_id(),
257                None => obj.id(),
258            }
259        }
260    };
261    Ok(id)
262}
263
264impl<'a> GitCheckout<'a> {
265    /// Creates an instance of [`GitCheckout`]. This doesn't imply the checkout
266    /// is done. Use [`GitCheckout::is_fresh`] to check.
267    ///
268    /// * The `database` is where this checkout is from.
269    /// * The `repo` will be the checked out Git repository.
270    fn new(
271        database: &'a GitDatabase,
272        revision: git2::Oid,
273        repo: git2::Repository,
274    ) -> GitCheckout<'a> {
275        let path = repo.workdir().unwrap_or_else(|| repo.path());
276        GitCheckout {
277            path: path.to_path_buf(),
278            database,
279            revision,
280            repo,
281        }
282    }
283
284    /// Gets the remote repository URL.
285    fn remote_url(&self) -> &str {
286        self.database.remote.url()
287    }
288
289    /// Clone a repo for a `revision` into a local path from a `database`.
290    /// This is a filesystem-to-filesystem clone.
291    fn clone_into(
292        into: &Path,
293        database: &'a GitDatabase,
294        revision: git2::Oid,
295        gctx: &GlobalContext,
296    ) -> CargoResult<(GitCheckout<'a>, CheckoutGuard)> {
297        let dirname = into.parent().unwrap();
298        paths::create_dir_all(&dirname)?;
299        if into.exists() {
300            paths::remove_dir_all(into)?;
301        }
302
303        // we're doing a local filesystem-to-filesystem clone so there should
304        // be no need to respect global configuration options, so pass in
305        // an empty instance of `git2::Config` below.
306        let git_config = git2::Config::new()?;
307
308        // Clone the repository, but make sure we use the "local" option in
309        // libgit2 which will attempt to use hardlinks to set up the database.
310        // This should speed up the clone operation quite a bit if it works.
311        //
312        // Note that we still use the same fetch options because while we don't
313        // need authentication information we may want progress bars and such.
314        let url = database.path.into_url()?;
315        let mut repo = None;
316        with_fetch_options(&git_config, url.as_str(), gctx, &mut |fopts| {
317            let mut checkout = git2::build::CheckoutBuilder::new();
318            checkout.dry_run(); // we'll do this below during a `reset`
319
320            let r = git2::build::RepoBuilder::new()
321                // use hard links and/or copy the database, we're doing a
322                // filesystem clone so this'll speed things up quite a bit.
323                .clone_local(git2::build::CloneLocal::Local)
324                .with_checkout(checkout)
325                .fetch_options(fopts)
326                .clone(url.as_str(), into)?;
327            // `git2` doesn't seem to handle shallow repos correctly when doing
328            // a local clone. Fortunately all that's needed is the copy of the
329            // one file that defines the shallow boundary, the commits which
330            // have their parents omitted as part of the shallow clone.
331            //
332            // TODO(git2): remove this when git2 supports shallow clone correctly
333            if database.repo.is_shallow() {
334                std::fs::copy(
335                    database.repo.path().join("shallow"),
336                    r.path().join("shallow"),
337                )?;
338            }
339            repo = Some(r);
340            Ok(())
341        })?;
342        let repo = repo.unwrap();
343
344        let checkout = GitCheckout::new(database, revision, repo);
345        let guard = checkout.reset(gctx)?;
346        Ok((checkout, guard))
347    }
348
349    /// Checks if the `HEAD` of this checkout points to the expected revision.
350    fn is_fresh(&self) -> bool {
351        match self.repo.revparse_single("HEAD") {
352            Ok(ref head) if head.id() == self.revision => {
353                // See comments in reset() for why we check this
354                self.path.join(CHECKOUT_READY_LOCK).exists()
355            }
356            _ => false,
357        }
358    }
359
360    /// Similar to [`reset()`]. This roughly performs `git reset --hard` to the
361    /// revision of this checkout, with additional interrupt protection by a
362    /// dummy file [`CHECKOUT_READY_LOCK`].
363    ///
364    /// If we're interrupted while performing a `git reset` (e.g., we die
365    /// because of a signal) Cargo needs to be sure to try to check out this
366    /// repo again on the next go-round.
367    ///
368    /// To enable this we have a dummy file in our checkout, [`.cargo-ok`],
369    /// which if present means that the repo has been successfully reset and is
370    /// ready to go. Hence if we start to do a reset, we make sure this file
371    /// *doesn't* exist. The caller of [`reset`] has an option to perform additional operations
372    /// (e.g. submodule update) before marking the check-out as ready.
373    ///
374    /// [`.cargo-ok`]: CHECKOUT_READY_LOCK
375    fn reset(&self, gctx: &GlobalContext) -> CargoResult<CheckoutGuard> {
376        let guard = CheckoutGuard::guard(&self.path);
377        info!("reset {} to {}", self.repo.path().display(), self.revision);
378
379        // Ensure libgit2 won't mess with newlines when we vendor.
380        if let Ok(mut git_config) = self.repo.config() {
381            git_config.set_bool("core.autocrlf", false)?;
382        }
383
384        let object = self.repo.find_object(self.revision, None)?;
385        reset(&self.repo, &object, gctx)?;
386
387        Ok(guard)
388    }
389
    /// Like `git submodule update --recursive` but for this git checkout.
    ///
    /// This function respects `submodule.<name>.update = none`[^1] git config.
    /// Submodules set to `none` won't be fetched.
    ///
    /// * `gctx` is used for shell status output and for creating the
    ///   [`GitSource`] that fetches each submodule.
    /// * `quiet` is passed on to `set_quiet` of that source and to
    ///   [`GitDatabase::copy_to`] for every submodule.
    ///
    /// [^1]: <https://git-scm.com/docs/git-submodule#Documentation/git-submodule.txt-none>
    fn update_submodules(&self, gctx: &GlobalContext, quiet: bool) -> CargoResult<()> {
        // The helper functions below are nested items, so they can be called
        // here even though they are declared after this `return`.
        return update_submodules(&self.repo, gctx, quiet, self.remote_url());

        /// Recursive helper for [`GitCheckout::update_submodules`].
        ///
        /// Updates every submodule of `repo`. `parent_remote_url` is the
        /// remote URL of `repo` itself, used as the base for resolving
        /// relative submodule URLs.
        fn update_submodules(
            repo: &git2::Repository,
            gctx: &GlobalContext,
            quiet: bool,
            parent_remote_url: &str,
        ) -> CargoResult<()> {
            debug!("update submodules for: {:?}", repo.workdir().unwrap());

            for mut child in repo.submodules()? {
                update_submodule(repo, &mut child, gctx, quiet, parent_remote_url).with_context(
                    || {
                        format!(
                            "failed to update submodule `{}`",
                            child.name().unwrap_or("")
                        )
                    },
                )?;
            }
            Ok(())
        }

        /// Update a single Git submodule, and recurse into its submodules.
        ///
        /// `parent` is the repository containing `child`, and
        /// `parent_remote_url` is the remote URL of `parent`.
        fn update_submodule(
            parent: &git2::Repository,
            child: &mut git2::Submodule<'_>,
            gctx: &GlobalContext,
            quiet: bool,
            parent_remote_url: &str,
        ) -> CargoResult<()> {
            // NOTE(review): `false` is presumably the "overwrite" flag of
            // `git2::Submodule::init` — confirm against the git2 docs.
            child.init(false)?;

            let child_url_str = child.url().ok_or_else(|| {
                anyhow::format_err!("non-utf8 url for submodule {:?}?", child.path())
            })?;

            // Skip the submodule if the config says not to update it.
            if child.update_strategy() == git2::SubmoduleUpdate::None {
                gctx.shell().status(
                    "Skipping",
                    format!(
                        "git submodule `{}` due to update strategy in .gitmodules",
                        child_url_str
                    ),
                )?;
                return Ok(());
            }

            // Turn a relative (`./` or `../`) submodule URL into an absolute
            // one based on the parent's remote URL.
            let child_remote_url = absolute_submodule_url(parent_remote_url, child_url_str)?;

            // A submodule which is listed in .gitmodules but not actually
            // checked out will not have a head id, so we should ignore it.
            let Some(head) = child.head_id() else {
                return Ok(());
            };

            // If the submodule hasn't been checked out yet, we need to
            // clone it. If it has been checked out and the head is the same
            // as the submodule's head, then we can skip an update and keep
            // recursing.
            let head_and_repo = child.open().and_then(|repo| {
                let target = repo.head()?.target();
                Ok((target, repo))
            });
            let repo = match head_and_repo {
                // Note that `head` in this arm shadows the outer `head`: it
                // is the HEAD target of the submodule's existing repository,
                // which is compared against `child.head_id()`.
                Ok((head, repo)) => {
                    if child.head_id() == head {
                        return update_submodules(&repo, gctx, quiet, &child_remote_url);
                    }
                    repo
                }
                Err(..) => {
                    let path = parent.workdir().unwrap().join(child.path());
                    // Best-effort cleanup: a failure to remove the directory
                    // is deliberately ignored before re-initializing it.
                    let _ = paths::remove_dir_all(&path);
                    init(&path, false)?
                }
            };
            // Fetch submodule database and checkout to target revision
            let reference = GitReference::Rev(head.to_string());

            // SCP-like URL is not a WHATWG Standard URL.
            // `url` crate can't parse SCP-like URLs.
            // We convert to `ssh://` for SourceId,
            // but preserve the original URL for fetch to maintain correct semantics
            // See <https://github.com/rust-lang/cargo/issues/16740>
            let (source_url, fetch_url) = match child_remote_url.as_ref().into_url() {
                Ok(url) => (url, None),
                Err(_) => {
                    let ssh_url = scp_to_ssh(&child_remote_url)
                        .ok_or_else(|| anyhow::format_err!("invalid url `{child_remote_url}`"))?
                        .as_str()
                        .into_url()?;
                    (ssh_url, Some(child_remote_url.into_owned()))
                }
            };

            // GitSource created from SourceId without git precise will result to
            // locked_rev being Deferred and fetch_db always try to fetch if online
            let source_id =
                SourceId::for_git(&source_url, reference)?.with_git_precise(Some(head.to_string()));

            // Use the original (non-WHATWG) URL for fetching when there is one.
            let mut source = match &fetch_url {
                Some(url) => GitSource::new_for_submodule(source_id, url.to_owned(), gctx)?,
                None => GitSource::new(source_id, gctx)?,
            };
            source.set_quiet(quiet);

            let (db, actual_rev) = source.fetch_db(true).with_context(|| {
                let name = child.name().unwrap_or("");
                // Report the URL that was actually used for the fetch.
                let url = fetch_url.unwrap_or_else(|| source_url.to_string());
                format!("failed to fetch submodule `{name}` from {url}")
            })?;
            db.copy_to(actual_rev, repo.path(), gctx, quiet)?;
            Ok(())
        }
    }
515}
516
517/// See [`GitCheckout::reset`] for rationale on this type.
518#[must_use]
519struct CheckoutGuard {
520    ok_file: PathBuf,
521}
522
523impl CheckoutGuard {
524    fn guard(path: &Path) -> Self {
525        let ok_file = path.join(CHECKOUT_READY_LOCK);
526        let _ = paths::remove_file(&ok_file);
527        Self { ok_file }
528    }
529
530    fn mark_ok(self) -> CargoResult<()> {
531        let _ = paths::create(self.ok_file)?;
532        Ok(())
533    }
534}
535
536/// Constructs an absolute URL for a child submodule URL with its parent base URL.
537///
538/// Git only assumes a submodule URL is a relative path if it starts with `./`
539/// or `../` [^1]. To fetch the correct repo, we need to construct an absolute
540/// submodule URL.
541///
542/// At this moment it comes with some limitations:
543///
544/// * GitHub doesn't accept non-normalized URLs with relative paths.
545///   (`ssh://git@github.com/rust-lang/cargo.git/relative/..` is invalid)
546/// * `url` crate cannot parse SCP-like URLs.
547///   (`git@github.com:rust-lang/cargo.git` is not a valid WHATWG URL)
548///
549/// To overcome these, this patch always tries [`Url::parse`] first to normalize
550/// the path. If it couldn't, append the relative path and/or convert SCP-like URLs
551/// to ssh:// format as the last resorts and pray the remote git service supports
552/// non-normalized URLs.
553///
554/// See also rust-lang/cargo#12404 and rust-lang/cargo#12295.
555///
556/// [^1]: <https://git-scm.com/docs/git-submodule>
557fn absolute_submodule_url<'s>(base_url: &str, submodule_url: &'s str) -> CargoResult<Cow<'s, str>> {
558    let absolute_url = if ["./", "../"].iter().any(|p| submodule_url.starts_with(p)) {
559        match Url::parse(base_url) {
560            Ok(mut base_url) => {
561                let path = base_url.path();
562                if !path.ends_with('/') {
563                    base_url.set_path(&format!("{path}/"));
564                }
565                let absolute_url = base_url.join(submodule_url).with_context(|| {
566                    format!(
567                        "failed to parse relative child submodule url `{submodule_url}` \
568                        using parent base url `{base_url}`"
569                    )
570                })?;
571                Cow::from(absolute_url.to_string())
572            }
573            Err(_) => {
574                let mut absolute_url = base_url.to_string();
575                if !absolute_url.ends_with('/') {
576                    absolute_url.push('/');
577                }
578                absolute_url.push_str(submodule_url);
579                Cow::from(absolute_url)
580            }
581        }
582    } else {
583        Cow::from(submodule_url)
584    };
585
586    Ok(absolute_url)
587}
588
589/// Converts an SCP-like URL to `ssh://` format.
590fn scp_to_ssh(url: &str) -> Option<String> {
591    let mut gix_url = gix::url::parse(gix::bstr::BStr::new(url.as_bytes())).ok()?;
592    if gix_url.serialize_alternative_form && gix_url.scheme == gix::url::Scheme::Ssh {
593        gix_url.serialize_alternative_form = false;
594        Some(gix_url.to_bstring().to_string())
595    } else {
596        None
597    }
598}
599
600/// Prepare the authentication callbacks for cloning a git repository.
601///
602/// The main purpose of this function is to construct the "authentication
603/// callback" which is used to clone a repository. This callback will attempt to
604/// find the right authentication on the system (without user input) and will
605/// guide libgit2 in doing so.
606///
607/// The callback is provided `allowed` types of credentials, and we try to do as
608/// much as possible based on that:
609///
610/// * Prioritize SSH keys from the local ssh agent as they're likely the most
611///   reliable. The username here is prioritized from the credential
612///   callback, then from whatever is configured in git itself, and finally
613///   we fall back to the generic user of `git`.
614///
615/// * If a username/password is allowed, then we fallback to git2-rs's
616///   implementation of the credential helper. This is what is configured
617///   with `credential.helper` in git, and is the interface for the macOS
618///   keychain, for example.
619///
620/// * After the above two have failed, we just kinda grapple attempting to
621///   return *something*.
622///
623/// If any form of authentication fails, libgit2 will repeatedly ask us for
624/// credentials until we give it a reason to not do so. To ensure we don't
625/// just sit here looping forever we keep track of authentications we've
626/// attempted and we don't try the same ones again.
627fn with_authentication<T, F>(
628    gctx: &GlobalContext,
629    url: &str,
630    cfg: &git2::Config,
631    mut f: F,
632) -> CargoResult<T>
633where
634    F: FnMut(&mut git2::Credentials<'_>) -> CargoResult<T>,
635{
636    let mut cred_helper = git2::CredentialHelper::new(url);
637    cred_helper.config(cfg);
638
639    let mut ssh_username_requested = false;
640    let mut cred_helper_bad = None;
641    let mut ssh_agent_attempts = Vec::new();
642    let mut any_attempts = false;
643    let mut tried_sshkey = false;
644    let mut url_attempt = None;
645
646    let orig_url = url;
647    let mut res = f(&mut |url, username, allowed| {
648        any_attempts = true;
649        if url != orig_url {
650            url_attempt = Some(url.to_string());
651        }
652        // libgit2's "USERNAME" authentication actually means that it's just
653        // asking us for a username to keep going. This is currently only really
654        // used for SSH authentication and isn't really an authentication type.
655        // The logic currently looks like:
656        //
657        //      let user = ...;
658        //      if (user.is_null())
659        //          user = callback(USERNAME, null, ...);
660        //
661        //      callback(SSH_KEY, user, ...)
662        //
663        // So if we're being called here then we know that (a) we're using ssh
664        // authentication and (b) no username was specified in the URL that
665        // we're trying to clone. We need to guess an appropriate username here,
666        // but that may involve a few attempts. Unfortunately we can't switch
667        // usernames during one authentication session with libgit2, so to
668        // handle this we bail out of this authentication session after setting
669        // the flag `ssh_username_requested`, and then we handle this below.
670        if allowed.contains(git2::CredentialType::USERNAME) {
671            debug_assert!(username.is_none());
672            ssh_username_requested = true;
673            return Err(git2::Error::from_str("gonna try usernames later"));
674        }
675
676        // An "SSH_KEY" authentication indicates that we need some sort of SSH
677        // authentication. This can currently either come from the ssh-agent
678        // process or from a raw in-memory SSH key. Cargo only supports using
679        // ssh-agent currently.
680        //
681        // If we get called with this then the only way that should be possible
682        // is if a username is specified in the URL itself (e.g., `username` is
683        // Some), hence the unwrap() here. We try custom usernames down below.
684        if allowed.contains(git2::CredentialType::SSH_KEY) && !tried_sshkey {
685            // If ssh-agent authentication fails, libgit2 will keep
686            // calling this callback asking for other authentication
687            // methods to try. Make sure we only try ssh-agent once,
688            // to avoid looping forever.
689            tried_sshkey = true;
690            let username = username.unwrap();
691            debug_assert!(!ssh_username_requested);
692            ssh_agent_attempts.push(username.to_string());
693            return git2::Cred::ssh_key_from_agent(username);
694        }
695
696        // Sometimes libgit2 will ask for a username/password in plaintext. This
697        // is where Cargo would have an interactive prompt if we supported it,
698        // but we currently don't! Right now the only way we support fetching a
699        // plaintext password is through the `credential.helper` support, so
700        // fetch that here.
701        //
702        // If ssh-agent authentication fails, libgit2 will keep calling this
703        // callback asking for other authentication methods to try. Check
704        // cred_helper_bad to make sure we only try the git credential helper
705        // once, to avoid looping forever.
706        if allowed.contains(git2::CredentialType::USER_PASS_PLAINTEXT) && cred_helper_bad.is_none()
707        {
708            let r = git2::Cred::credential_helper(cfg, url, username);
709            cred_helper_bad = Some(r.is_err());
710            return r;
711        }
712
713        // I'm... not sure what the DEFAULT kind of authentication is, but seems
714        // easy to support?
715        if allowed.contains(git2::CredentialType::DEFAULT) {
716            return git2::Cred::default();
717        }
718
719        // Whelp, we tried our best
720        Err(git2::Error::from_str("no authentication methods succeeded"))
721    });
722
723    // Ok, so if it looks like we're going to be doing ssh authentication, we
724    // want to try a few different usernames as one wasn't specified in the URL
725    // for us to use. In order, we'll try:
726    //
727    // * A credential helper's username for this URL, if available.
728    // * This account's username.
729    // * "git"
730    //
731    // We have to restart the authentication session each time (due to
732    // constraints in libssh2 I guess? maybe this is inherent to ssh?), so we
733    // call our callback, `f`, in a loop here.
734    if ssh_username_requested {
735        debug_assert!(res.is_err());
736        let mut attempts = vec![String::from("git")];
737        if let Ok(s) = gctx.get_env("USER").or_else(|_| gctx.get_env("USERNAME")) {
738            attempts.push(s.to_string());
739        }
740        if let Some(ref s) = cred_helper.username {
741            attempts.push(s.clone());
742        }
743
744        while let Some(s) = attempts.pop() {
745            // We should get `USERNAME` first, where we just return our attempt,
746            // and then after that we should get `SSH_KEY`. If the first attempt
747            // fails we'll get called again, but we don't have another option so
748            // we bail out.
749            let mut attempts = 0;
750            res = f(&mut |_url, username, allowed| {
751                if allowed.contains(git2::CredentialType::USERNAME) {
752                    return git2::Cred::username(&s);
753                }
754                if allowed.contains(git2::CredentialType::SSH_KEY) {
755                    debug_assert_eq!(Some(&s[..]), username);
756                    attempts += 1;
757                    if attempts == 1 {
758                        ssh_agent_attempts.push(s.to_string());
759                        return git2::Cred::ssh_key_from_agent(&s);
760                    }
761                }
762                Err(git2::Error::from_str("no authentication methods succeeded"))
763            });
764
765            // If we made two attempts then that means:
766            //
767            // 1. A username was requested, we returned `s`.
768            // 2. An ssh key was requested, we returned to look up `s` in the
769            //    ssh agent.
770            // 3. For whatever reason that lookup failed, so we were asked again
771            //    for another mode of authentication.
772            //
773            // Essentially, if `attempts == 2` then in theory the only error was
774            // that this username failed to authenticate (e.g., no other network
775            // errors happened). Otherwise something else is funny so we bail
776            // out.
777            if attempts != 2 {
778                break;
779            }
780        }
781    }
782    let mut err = match res {
783        Ok(e) => return Ok(e),
784        Err(e) => e,
785    };
786
787    // In the case of an authentication failure (where we tried something) then
788    // we try to give a more helpful error message about precisely what we
789    // tried.
790    if any_attempts {
791        let mut msg = "failed to authenticate when downloading \
792                       repository"
793            .to_string();
794
795        if let Some(attempt) = &url_attempt {
796            if url != attempt {
797                msg.push_str(": ");
798                msg.push_str(attempt);
799            }
800        }
801        msg.push('\n');
802        if !ssh_agent_attempts.is_empty() {
803            let names = ssh_agent_attempts
804                .iter()
805                .map(|s| format!("`{}`", s))
806                .collect::<Vec<_>>()
807                .join(", ");
808            msg.push_str(&format!(
809                "\n* attempted ssh-agent authentication, but \
810                 no usernames succeeded: {}",
811                names
812            ));
813        }
814        if let Some(failed_cred_helper) = cred_helper_bad {
815            if failed_cred_helper {
816                msg.push_str(
817                    "\n* attempted to find username/password via \
818                     git's `credential.helper` support, but failed",
819                );
820            } else {
821                msg.push_str(
822                    "\n* attempted to find username/password via \
823                     `credential.helper`, but maybe the found \
824                     credentials were incorrect",
825                );
826            }
827        }
828        msg.push_str("\n\n");
829        msg.push_str("if the git CLI succeeds then `net.git-fetch-with-cli` may help here\n");
830        msg.push_str("https://doc.rust-lang.org/cargo/reference/config.html#netgit-fetch-with-cli");
831        err = err.context(msg);
832
833        // Otherwise if we didn't even get to the authentication phase them we may
834        // have failed to set up a connection, in these cases hint on the
835        // `net.git-fetch-with-cli` configuration option.
836    } else if let Some(e) = err.downcast_ref::<git2::Error>() {
837        match e.class() {
838            ErrorClass::Net
839            | ErrorClass::Ssl
840            | ErrorClass::Submodule
841            | ErrorClass::FetchHead
842            | ErrorClass::Ssh
843            | ErrorClass::Http => {
844                let msg = format!(
845                    concat!(
846                        "network failure seems to have happened\n",
847                        "if a proxy or similar is necessary `net.git-fetch-with-cli` may help here\n",
848                        "https://doc.rust-lang.org/cargo/reference/config.html#netgit-fetch-with-cli",
849                        "{}"
850                    ),
851                    note_github_pull_request(url).unwrap_or_default()
852                );
853                err = err.context(msg);
854            }
855            ErrorClass::Callback => {
856                // This unwraps the git2 error. We're using the callback error
857                // specifically to convey errors from Rust land through the C
858                // callback interface. We don't need the `; class=Callback
859                // (26)` that gets tacked on to the git2 error message.
860                err = anyhow::format_err!("{}", e.message());
861            }
862            _ => {}
863        }
864    }
865
866    Err(err)
867}
868
869/// `git reset --hard` to the given `obj` for the `repo`.
870///
871/// The `obj` is a commit-ish to which the head should be moved.
872fn reset(repo: &git2::Repository, obj: &git2::Object<'_>, gctx: &GlobalContext) -> CargoResult<()> {
873    let mut pb = Progress::new("Checkout", gctx);
874    let mut opts = git2::build::CheckoutBuilder::new();
875    opts.progress(|_, cur, max| {
876        drop(pb.tick(cur, max, ""));
877    });
878    debug!("doing reset");
879    repo.reset(obj, git2::ResetType::Hard, Some(&mut opts))?;
880    debug!("reset done");
881    Ok(())
882}
883
884/// Prepares the callbacks for fetching a git repository.
885///
886/// The main purpose of this function is to construct everything before a fetch.
887/// This will attempt to setup a progress bar, the authentication for git,
888/// ssh known hosts check, and the network retry mechanism.
889///
890/// The callback is provided a fetch options, which can be used by the actual
891/// git fetch.
892pub fn with_fetch_options(
893    git_config: &git2::Config,
894    url: &str,
895    gctx: &GlobalContext,
896    cb: &mut dyn FnMut(git2::FetchOptions<'_>) -> CargoResult<()>,
897) -> CargoResult<()> {
898    let mut progress = Progress::new("Fetch", gctx);
899    let ssh_config = gctx.net_config()?.ssh.as_ref();
900    let config_known_hosts = ssh_config.and_then(|ssh| ssh.known_hosts.as_ref());
901    let diagnostic_home_config = gctx.diagnostic_home_config();
902    network::retry::with_retry(gctx, || {
903        // Hack: libgit2 disallows overriding the error from check_cb since v1.8.0,
904        // so we store the error additionally and unwrap it later
905        let mut check_cb_result = Ok(());
906        let auth_result = with_authentication(gctx, url, git_config, |f| {
907            let port = Url::parse(url).ok().and_then(|url| url.port());
908            let mut last_update = Instant::now();
909            let mut rcb = git2::RemoteCallbacks::new();
910            // We choose `N=10` here to make a `300ms * 10slots ~= 3000ms`
911            // sliding window for tracking the data transfer rate (in bytes/s).
912            let mut counter = MetricsCounter::<10>::new(0, last_update);
913            rcb.credentials(f);
914            rcb.certificate_check(|cert, host| {
915                match super::known_hosts::certificate_check(
916                    gctx,
917                    cert,
918                    host,
919                    port,
920                    config_known_hosts,
921                    &diagnostic_home_config,
922                ) {
923                    Ok(status) => Ok(status),
924                    Err(e) => {
925                        check_cb_result = Err(e);
926                        // This is not really used because it'll be overridden by libgit2
927                        // See https://github.com/libgit2/libgit2/commit/9a9f220119d9647a352867b24b0556195cb26548
928                        Err(git2::Error::from_str(
929                            "invalid or unknown remote ssh hostkey",
930                        ))
931                    }
932                }
933            });
934            rcb.transfer_progress(|stats| {
935                let indexed_deltas = stats.indexed_deltas();
936                let msg = if indexed_deltas > 0 {
937                    // Resolving deltas.
938                    format!(
939                        ", ({}/{}) resolving deltas",
940                        indexed_deltas,
941                        stats.total_deltas()
942                    )
943                } else {
944                    // Receiving objects.
945                    //
946                    // # Caveat
947                    //
948                    // Progress bar relies on git2 calling `transfer_progress`
949                    // to update its transfer rate, but we cannot guarantee a
950                    // periodic call of that callback. Thus if we don't receive
951                    // any data for, say, 10 seconds, the rate will get stuck
952                    // and never go down to 0B/s.
953                    // In the future, we need to find away to update the rate
954                    // even when the callback is not called.
955                    let now = Instant::now();
956                    // Scrape a `received_bytes` to the counter every 300ms.
957                    if now - last_update > Duration::from_millis(300) {
958                        counter.add(stats.received_bytes(), now);
959                        last_update = now;
960                    }
961                    let rate = HumanBytes(counter.rate() as u64);
962                    format!(", {rate:.2}/s")
963                };
964                progress
965                    .tick(stats.indexed_objects(), stats.total_objects(), &msg)
966                    .is_ok()
967            });
968
969            // Create a local anonymous remote in the repository to fetch the
970            // url
971            let mut opts = git2::FetchOptions::new();
972            opts.remote_callbacks(rcb);
973            cb(opts)
974        });
975        if auth_result.is_err() {
976            check_cb_result?;
977        }
978        auth_result?;
979        Ok(())
980    })
981}
982
983/// Attempts to fetch the given git `reference` for a Git repository.
984///
985/// This is the main entry for git clone/fetch. It does the followings:
986///
987/// * Turns [`GitReference`] into refspecs accordingly.
988/// * Dispatches `git fetch` using libgit2, gitoxide, or git CLI.
989///
990/// The `remote_url` argument is the git remote URL where we want to fetch from.
991///
992/// The `remote_kind` argument is a thing for [`-Zgitoxide`] shallow clones
993/// at this time. It could be extended when libgit2 supports shallow clones.
994///
995/// [`-Zgitoxide`]: https://doc.rust-lang.org/nightly/cargo/reference/unstable.html#gitoxide
996pub fn fetch(
997    repo: &mut git2::Repository,
998    remote_url: &str,
999    manifest_reference: &GitReference,
1000    locked_reference: &GitReference,
1001    gctx: &GlobalContext,
1002    remote_kind: RemoteKind,
1003) -> CargoResult<()> {
1004    if let Some(offline_flag) = gctx.offline_flag() {
1005        anyhow::bail!(
1006            "attempting to update a git repository, but {offline_flag} \
1007             was specified"
1008        )
1009    }
1010
1011    let shallow = remote_kind.to_shallow_setting(repo.is_shallow(), gctx);
1012
1013    // Flag to keep track if the rev is a full commit hash
1014    let mut fast_path_rev: bool = false;
1015
1016    let oid_to_fetch = match github_fast_path(repo, remote_url, locked_reference, gctx) {
1017        Ok(FastPathRev::UpToDate) => return Ok(()),
1018        Ok(FastPathRev::NeedsFetch(rev)) => Some(rev),
1019        Ok(FastPathRev::Indeterminate) => None,
1020        Err(e) => {
1021            debug!("failed to check github {:?}", e);
1022            None
1023        }
1024    };
1025
1026    maybe_gc_repo(repo, gctx)?;
1027
1028    clean_repo_temp_files(repo);
1029
1030    // Translate the reference desired here into an actual list of refspecs
1031    // which need to get fetched. Additionally record if we're fetching tags.
1032    let mut refspecs = Vec::new();
1033    let mut tags = false;
1034    // The `+` symbol on the refspec means to allow a forced (fast-forward)
1035    // update which is needed if there is ever a force push that requires a
1036    // fast-forward.
1037    match locked_reference {
1038        // For branches and tags we can fetch simply one reference and copy it
1039        // locally, no need to fetch other branches/tags.
1040        GitReference::Branch(b) => {
1041            refspecs.push(format!("+refs/heads/{0}:refs/remotes/origin/{0}", b));
1042        }
1043
1044        GitReference::Tag(t) => {
1045            refspecs.push(format!("+refs/tags/{0}:refs/remotes/origin/tags/{0}", t));
1046        }
1047
1048        GitReference::DefaultBranch => {
1049            refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
1050        }
1051
1052        GitReference::Rev(rev) => {
1053            if rev.starts_with("refs/") {
1054                refspecs.push(format!("+{0}:{0}", rev));
1055            } else if let Some(oid_to_fetch) = oid_to_fetch {
1056                fast_path_rev = true;
1057                refspecs.push(format!("+{0}:refs/commit/{0}", oid_to_fetch));
1058            } else if !matches!(shallow, gix::remote::fetch::Shallow::NoChange)
1059                && rev_to_oid(rev).is_some()
1060            {
1061                // There is a specific commit to fetch and we will do so in shallow-mode only
1062                // to not disturb the previous logic.
1063                // Note that with typical settings for shallowing, we will just fetch a single `rev`
1064                // as single commit.
1065                // The reason we write to `refs/remotes/origin/HEAD` is that it's of special significance
1066                // when during `GitReference::resolve()`, but otherwise it shouldn't matter.
1067                refspecs.push(format!("+{0}:refs/remotes/origin/HEAD", rev));
1068            } else if let GitReference::Rev(rev) = manifest_reference
1069                && rev.starts_with("refs/")
1070            {
1071                // If the lockfile has a commit. we can't directly fetch it (unless we're talking
1072                // to GitHub), so we fetch the ref associated with it from the manifest.
1073                refspecs.push(format!("+{0}:{0}", rev));
1074            } else {
1075                // We don't know what the rev will point to. To handle this
1076                // situation we fetch all branches and tags, and then we pray
1077                // it's somewhere in there.
1078                refspecs.push(String::from("+refs/heads/*:refs/remotes/origin/*"));
1079                refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
1080                tags = true;
1081            }
1082        }
1083    }
1084
1085    debug!("doing a fetch for {remote_url}");
1086    let result = if let Some(true) = gctx.net_config()?.git_fetch_with_cli {
1087        fetch_with_cli(repo, remote_url, &refspecs, tags, shallow, gctx)
1088    } else if gctx.cli_unstable().gitoxide.map_or(false, |git| git.fetch) {
1089        fetch_with_gitoxide(repo, remote_url, refspecs, tags, shallow, gctx)
1090    } else {
1091        fetch_with_libgit2(repo, remote_url, refspecs, tags, shallow, gctx)
1092    };
1093
1094    if fast_path_rev {
1095        if let Some(oid) = oid_to_fetch {
1096            return result.with_context(|| format!("revision {} not found", oid));
1097        }
1098    }
1099    result
1100}
1101
1102/// `gitoxide` uses shallow locks to assure consistency when fetching to and to avoid races, and to write
1103/// files atomically.
1104/// Cargo has its own lock files and doesn't need that mechanism for race protection, so a stray lock means
1105/// a signal interrupted a previous shallow fetch and doesn't mean a race is happening.
1106fn has_shallow_lock_file(err: &crate::sources::git::fetch::Error) -> bool {
1107    matches!(
1108        err,
1109        gix::env::collate::fetch::Error::Fetch(gix::remote::fetch::Error::Fetch(
1110            gix::protocol::fetch::Error::LockShallowFile(_)
1111        ))
1112    )
1113}
1114
1115/// Attempts to use `git` CLI installed on the system to fetch a repository,
1116/// when the config value [`net.git-fetch-with-cli`][1] is set.
1117///
1118/// Unfortunately `libgit2` is notably lacking in the realm of authentication
1119/// when compared to the `git` command line. As a result, allow an escape
1120/// hatch for users that would prefer to use `git`-the-CLI for fetching
1121/// repositories instead of `libgit2`-the-library. This should make more
1122/// flavors of authentication possible while also still giving us all the
1123/// speed and portability of using `libgit2`.
1124///
1125/// [1]: https://doc.rust-lang.org/nightly/cargo/reference/config.html#netgit-fetch-with-cli
1126fn fetch_with_cli(
1127    repo: &mut git2::Repository,
1128    url: &str,
1129    refspecs: &[String],
1130    tags: bool,
1131    shallow: gix::remote::fetch::Shallow,
1132    gctx: &GlobalContext,
1133) -> CargoResult<()> {
1134    debug!(target: "git-fetch", backend = "git-cli");
1135
1136    let mut cmd = ProcessBuilder::new("git");
1137    cmd.arg("fetch");
1138    if tags {
1139        cmd.arg("--tags");
1140    } else {
1141        cmd.arg("--no-tags");
1142    }
1143    if let gix::remote::fetch::Shallow::DepthAtRemote(depth) = shallow {
1144        let depth = 0i32.saturating_add_unsigned(depth.get());
1145        cmd.arg(format!("--depth={depth}"));
1146    }
1147    match gctx.shell().verbosity() {
1148        Verbosity::Normal => {}
1149        Verbosity::Verbose => {
1150            cmd.arg("--verbose");
1151        }
1152        Verbosity::Quiet => {
1153            cmd.arg("--quiet");
1154        }
1155    }
1156    cmd.arg("--force") // handle force pushes
1157        .arg("--update-head-ok") // see discussion in #2078
1158        .arg(url)
1159        .args(refspecs)
1160        // If cargo is run by git (for example, the `exec` command in `git
1161        // rebase`), the GIT_DIR is set by git and will point to the wrong
1162        // location. This makes sure GIT_DIR is always the repository path.
1163        .env("GIT_DIR", repo.path())
1164        // The reset of these may not be necessary, but I'm including them
1165        // just to be extra paranoid and avoid any issues.
1166        .env_remove("GIT_WORK_TREE")
1167        .env_remove("GIT_INDEX_FILE")
1168        .env_remove("GIT_OBJECT_DIRECTORY")
1169        .env_remove("GIT_ALTERNATE_OBJECT_DIRECTORIES")
1170        .cwd(repo.path());
1171    gctx.shell()
1172        .verbose(|s| s.status("Running", &cmd.to_string()))?;
1173    network::retry::with_retry(gctx, || {
1174        cmd.exec()
1175            .map_err(|error| GitCliError::new(error, true).into())
1176    })?;
1177
1178    Ok(())
1179}
1180
/// Fetches `refspecs` from `remote_url` using `gitoxide`.
///
/// The repository located at `repo.path()` is opened through
/// `oxide::open_repo` for every attempt. When `tags` is set all tags are
/// fetched, otherwise only the tags included by the fetched history
/// (`Tags::Included`). `shallow` is forwarded to the prepared fetch.
///
/// If the fetch fails in a way that looks like repository corruption, or
/// because of a leftover shallow lock file, the repository is reinitialized
/// and the fetch is attempted again. Whenever a reinitialization was
/// triggered, the `git2` handle in `repo` is reopened before returning so it
/// does not keep referring to the old repository state.
fn fetch_with_gitoxide(
    repo: &mut git2::Repository,
    remote_url: &str,
    refspecs: Vec<String>,
    tags: bool,
    shallow: gix::remote::fetch::Shallow,
    gctx: &GlobalContext,
) -> CargoResult<()> {
    debug!(target: "git-fetch", backend = "gitoxide");

    let git2_repo = repo;
    let config_overrides = cargo_config_to_gitoxide_overrides(gctx)?;
    // Shared with the fetch closure below, which is passed by shared
    // reference and therefore records the flag through an atomic.
    let repo_reinitialized = AtomicBool::default();
    let res = oxide::with_retry_and_progress(
        git2_repo.path(),
        gctx,
        remote_url,
        &|repo_path,
          should_interrupt,
          mut progress,
          url_for_authentication: &mut dyn FnMut(&gix::bstr::BStr)| {
            // The `fetch` operation here may fail spuriously due to a corrupt
            // repository. It could also fail, however, for a whole slew of other
            // reasons (aka network related reasons). We want Cargo to automatically
            // recover from corrupt repositories, but we don't want Cargo to stomp
            // over other legitimate errors.
            //
            // Consequently we save off the error of the `fetch` operation and if it
            // looks like a "corrupt repo" error then we blow away the repo and try
            // again. If it looks like any other kind of error, or if we've already
            // blown away the repository, then we want to return the error as-is.
            loop {
                let res = oxide::open_repo(
                    repo_path,
                    config_overrides.clone(),
                    oxide::OpenMode::ForFetch,
                )
                .map_err(crate::sources::git::fetch::Error::from)
                .and_then(|repo| {
                    debug!("initiating fetch of {refspecs:?} from {remote_url}");
                    let url_for_authentication = &mut *url_for_authentication;
                    let remote = repo
                        .remote_at(remote_url)?
                        .with_fetch_tags(if tags {
                            gix::remote::fetch::Tags::All
                        } else {
                            gix::remote::fetch::Tags::Included
                        })
                        .with_refspecs(
                            refspecs.iter().map(|s| s.as_str()),
                            gix::remote::Direction::Fetch,
                        )
                        .map_err(crate::sources::git::fetch::Error::Other)?;
                    let url = remote
                        .url(gix::remote::Direction::Fetch)
                        .expect("set at init")
                        .to_owned();
                    let connection = remote.connect(gix::remote::Direction::Fetch)?;
                    let mut authenticate = connection.configured_credentials(url)?;
                    // Wrap the configured credential helper: whenever the
                    // credential action carries a URL that differs from
                    // `remote_url`, report it through `url_for_authentication`
                    // before delegating to the real helper.
                    let connection = connection.with_credentials(
                        move |action: gix::protocol::credentials::helper::Action| {
                            if let Some(url) = action
                                .context()
                                .and_then(|gctx| gctx.url.as_ref().filter(|url| *url != remote_url))
                            {
                                url_for_authentication(url.as_ref());
                            }
                            authenticate(action)
                        },
                    );
                    let outcome = connection
                        .prepare_fetch(&mut progress, gix::remote::ref_map::Options::default())?
                        .with_shallow(shallow.clone())
                        .receive(&mut progress, should_interrupt)?;
                    Ok(outcome)
                });
                let err = match res {
                    Ok(_) => break,
                    Err(e) => e,
                };
                debug!("fetch failed: {}", err);

                // Note on precedence: `&&` binds tighter than `||`, so this
                // reads `(!reinitialized && corrupted) || shallow_lock`. A
                // stale shallow lock therefore triggers a reinitialization
                // even if the repository was already reinitialized once.
                if !repo_reinitialized.load(Ordering::Relaxed)
                        // We check for errors that could occur if the configuration, refs or odb files are corrupted.
                        // We don't check for errors related to writing as `gitoxide` is expected to create missing leading
                        // folder before writing files into it, or else not even open a directory as git repository (which is
                        // also handled here).
                        && err.is_corrupted()
                    || has_shallow_lock_file(&err)
                {
                    repo_reinitialized.store(true, Ordering::Relaxed);
                    debug!(
                        "looks like this is a corrupt repository, reinitializing \
                     and trying again"
                    );
                    if oxide::reinitialize(repo_path).is_ok() {
                        continue;
                    }
                }

                return Err(err.into());
            }
            Ok(())
        },
    );
    // The on-disk repository was replaced underneath the `git2` handle, so
    // reopen it to pick up the new state.
    if repo_reinitialized.load(Ordering::Relaxed) {
        *git2_repo = git2::Repository::open(git2_repo.path())?;
    }
    res
}
1291
1292fn fetch_with_libgit2(
1293    repo: &mut git2::Repository,
1294    remote_url: &str,
1295    refspecs: Vec<String>,
1296    tags: bool,
1297    shallow: gix::remote::fetch::Shallow,
1298    gctx: &GlobalContext,
1299) -> CargoResult<()> {
1300    debug!(target: "git-fetch", backend = "libgit2");
1301
1302    let git_config = git2::Config::open_default()?;
1303    with_fetch_options(&git_config, remote_url, gctx, &mut |mut opts| {
1304        if tags {
1305            opts.download_tags(git2::AutotagOption::All);
1306        }
1307        if let gix::remote::fetch::Shallow::DepthAtRemote(depth) = shallow {
1308            opts.depth(0i32.saturating_add_unsigned(depth.get()));
1309        }
1310        // The `fetch` operation here may fail spuriously due to a corrupt
1311        // repository. It could also fail, however, for a whole slew of other
1312        // reasons (aka network related reasons). We want Cargo to automatically
1313        // recover from corrupt repositories, but we don't want Cargo to stomp
1314        // over other legitimate errors.
1315        //
1316        // Consequently we save off the error of the `fetch` operation and if it
1317        // looks like a "corrupt repo" error then we blow away the repo and try
1318        // again. If it looks like any other kind of error, or if we've already
1319        // blown away the repository, then we want to return the error as-is.
1320        let mut repo_reinitialized = false;
1321        loop {
1322            debug!("initiating fetch of {refspecs:?} from {remote_url}");
1323            let res = repo
1324                .remote_anonymous(remote_url)?
1325                .fetch(&refspecs, Some(&mut opts), None);
1326            let err = match res {
1327                Ok(()) => break,
1328                Err(e) => e,
1329            };
1330            debug!("fetch failed: {}", err);
1331
1332            if !repo_reinitialized && matches!(err.class(), ErrorClass::Reference | ErrorClass::Odb)
1333            {
1334                repo_reinitialized = true;
1335                debug!(
1336                    "looks like this is a corrupt repository, reinitializing \
1337                     and trying again"
1338                );
1339                if reinitialize(repo).is_ok() {
1340                    continue;
1341                }
1342            }
1343
1344            return Err(err.into());
1345        }
1346        Ok(())
1347    })
1348}
1349
1350/// Attempts to `git gc` a repository.
1351///
1352/// Cargo has a bunch of long-lived git repositories in its global cache and
1353/// some, like the index, are updated very frequently. Right now each update
1354/// creates a new "pack file" inside the git database, and over time this can
1355/// cause bad performance and bad current behavior in libgit2.
1356///
1357/// One pathological use case today is where libgit2 opens hundreds of file
1358/// descriptors, getting us dangerously close to blowing out the OS limits of
1359/// how many fds we can have open. This is detailed in [#4403].
1360///
1361/// Instead of trying to be clever about when gc is needed, we just run
1362/// `git gc --auto` and let git figure it out. It checks its own thresholds
1363/// (gc.auto, gc.autoPackLimit) and either does the work or exits quickly.
1364/// If git isn't installed, no worries - we skip it.
1365///
1366/// [#4403]: https://github.com/rust-lang/cargo/issues/4403
1367fn maybe_gc_repo(repo: &mut git2::Repository, gctx: &GlobalContext) -> CargoResult<()> {
1368    // Let git decide whether gc is actually needed based on its own thresholds
1369    // (gc.auto, gc.autoPackLimit). This avoids duplicating git's internal logic
1370    // for deciding when housekeeping is needed.
1371    //
1372    // For testing purposes, __CARGO_PACKFILE_LIMIT can be set to override
1373    // gc.autoPackLimit, which has the same meaning. This lets tests force gc
1374    // to run by setting a low threshold without depending on git's defaults.
1375    let mut cmd = Command::new("git");
1376    if let Ok(limit) = gctx.get_env("__CARGO_PACKFILE_LIMIT") {
1377        cmd.arg(format!("-c gc.autoPackLimit={}", limit));
1378    }
1379    cmd.arg("gc").arg("--auto").current_dir(repo.path());
1380
1381    match cmd.output() {
1382        Ok(out) => {
1383            debug!(
1384                "git-gc --auto status: {}\n\nstdout ---\n{}\nstderr ---\n{}",
1385                out.status,
1386                String::from_utf8_lossy(&out.stdout),
1387                String::from_utf8_lossy(&out.stderr)
1388            );
1389            if out.status.success() {
1390                let new = git2::Repository::open(repo.path())?;
1391                *repo = new;
1392                return Ok(());
1393            }
1394        }
1395        Err(e) => debug!("git-gc --auto failed to spawn: {}", e),
1396    }
1397
1398    // Alright all else failed, let's start over.
1399    reinitialize(repo)
1400}
1401
1402/// Removes temporary files left from previous activity.
1403///
1404/// If libgit2 is interrupted while indexing pack files, it will leave behind
1405/// some temporary files that it doesn't clean up. These can be quite large in
1406/// size, so this tries to clean things up.
1407///
1408/// This intentionally ignores errors. This is only an opportunistic cleaning,
1409/// and we don't really care if there are issues (there's unlikely anything
1410/// that can be done).
1411///
1412/// The git CLI has similar behavior (its temp files look like
1413/// `objects/pack/tmp_pack_9kUSA8`). Those files are normally deleted via `git
1414/// prune` which is run by `git gc`. However, it doesn't know about libgit2's
1415/// filenames, so they never get cleaned up.
1416fn clean_repo_temp_files(repo: &git2::Repository) {
1417    let path = repo.path().join("objects/pack/pack_git2_*");
1418    let Some(pattern) = path.to_str() else {
1419        tracing::warn!("cannot convert {path:?} to a string");
1420        return;
1421    };
1422    let Ok(paths) = glob::glob(pattern) else {
1423        return;
1424    };
1425    for path in paths {
1426        if let Ok(path) = path {
1427            match paths::remove_file(&path) {
1428                Ok(_) => tracing::debug!("removed stale temp git file {path:?}"),
1429                Err(e) => {
1430                    tracing::warn!("failed to remove {path:?} while cleaning temp files: {e}")
1431                }
1432            }
1433        }
1434    }
1435}
1436
1437/// Reinitializes a given Git repository. This is useful when a Git repository
1438/// seems corrupted and we want to start over.
1439fn reinitialize(repo: &mut git2::Repository) -> CargoResult<()> {
1440    // Here we want to drop the current repository object pointed to by `repo`,
1441    // so we initialize temporary repository in a sub-folder, blow away the
1442    // existing git folder, and then recreate the git repo. Finally we blow away
1443    // the `tmp` folder we allocated.
1444    let path = repo.path().to_path_buf();
1445    debug!("reinitializing git repo at {:?}", path);
1446    let tmp = path.join("tmp");
1447    let bare = !repo.path().ends_with(".git");
1448    *repo = init(&tmp, false)?;
1449    for entry in path.read_dir()? {
1450        let entry = entry?;
1451        if entry.file_name().to_str() == Some("tmp") {
1452            continue;
1453        }
1454        let path = entry.path();
1455        drop(paths::remove_file(&path).or_else(|_| paths::remove_dir_all(&path)));
1456    }
1457    *repo = init(&path, bare)?;
1458    paths::remove_dir_all(&tmp)?;
1459    Ok(())
1460}
1461
1462/// Initializes a Git repository at `path`.
1463fn init(path: &Path, bare: bool) -> CargoResult<git2::Repository> {
1464    let mut opts = git2::RepositoryInitOptions::new();
1465    // Skip anything related to templates, they just call all sorts of issues as
1466    // we really don't want to use them yet they insist on being used. See #6240
1467    // for an example issue that comes up.
1468    opts.external_template(false);
1469    opts.bare(bare);
1470    Ok(git2::Repository::init_opts(&path, &opts)?)
1471}
1472
1473/// The result of GitHub fast path check. See [`github_fast_path`] for more.
1474enum FastPathRev {
1475    /// The local rev (determined by `reference.resolve(repo)`) is already up to
1476    /// date with what this rev resolves to on GitHub's server.
1477    UpToDate,
1478    /// The following SHA must be fetched in order for the local rev to become
1479    /// up to date.
1480    NeedsFetch(Oid),
1481    /// Don't know whether local rev is up to date. We'll fetch _all_ branches
1482    /// and tags from the server and see what happens.
1483    Indeterminate,
1484}
1485
1486/// Attempts GitHub's special fast path for testing if we've already got an
1487/// up-to-date copy of the repository.
1488///
1489/// Updating the index is done pretty regularly so we want it to be as fast as
1490/// possible. For registries hosted on GitHub (like the crates.io index) there's
1491/// a fast path available to use[^1] to tell us that there's no updates to be
1492/// made.
1493///
1494/// Note that this function should never cause an actual failure because it's
1495/// just a fast path. As a result, a caller should ignore `Err` returned from
1496/// this function and move forward on the normal path.
1497///
1498/// [^1]: <https://developer.github.com/v3/repos/commits/#get-the-sha-1-of-a-commit-reference>
1499fn github_fast_path(
1500    repo: &mut git2::Repository,
1501    url: &str,
1502    reference: &GitReference,
1503    gctx: &GlobalContext,
1504) -> CargoResult<FastPathRev> {
1505    let url = Url::parse(url)?;
1506    if !is_github(&url) {
1507        return Ok(FastPathRev::Indeterminate);
1508    }
1509
1510    let local_object = resolve_ref(reference, repo).ok();
1511
1512    let github_branch_name = match reference {
1513        GitReference::Branch(branch) => branch,
1514        GitReference::Tag(tag) => tag,
1515        GitReference::DefaultBranch => "HEAD",
1516        GitReference::Rev(rev) => {
1517            if rev.starts_with("refs/") {
1518                rev
1519            } else if looks_like_commit_hash(rev) {
1520                // `revparse_single` (used by `resolve`) is the only way to turn
1521                // short hash -> long hash, but it also parses other things,
1522                // like branch and tag names, which might coincidentally be
1523                // valid hex.
1524                //
1525                // We only return early if `rev` is a prefix of the object found
1526                // by `revparse_single`. Don't bother talking to GitHub in that
1527                // case, since commit hashes are permanent. If a commit with the
1528                // requested hash is already present in the local clone, its
1529                // contents must be the same as what is on the server for that
1530                // hash.
1531                //
1532                // If `rev` is not found locally by `revparse_single`, we'll
1533                // need GitHub to resolve it and get a hash. If `rev` is found
1534                // but is not a short hash of the found object, it's probably a
1535                // branch and we also need to get a hash from GitHub, in case
1536                // the branch has moved.
1537                if let Some(local_object) = local_object {
1538                    if is_short_hash_of(rev, local_object) {
1539                        debug!("github fast path already has {local_object}");
1540                        return Ok(FastPathRev::UpToDate);
1541                    }
1542                }
1543                // If `rev` is a full commit hash, the only thing it can resolve
1544                // to is itself. Don't bother talking to GitHub in that case
1545                // either. (This ensures that we always attempt to fetch the
1546                // commit directly even if we can't reach the GitHub API.)
1547                if let Some(oid) = rev_to_oid(rev) {
1548                    debug!("github fast path is already a full commit hash {rev}");
1549                    return Ok(FastPathRev::NeedsFetch(oid));
1550                }
1551                rev
1552            } else {
1553                debug!("can't use github fast path with `rev = \"{}\"`", rev);
1554                return Ok(FastPathRev::Indeterminate);
1555            }
1556        }
1557    };
1558
1559    // This expects GitHub urls in the form `github.com/user/repo` and nothing
1560    // else
1561    let mut pieces = url
1562        .path_segments()
1563        .ok_or_else(|| anyhow!("no path segments on url"))?;
1564    let username = pieces
1565        .next()
1566        .ok_or_else(|| anyhow!("couldn't find username"))?;
1567    let repository = pieces
1568        .next()
1569        .ok_or_else(|| anyhow!("couldn't find repository name"))?;
1570    if pieces.next().is_some() {
1571        anyhow::bail!("too many segments on URL");
1572    }
1573
1574    // Trim off the `.git` from the repository, if present, since that's
1575    // optional for GitHub and won't work when we try to use the API as well.
1576    let repository = repository.strip_suffix(".git").unwrap_or(repository);
1577
1578    let url = format!(
1579        "https://api.github.com/repos/{}/{}/commits/{}",
1580        username, repository, github_branch_name,
1581    );
1582    let mut handle = gctx.http()?.lock().unwrap();
1583    debug!("attempting GitHub fast path for {}", url);
1584    handle.get(true)?;
1585    handle.url(&url)?;
1586    handle.useragent("cargo")?;
1587    handle.follow_location(true)?; // follow redirects
1588    handle.http_headers({
1589        let mut headers = List::new();
1590        headers.append("Accept: application/vnd.github.3.sha")?;
1591        if let Some(local_object) = local_object {
1592            headers.append(&format!("If-None-Match: \"{}\"", local_object))?;
1593        }
1594        headers
1595    })?;
1596
1597    let mut response_body = Vec::new();
1598    let mut transfer = handle.transfer();
1599    transfer.write_function(|data| {
1600        response_body.extend_from_slice(data);
1601        Ok(data.len())
1602    })?;
1603    transfer.perform()?;
1604    drop(transfer); // end borrow of handle so that response_code can be called
1605
1606    let response_code = handle.response_code()?;
1607    if response_code == 304 {
1608        debug!("github fast path up-to-date");
1609        Ok(FastPathRev::UpToDate)
1610    } else if response_code == 200
1611        && let Some(oid_to_fetch) = rev_to_oid(str::from_utf8(&response_body)?)
1612    {
1613        // response expected to be a full hash hexstring (40 or 64 chars)
1614        debug!("github fast path fetch {oid_to_fetch}");
1615        Ok(FastPathRev::NeedsFetch(oid_to_fetch))
1616    } else {
1617        // Usually response_code == 404 if the repository does not exist, and
1618        // response_code == 422 if exists but GitHub is unable to resolve the
1619        // requested rev.
1620        debug!("github fast path bad response code {response_code}");
1621        Ok(FastPathRev::Indeterminate)
1622    }
1623}
1624
1625/// Whether a `url` is one from GitHub.
1626fn is_github(url: &Url) -> bool {
1627    url.host_str() == Some("github.com")
1628}
1629
1630// Give some messages on GitHub PR URL given as is
1631pub(crate) fn note_github_pull_request(url: &str) -> Option<String> {
1632    if let Ok(url) = url.parse::<Url>()
1633        && is_github(&url)
1634    {
1635        let path_segments = url
1636            .path_segments()
1637            .map(|p| p.into_iter().collect::<Vec<_>>())
1638            .unwrap_or_default();
1639        if let [owner, repo, "pull", pr_number, ..] = path_segments[..] {
1640            let repo_url = format!("https://github.com/{owner}/{repo}.git");
1641            let rev = format!("refs/pull/{pr_number}/head");
1642            return Some(format!(
1643                concat!(
1644                    "\n\nnote: GitHub url {} is not a repository. \n",
1645                    "help: Replace the dependency with \n",
1646                    "       `git = \"{}\" rev = \"{}\"` \n",
1647                    "   to specify pull requests as dependencies' revision."
1648                ),
1649                url, repo_url, rev
1650            ));
1651        }
1652    }
1653
1654    None
1655}
1656
/// Whether `rev` could be a (possibly abbreviated) commit hash: at least
/// seven characters long and made up solely of ASCII hex digits.
fn looks_like_commit_hash(rev: &str) -> bool {
    const MIN_HASH_LEN: usize = 7;
    if rev.len() < MIN_HASH_LEN {
        return false;
    }
    // Any byte of a non-ASCII character fails the hex-digit check as well.
    rev.bytes().all(|byte| byte.is_ascii_hexdigit())
}
1661
1662/// Whether `rev` is a shorter hash of `oid`.
1663fn is_short_hash_of(rev: &str, oid: Oid) -> bool {
1664    let long_hash = oid.to_string();
1665    match long_hash.get(..rev.len()) {
1666        Some(truncated_long_hash) => truncated_long_hash.eq_ignore_ascii_case(rev),
1667        None => false,
1668    }
1669}
1670
#[cfg(test)]
mod tests {
    use super::absolute_submodule_url;

    /// Table-driven check of `absolute_submodule_url`.
    #[test]
    fn test_absolute_submodule_url() {
        // Each entry is `(base URL, submodule URL, expected result)`.
        let cases = [
            // `ssh://` base with an absolute SCP-like submodule URL: the
            // submodule URL is expected unchanged.
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "git@github.com:rust-lang/cargo.git",
                "git@github.com:rust-lang/cargo.git",
            ),
            // `ssh://` base with relative submodule URLs: `.` and `..` are
            // expected to be resolved against the base.
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "./",
                "ssh://git@gitub.com/rust-lang/cargo/",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "../",
                "ssh://git@gitub.com/rust-lang/",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "./foo",
                "ssh://git@gitub.com/rust-lang/cargo/foo",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo/",
                "./foo",
                "ssh://git@gitub.com/rust-lang/cargo/foo",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo/",
                "../foo",
                "ssh://git@gitub.com/rust-lang/foo",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "../foo",
                "ssh://git@gitub.com/rust-lang/foo",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "../foo/bar/../baz",
                "ssh://git@gitub.com/rust-lang/foo/baz",
            ),
            // SCP-like base with an absolute `ssh://` submodule URL: the
            // submodule URL is expected unchanged.
            (
                "git@github.com:rust-lang/cargo.git",
                "ssh://git@gitub.com/rust-lang/cargo",
                "ssh://git@gitub.com/rust-lang/cargo",
            ),
            // SCP-like base with relative submodule URLs: the relative part
            // is expected to be appended after a single `/`, with `.` and
            // `..` left in place.
            (
                "git@github.com:rust-lang/cargo.git",
                "./",
                "git@github.com:rust-lang/cargo.git/./",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "../",
                "git@github.com:rust-lang/cargo.git/../",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "./foo",
                "git@github.com:rust-lang/cargo.git/./foo",
            ),
            (
                "git@github.com:rust-lang/cargo.git/",
                "./foo",
                "git@github.com:rust-lang/cargo.git/./foo",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "../foo",
                "git@github.com:rust-lang/cargo.git/../foo",
            ),
            (
                "git@github.com:rust-lang/cargo.git/",
                "../foo",
                "git@github.com:rust-lang/cargo.git/../foo",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "../foo/bar/../baz",
                "git@github.com:rust-lang/cargo.git/../foo/bar/../baz",
            ),
        ];

        for &(base_url, submodule_url, expected) in cases.iter() {
            let resolved = absolute_submodule_url(base_url, submodule_url).unwrap();
            assert_eq!(
                expected, resolved,
                "base `{base_url}`; submodule `{submodule_url}`"
            );
        }
    }
}
1769
1770/// Turns a full commit hash revision into an oid.
1771///
1772/// Git object ID is supposed to be a hex string of 20 (SHA1) or 32 (SHA256) bytes.
1773/// Its length must be double to the underlying bytes (40 or 64),
1774/// otherwise libgit2 would happily zero-pad the returned oid.
1775///
1776/// See:
1777///
1778/// * <https://github.com/rust-lang/cargo/issues/13188>
1779/// * <https://github.com/rust-lang/cargo/issues/13968>
1780pub(super) fn rev_to_oid(rev: &str) -> Option<Oid> {
1781    Oid::from_str(rev)
1782        .ok()
1783        .filter(|oid| oid.as_bytes().len() * 2 == rev.len())
1784}