cargo/sources/git/utils.rs
1//! Utilities for handling git repositories, mainly around
2//! authentication/cloning.
3
4use crate::core::{GitReference, SourceId};
5use crate::sources::git::fetch::RemoteKind;
6use crate::sources::git::oxide;
7use crate::sources::git::oxide::cargo_config_to_gitoxide_overrides;
8use crate::sources::git::source::GitSource;
9use crate::sources::source::Source as _;
10use crate::util::HumanBytes;
11use crate::util::errors::{CargoResult, GitCliError};
12use crate::util::{GlobalContext, IntoUrl, MetricsCounter, Progress, network};
13
14use anyhow::{Context as _, anyhow};
15use cargo_util::{ProcessBuilder, paths};
16use cargo_util_terminal::Verbosity;
17use curl::easy::List;
18use git2::{ErrorClass, ObjectType, Oid};
19use tracing::{debug, info};
20use url::Url;
21
22use std::borrow::Cow;
23use std::path::{Path, PathBuf};
24use std::process::Command;
25use std::str;
26use std::sync::atomic::{AtomicBool, Ordering};
27use std::time::{Duration, Instant};
28
/// Name of a marker file placed in the root of a checkout. If it is present,
/// `git reset` has been done and the repo checkout is ready to go.
/// See [`GitCheckout::reset`] for why we need this.
const CHECKOUT_READY_LOCK: &str = ".cargo-ok";
32
33/// A short abbreviated OID.
34///
35/// Exists for avoiding extra allocations in [`GitDatabase::to_short_id`].
36pub struct GitShortID(git2::Buf);
37
38impl GitShortID {
39 /// Views the short ID as a `str`.
40 pub fn as_str(&self) -> &str {
41 self.0.as_str().unwrap()
42 }
43}
44
/// A remote repository. It gets cloned into a local [`GitDatabase`].
///
/// The only state held is the remote's URL; use [`GitRemote::checkout`] to
/// fetch it into a local database, or [`GitRemote::db_at`] to open a database
/// that already exists on disk.
#[derive(PartialEq, Clone, Debug)]
pub struct GitRemote {
    /// URL to a remote repository.
    ///
    /// This is stored as a plain string rather than a [`Url`] because it
    /// may differ from the [`SourceId`] URL when the original URL
    /// can't be represented as a WHATWG [`Url`], for example SCP-like URLs.
    /// See <https://github.com/rust-lang/cargo/issues/16740>.
    url: String,
}
55
/// A local clone of a remote repository's database. Multiple [`GitCheckout`]s
/// can be cloned from a single [`GitDatabase`].
///
/// Instances are produced by [`GitRemote::checkout`] (fetching from the
/// remote) or [`GitRemote::db_at`] (opening an existing repository).
pub struct GitDatabase {
    /// The remote repository where this database is fetched from.
    remote: GitRemote,
    /// Path to the root of the underlying Git repository on the local filesystem.
    path: PathBuf,
    /// Underlying Git repository instance for this database.
    repo: git2::Repository,
}
66
/// A local checkout of a particular revision from a [`GitDatabase`].
///
/// The lifetime `'a` is the borrow of the database this checkout came from.
pub struct GitCheckout<'a> {
    /// The git database where this checkout is cloned from.
    database: &'a GitDatabase,
    /// Path to the root of the underlying Git repository on the local filesystem.
    ///
    /// This is the repository's working directory when it has one, and the
    /// repository path otherwise.
    path: PathBuf,
    /// The git revision this checkout is for.
    revision: git2::Oid,
    /// Underlying Git repository instance for this checkout.
    repo: git2::Repository,
}
78
79impl GitRemote {
80 /// Creates an instance for a remote repository URL.
81 pub fn new(url: &Url) -> GitRemote {
82 GitRemote {
83 url: url.as_str().to_owned(),
84 }
85 }
86
87 /// Creates an instance with an URL that may not be a valid WHATWG URL.
88 ///
89 /// This is needed because [`SourceId`] hasn't yet supported SCP-like URLs.
90 pub(super) fn new_from_str(url: String) -> GitRemote {
91 GitRemote { url }
92 }
93
94 /// Gets the remote repository URL.
95 pub fn url(&self) -> &str {
96 &self.url
97 }
98
99 /// Fetches and checkouts to a reference or a revision from this remote
100 /// into a local path.
101 ///
102 /// This ensures that it gets the up-to-date commit when a named reference
103 /// is given (tag, branch, refs/*). Thus, network connection is involved.
104 ///
105 /// If we have a previous instance of [`GitDatabase`] then fetch into that
106 /// if we can. If that can successfully load our revision then we've
107 /// populated the database with the latest version of `reference`, so
108 /// return that database and the rev we resolve to.
109 pub fn checkout(
110 &self,
111 into: &Path,
112 db: Option<GitDatabase>,
113 manifest_reference: &GitReference,
114 reference: &GitReference,
115 gctx: &GlobalContext,
116 ) -> CargoResult<(GitDatabase, git2::Oid)> {
117 if let Some(mut db) = db {
118 fetch(
119 &mut db.repo,
120 self.url(),
121 manifest_reference,
122 reference,
123 gctx,
124 RemoteKind::GitDependency,
125 )
126 .with_context(|| format!("failed to fetch into: {}", into.display()))?;
127
128 if let Some(rev) = resolve_ref(reference, &db.repo).ok() {
129 return Ok((db, rev));
130 }
131 }
132
133 // Otherwise start from scratch to handle corrupt git repositories.
134 // After our fetch (which is interpreted as a clone now) we do the same
135 // resolution to figure out what we cloned.
136 if into.exists() {
137 paths::remove_dir_all(into)?;
138 }
139 paths::create_dir_all(into)?;
140 let mut repo = init(into, true)?;
141 fetch(
142 &mut repo,
143 self.url(),
144 manifest_reference,
145 reference,
146 gctx,
147 RemoteKind::GitDependency,
148 )
149 .with_context(|| format!("failed to clone into: {}", into.display()))?;
150 let rev = resolve_ref(reference, &repo)?;
151
152 Ok((
153 GitDatabase {
154 remote: self.clone(),
155 path: into.to_path_buf(),
156 repo,
157 },
158 rev,
159 ))
160 }
161
162 /// Creates a [`GitDatabase`] of this remote at `db_path`.
163 pub fn db_at(&self, db_path: &Path) -> CargoResult<GitDatabase> {
164 let repo = git2::Repository::open(db_path)?;
165 Ok(GitDatabase {
166 remote: self.clone(),
167 path: db_path.to_path_buf(),
168 repo,
169 })
170 }
171}
172
173impl GitDatabase {
174 /// Checkouts to a revision at `dest`ination from this database.
175 #[tracing::instrument(skip(self, gctx))]
176 pub fn copy_to(
177 &self,
178 rev: git2::Oid,
179 dest: &Path,
180 gctx: &GlobalContext,
181 quiet: bool,
182 ) -> CargoResult<GitCheckout<'_>> {
183 // If the existing checkout exists, and it is fresh, use it.
184 // A non-fresh checkout can happen if the checkout operation was
185 // interrupted. In that case, the checkout gets deleted and a new
186 // clone is created.
187 let checkout = match git2::Repository::open(dest)
188 .ok()
189 .map(|repo| GitCheckout::new(self, rev, repo))
190 .filter(|co| co.is_fresh())
191 {
192 Some(co) => co,
193 None => {
194 let (checkout, guard) = GitCheckout::clone_into(dest, self, rev, gctx)?;
195 checkout.update_submodules(gctx, quiet)?;
196 guard.mark_ok()?;
197 checkout
198 }
199 };
200
201 Ok(checkout)
202 }
203
204 /// Get a short OID for a `revision`, usually 7 chars or more if ambiguous.
205 pub fn to_short_id(&self, revision: git2::Oid) -> CargoResult<GitShortID> {
206 let obj = self.repo.find_object(revision, None)?;
207 Ok(GitShortID(obj.short_id()?))
208 }
209
210 /// Checks if the database contains the object of this `oid`..
211 pub fn contains(&self, oid: git2::Oid) -> bool {
212 self.repo.revparse_single(&oid.to_string()).is_ok()
213 }
214
215 /// [`resolve_ref`]s this reference with this database.
216 pub fn resolve(&self, r: &GitReference) -> CargoResult<git2::Oid> {
217 resolve_ref(r, &self.repo)
218 }
219}
220
221/// Resolves [`GitReference`] to an object ID with objects the `repo` currently has.
222pub fn resolve_ref(gitref: &GitReference, repo: &git2::Repository) -> CargoResult<git2::Oid> {
223 let id = match gitref {
224 // Note that we resolve the named tag here in sync with where it's
225 // fetched into via `fetch` below.
226 GitReference::Tag(s) => (|| -> CargoResult<git2::Oid> {
227 let refname = format!("refs/remotes/origin/tags/{}", s);
228 let id = repo.refname_to_id(&refname)?;
229 let obj = repo.find_object(id, None)?;
230 let obj = obj.peel(ObjectType::Commit)?;
231 Ok(obj.id())
232 })()
233 .with_context(|| format!("failed to find tag `{}`", s))?,
234
235 // Resolve the remote name since that's all we're configuring in
236 // `fetch` below.
237 GitReference::Branch(s) => {
238 let name = format!("origin/{}", s);
239 let b = repo
240 .find_branch(&name, git2::BranchType::Remote)
241 .with_context(|| format!("failed to find branch `{}`", s))?;
242 b.get()
243 .target()
244 .ok_or_else(|| anyhow::format_err!("branch `{}` did not have a target", s))?
245 }
246
247 // We'll be using the HEAD commit
248 GitReference::DefaultBranch => {
249 let head_id = repo.refname_to_id("refs/remotes/origin/HEAD")?;
250 let head = repo.find_object(head_id, None)?;
251 head.peel(ObjectType::Commit)?.id()
252 }
253
254 GitReference::Rev(s) => {
255 let obj = repo.revparse_single(s)?;
256 match obj.as_tag() {
257 Some(tag) => tag.target_id(),
258 None => obj.id(),
259 }
260 }
261 };
262 Ok(id)
263}
264
265impl<'a> GitCheckout<'a> {
266 /// Creates an instance of [`GitCheckout`]. This doesn't imply the checkout
267 /// is done. Use [`GitCheckout::is_fresh`] to check.
268 ///
269 /// * The `database` is where this checkout is from.
270 /// * The `repo` will be the checked out Git repository.
271 fn new(
272 database: &'a GitDatabase,
273 revision: git2::Oid,
274 repo: git2::Repository,
275 ) -> GitCheckout<'a> {
276 let path = repo.workdir().unwrap_or_else(|| repo.path());
277 GitCheckout {
278 path: path.to_path_buf(),
279 database,
280 revision,
281 repo,
282 }
283 }
284
285 /// Gets the remote repository URL.
286 fn remote_url(&self) -> &str {
287 self.database.remote.url()
288 }
289
290 /// Clone a repo for a `revision` into a local path from a `database`.
291 /// This is a filesystem-to-filesystem clone.
292 fn clone_into(
293 into: &Path,
294 database: &'a GitDatabase,
295 revision: git2::Oid,
296 gctx: &GlobalContext,
297 ) -> CargoResult<(GitCheckout<'a>, CheckoutGuard)> {
298 let dirname = into.parent().unwrap();
299 paths::create_dir_all(&dirname)?;
300 if into.exists() {
301 paths::remove_dir_all(into)?;
302 }
303
304 // we're doing a local filesystem-to-filesystem clone so there should
305 // be no need to respect global configuration options, so pass in
306 // an empty instance of `git2::Config` below.
307 let git_config = git2::Config::new()?;
308
309 // Clone the repository, but make sure we use the "local" option in
310 // libgit2 which will attempt to use hardlinks to set up the database.
311 // This should speed up the clone operation quite a bit if it works.
312 //
313 // Note that we still use the same fetch options because while we don't
314 // need authentication information we may want progress bars and such.
315 let url = database.path.into_url()?;
316 let mut repo = None;
317 with_fetch_options(&git_config, url.as_str(), gctx, &mut |fopts| {
318 let mut checkout = git2::build::CheckoutBuilder::new();
319 checkout.dry_run(); // we'll do this below during a `reset`
320
321 let r = git2::build::RepoBuilder::new()
322 // use hard links and/or copy the database, we're doing a
323 // filesystem clone so this'll speed things up quite a bit.
324 .clone_local(git2::build::CloneLocal::Local)
325 .with_checkout(checkout)
326 .fetch_options(fopts)
327 .clone(url.as_str(), into)?;
328 // `git2` doesn't seem to handle shallow repos correctly when doing
329 // a local clone. Fortunately all that's needed is the copy of the
330 // one file that defines the shallow boundary, the commits which
331 // have their parents omitted as part of the shallow clone.
332 //
333 // TODO(git2): remove this when git2 supports shallow clone correctly
334 if database.repo.is_shallow() {
335 std::fs::copy(
336 database.repo.path().join("shallow"),
337 r.path().join("shallow"),
338 )?;
339 }
340 repo = Some(r);
341 Ok(())
342 })?;
343 let repo = repo.unwrap();
344
345 let checkout = GitCheckout::new(database, revision, repo);
346 let guard = checkout.reset(gctx)?;
347 Ok((checkout, guard))
348 }
349
350 /// Checks if the `HEAD` of this checkout points to the expected revision.
351 fn is_fresh(&self) -> bool {
352 match self.repo.revparse_single("HEAD") {
353 Ok(ref head) if head.id() == self.revision => {
354 // See comments in reset() for why we check this
355 self.path.join(CHECKOUT_READY_LOCK).exists()
356 }
357 _ => false,
358 }
359 }
360
361 /// Similar to [`reset()`]. This roughly performs `git reset --hard` to the
362 /// revision of this checkout, with additional interrupt protection by a
363 /// dummy file [`CHECKOUT_READY_LOCK`].
364 ///
365 /// If we're interrupted while performing a `git reset` (e.g., we die
366 /// because of a signal) Cargo needs to be sure to try to check out this
367 /// repo again on the next go-round.
368 ///
369 /// To enable this we have a dummy file in our checkout, [`.cargo-ok`],
370 /// which if present means that the repo has been successfully reset and is
371 /// ready to go. Hence if we start to do a reset, we make sure this file
372 /// *doesn't* exist. The caller of [`reset`] has an option to perform additional operations
373 /// (e.g. submodule update) before marking the check-out as ready.
374 ///
375 /// [`.cargo-ok`]: CHECKOUT_READY_LOCK
376 fn reset(&self, gctx: &GlobalContext) -> CargoResult<CheckoutGuard> {
377 let guard = CheckoutGuard::guard(&self.path);
378 info!("reset {} to {}", self.repo.path().display(), self.revision);
379
380 // Ensure libgit2 won't mess with newlines when we vendor.
381 if let Ok(mut git_config) = self.repo.config() {
382 git_config.set_bool("core.autocrlf", false)?;
383 }
384
385 let object = self.repo.find_object(self.revision, None)?;
386 reset(&self.repo, &object, gctx)?;
387
388 Ok(guard)
389 }
390
391 /// Like `git submodule update --recursive` but for this git checkout.
392 ///
393 /// This function respects `submodule.<name>.update = none`[^1] git config.
394 /// Submodules set to `none` won't be fetched.
395 ///
396 /// [^1]: <https://git-scm.com/docs/git-submodule#Documentation/git-submodule.txt-none>
397 fn update_submodules(&self, gctx: &GlobalContext, quiet: bool) -> CargoResult<()> {
398 return update_submodules(&self.repo, gctx, quiet, self.remote_url());
399
400 /// Recursive helper for [`GitCheckout::update_submodules`].
401 fn update_submodules(
402 repo: &git2::Repository,
403 gctx: &GlobalContext,
404 quiet: bool,
405 parent_remote_url: &str,
406 ) -> CargoResult<()> {
407 debug!("update submodules for: {:?}", repo.workdir().unwrap());
408
409 for mut child in repo.submodules()? {
410 update_submodule(repo, &mut child, gctx, quiet, parent_remote_url).with_context(
411 || {
412 format!(
413 "failed to update submodule `{}`",
414 child.name().unwrap_or("")
415 )
416 },
417 )?;
418 }
419 Ok(())
420 }
421
422 /// Update a single Git submodule, and recurse into its submodules.
423 fn update_submodule(
424 parent: &git2::Repository,
425 child: &mut git2::Submodule<'_>,
426 gctx: &GlobalContext,
427 quiet: bool,
428 parent_remote_url: &str,
429 ) -> CargoResult<()> {
430 child.init(false)?;
431
432 let child_url_str = child.url().ok_or_else(|| {
433 anyhow::format_err!("non-utf8 url for submodule {:?}?", child.path())
434 })?;
435
436 // Skip the submodule if the config says not to update it.
437 if child.update_strategy() == git2::SubmoduleUpdate::None {
438 gctx.shell().status(
439 "Skipping",
440 format!(
441 "git submodule `{}` due to update strategy in .gitmodules",
442 child_url_str
443 ),
444 )?;
445 return Ok(());
446 }
447
448 let child_remote_url = absolute_submodule_url(parent_remote_url, child_url_str)?;
449
450 // A submodule which is listed in .gitmodules but not actually
451 // checked out will not have a head id, so we should ignore it.
452 let Some(head) = child.head_id() else {
453 return Ok(());
454 };
455
456 // If the submodule hasn't been checked out yet, we need to
457 // clone it. If it has been checked out and the head is the same
458 // as the submodule's head, then we can skip an update and keep
459 // recursing.
460 let head_and_repo = child.open().and_then(|repo| {
461 let target = repo.head()?.target();
462 Ok((target, repo))
463 });
464 let repo = match head_and_repo {
465 Ok((head, repo)) => {
466 if child.head_id() == head {
467 return update_submodules(&repo, gctx, quiet, &child_remote_url);
468 }
469 repo
470 }
471 Err(..) => {
472 let path = parent.workdir().unwrap().join(child.path());
473 let _ = paths::remove_dir_all(&path);
474 init(&path, false)?
475 }
476 };
477 // Fetch submodule database and checkout to target revision
478 let reference = GitReference::Rev(head.to_string());
479
480 // SCP-like URL is not a WHATWG Standard URL.
481 // `url` crate can't parse SCP-like URLs.
482 // We convert to `ssh://` for SourceId,
483 // but preserve the original URL for fetch to maintain correct semantics
484 // See <https://github.com/rust-lang/cargo/issues/16740>
485 let (source_url, fetch_url) = match child_remote_url.as_ref().into_url() {
486 Ok(url) => (url, None),
487 Err(_) => {
488 let ssh_url = scp_to_ssh(&child_remote_url)
489 .ok_or_else(|| anyhow::format_err!("invalid url `{child_remote_url}`"))?
490 .as_str()
491 .into_url()?;
492 (ssh_url, Some(child_remote_url.into_owned()))
493 }
494 };
495
496 // GitSource created from SourceId without git precise will result to
497 // locked_rev being Deferred and fetch_db always try to fetch if online
498 let source_id =
499 SourceId::for_git(&source_url, reference)?.with_git_precise(Some(head.to_string()));
500
501 let mut source = match &fetch_url {
502 Some(url) => GitSource::new_for_submodule(source_id, url.to_owned(), gctx)?,
503 None => GitSource::new(source_id, gctx)?,
504 };
505 source.set_quiet(quiet);
506
507 let (db, actual_rev) = source.fetch_db(true).with_context(|| {
508 let name = child.name().unwrap_or("");
509 let url = fetch_url.unwrap_or_else(|| source_url.to_string());
510 format!("failed to fetch submodule `{name}` from {url}")
511 })?;
512 db.copy_to(actual_rev, repo.path(), gctx, quiet)?;
513 Ok(())
514 }
515 }
516}
517
518/// See [`GitCheckout::reset`] for rationale on this type.
519#[must_use]
520struct CheckoutGuard {
521 ok_file: PathBuf,
522}
523
524impl CheckoutGuard {
525 fn guard(path: &Path) -> Self {
526 let ok_file = path.join(CHECKOUT_READY_LOCK);
527 let _ = paths::remove_file(&ok_file);
528 Self { ok_file }
529 }
530
531 fn mark_ok(self) -> CargoResult<()> {
532 let _ = paths::create(self.ok_file)?;
533 Ok(())
534 }
535}
536
537/// Constructs an absolute URL for a child submodule URL with its parent base URL.
538///
539/// Git only assumes a submodule URL is a relative path if it starts with `./`
540/// or `../` [^1]. To fetch the correct repo, we need to construct an absolute
541/// submodule URL.
542///
543/// At this moment it comes with some limitations:
544///
545/// * GitHub doesn't accept non-normalized URLs with relative paths.
546/// (`ssh://git@github.com/rust-lang/cargo.git/relative/..` is invalid)
547/// * `url` crate cannot parse SCP-like URLs.
548/// (`git@github.com:rust-lang/cargo.git` is not a valid WHATWG URL)
549///
550/// To overcome these, this patch always tries [`Url::parse`] first to normalize
551/// the path. If it couldn't, append the relative path and/or convert SCP-like URLs
552/// to ssh:// format as the last resorts and pray the remote git service supports
553/// non-normalized URLs.
554///
555/// See also rust-lang/cargo#12404 and rust-lang/cargo#12295.
556///
557/// [^1]: <https://git-scm.com/docs/git-submodule>
558fn absolute_submodule_url<'s>(base_url: &str, submodule_url: &'s str) -> CargoResult<Cow<'s, str>> {
559 let absolute_url = if ["./", "../"].iter().any(|p| submodule_url.starts_with(p)) {
560 match Url::parse(base_url) {
561 Ok(mut base_url) => {
562 let path = base_url.path();
563 if !path.ends_with('/') {
564 base_url.set_path(&format!("{path}/"));
565 }
566 let absolute_url = base_url.join(submodule_url).with_context(|| {
567 format!(
568 "failed to parse relative child submodule url `{submodule_url}` \
569 using parent base url `{base_url}`"
570 )
571 })?;
572 Cow::from(absolute_url.to_string())
573 }
574 Err(_) => {
575 let mut absolute_url = base_url.to_string();
576 if !absolute_url.ends_with('/') {
577 absolute_url.push('/');
578 }
579 absolute_url.push_str(submodule_url);
580 Cow::from(absolute_url)
581 }
582 }
583 } else {
584 Cow::from(submodule_url)
585 };
586
587 Ok(absolute_url)
588}
589
590/// Converts an SCP-like URL to `ssh://` format.
591fn scp_to_ssh(url: &str) -> Option<String> {
592 let mut gix_url = gix::url::parse(gix::bstr::BStr::new(url.as_bytes())).ok()?;
593 if gix_url.serialize_alternative_form && gix_url.scheme == gix::url::Scheme::Ssh {
594 gix_url.serialize_alternative_form = false;
595 Some(gix_url.to_bstring().to_string())
596 } else {
597 None
598 }
599}
600
/// Prepare the authentication callbacks for cloning a git repository.
///
/// The main purpose of this function is to construct the "authentication
/// callback" which is used to clone a repository. This callback will attempt to
/// find the right authentication on the system (without user input) and will
/// guide libgit2 in doing so.
///
/// The callback is provided `allowed` types of credentials, and we try to do as
/// much as possible based on that:
///
/// * Prioritize SSH keys from the local ssh agent as they're likely the most
///   reliable. The username here is prioritized from the credential
///   callback, then from whatever is configured in git itself, and finally
///   we fall back to the generic user of `git`.
///
/// * If a username/password is allowed, then we fallback to git2-rs's
///   implementation of the credential helper. This is what is configured
///   with `credential.helper` in git, and is the interface for the macOS
///   keychain, for example.
///
/// * After the above two have failed, we just kinda grapple attempting to
///   return *something*.
///
/// If any form of authentication fails, libgit2 will repeatedly ask us for
/// credentials until we give it a reason to not do so. To ensure we don't
/// just sit here looping forever we keep track of authentications we've
/// attempted and we don't try the same ones again.
///
/// # Parameters
///
/// * `gctx` - used to read the `USER`/`USERNAME` environment variables when
///   guessing an SSH username.
/// * `url` - URL of the repository being accessed.
/// * `cfg` - git configuration handed to the credential helper.
/// * `f` - the operation to run. It receives the credentials callback and may
///   be called more than once: once initially, and then once per SSH username
///   guess if a username was requested.
///
/// # Errors
///
/// Returns the error of the last call to `f`. If any credential was
/// requested, the error gets a context describing what was attempted. If not,
/// a `git2` error of a network-ish class gets a hint about
/// `net.git-fetch-with-cli`, and a `git2` callback-class error is replaced by
/// its bare message.
fn with_authentication<T, F>(
    gctx: &GlobalContext,
    url: &str,
    cfg: &git2::Config,
    mut f: F,
) -> CargoResult<T>
where
    F: FnMut(&mut git2::Credentials<'_>) -> CargoResult<T>,
{
    let mut cred_helper = git2::CredentialHelper::new(url);
    cred_helper.config(cfg);

    // State shared with the credentials callback below; it is also used
    // afterwards to build a helpful error message.
    //
    // Set when the callback was asked for a username only.
    let mut ssh_username_requested = false;
    // `None` until the credential helper was tried, then `Some(helper failed)`.
    let mut cred_helper_bad = None;
    // Usernames for which ssh-agent authentication was attempted.
    let mut ssh_agent_attempts = Vec::new();
    // Set as soon as the callback is invoked at all.
    let mut any_attempts = false;
    // Set once ssh-agent was tried in the first authentication session.
    let mut tried_sshkey = false;
    // The URL credentials were requested for, when it differs from `url`.
    let mut url_attempt = None;

    let orig_url = url;
    let mut res = f(&mut |url, username, allowed| {
        any_attempts = true;
        if url != orig_url {
            url_attempt = Some(url.to_string());
        }
        // libgit2's "USERNAME" authentication actually means that it's just
        // asking us for a username to keep going. This is currently only really
        // used for SSH authentication and isn't really an authentication type.
        // The logic currently looks like:
        //
        //      let user = ...;
        //      if (user.is_null())
        //          user = callback(USERNAME, null, ...);
        //
        //      callback(SSH_KEY, user, ...)
        //
        // So if we're being called here then we know that (a) we're using ssh
        // authentication and (b) no username was specified in the URL that
        // we're trying to clone. We need to guess an appropriate username here,
        // but that may involve a few attempts. Unfortunately we can't switch
        // usernames during one authentication session with libgit2, so to
        // handle this we bail out of this authentication session after setting
        // the flag `ssh_username_requested`, and then we handle this below.
        if allowed.contains(git2::CredentialType::USERNAME) {
            debug_assert!(username.is_none());
            ssh_username_requested = true;
            return Err(git2::Error::from_str("gonna try usernames later"));
        }

        // An "SSH_KEY" authentication indicates that we need some sort of SSH
        // authentication. This can currently either come from the ssh-agent
        // process or from a raw in-memory SSH key. Cargo only supports using
        // ssh-agent currently.
        //
        // If we get called with this then the only way that should be possible
        // is if a username is specified in the URL itself (e.g., `username` is
        // Some), hence the unwrap() here. We try custom usernames down below.
        if allowed.contains(git2::CredentialType::SSH_KEY) && !tried_sshkey {
            // If ssh-agent authentication fails, libgit2 will keep
            // calling this callback asking for other authentication
            // methods to try. Make sure we only try ssh-agent once,
            // to avoid looping forever.
            tried_sshkey = true;
            let username = username.unwrap();
            debug_assert!(!ssh_username_requested);
            ssh_agent_attempts.push(username.to_string());
            return git2::Cred::ssh_key_from_agent(username);
        }

        // Sometimes libgit2 will ask for a username/password in plaintext. This
        // is where Cargo would have an interactive prompt if we supported it,
        // but we currently don't! Right now the only way we support fetching a
        // plaintext password is through the `credential.helper` support, so
        // fetch that here.
        //
        // If ssh-agent authentication fails, libgit2 will keep calling this
        // callback asking for other authentication methods to try. Check
        // cred_helper_bad to make sure we only try the git credential helper
        // once, to avoid looping forever.
        if allowed.contains(git2::CredentialType::USER_PASS_PLAINTEXT) && cred_helper_bad.is_none()
        {
            let r = git2::Cred::credential_helper(cfg, url, username);
            cred_helper_bad = Some(r.is_err());
            return r;
        }

        // I'm... not sure what the DEFAULT kind of authentication is, but seems
        // easy to support?
        if allowed.contains(git2::CredentialType::DEFAULT) {
            return git2::Cred::default();
        }

        // Whelp, we tried our best
        Err(git2::Error::from_str("no authentication methods succeeded"))
    });

    // Ok, so if it looks like we're going to be doing ssh authentication, we
    // want to try a few different usernames as one wasn't specified in the URL
    // for us to use. In order, we'll try:
    //
    // * A credential helper's username for this URL, if available.
    // * This account's username.
    // * "git"
    //
    // We have to restart the authentication session each time (due to
    // constraints in libssh2 I guess? maybe this is inherent to ssh?), so we
    // call our callback, `f`, in a loop here.
    if ssh_username_requested {
        debug_assert!(res.is_err());
        // Candidates are taken with `pop()`, i.e. from the back, so they are
        // pushed here in reverse order of priority.
        let mut attempts = vec![String::from("git")];
        if let Ok(s) = gctx.get_env("USER").or_else(|_| gctx.get_env("USERNAME")) {
            attempts.push(s.to_string());
        }
        if let Some(ref s) = cred_helper.username {
            attempts.push(s.clone());
        }

        while let Some(s) = attempts.pop() {
            // We should get `USERNAME` first, where we just return our attempt,
            // and then after that we should get `SSH_KEY`. If the first attempt
            // fails we'll get called again, but we don't have another option so
            // we bail out.
            //
            // Note that this counter shadows the list of candidate usernames
            // for the rest of the loop body only.
            let mut attempts = 0;
            res = f(&mut |_url, username, allowed| {
                if allowed.contains(git2::CredentialType::USERNAME) {
                    return git2::Cred::username(&s);
                }
                if allowed.contains(git2::CredentialType::SSH_KEY) {
                    debug_assert_eq!(Some(&s[..]), username);
                    attempts += 1;
                    if attempts == 1 {
                        ssh_agent_attempts.push(s.to_string());
                        return git2::Cred::ssh_key_from_agent(&s);
                    }
                }
                Err(git2::Error::from_str("no authentication methods succeeded"))
            });

            // If we made two attempts then that means:
            //
            // 1. A username was requested, we returned `s`.
            // 2. An ssh key was requested, we returned to look up `s` in the
            //    ssh agent.
            // 3. For whatever reason that lookup failed, so we were asked again
            //    for another mode of authentication.
            //
            // Essentially, if `attempts == 2` then in theory the only error was
            // that this username failed to authenticate (e.g., no other network
            // errors happened). Otherwise something else is funny so we bail
            // out.
            if attempts != 2 {
                break;
            }
        }
    }
    // Success of the last call to `f` is returned as is; everything below
    // only decorates the error.
    let mut err = match res {
        Ok(e) => return Ok(e),
        Err(e) => e,
    };

    // In the case of an authentication failure (where we tried something) then
    // we try to give a more helpful error message about precisely what we
    // tried.
    if any_attempts {
        let mut msg = "failed to authenticate when downloading \
                       repository"
            .to_string();

        if let Some(attempt) = &url_attempt {
            if url != attempt {
                msg.push_str(": ");
                msg.push_str(attempt);
            }
        }
        msg.push('\n');
        if !ssh_agent_attempts.is_empty() {
            let names = ssh_agent_attempts
                .iter()
                .map(|s| format!("`{}`", s))
                .collect::<Vec<_>>()
                .join(", ");
            msg.push_str(&format!(
                "\n* attempted ssh-agent authentication, but \
                 no usernames succeeded: {}",
                names
            ));
        }
        if let Some(failed_cred_helper) = cred_helper_bad {
            if failed_cred_helper {
                msg.push_str(
                    "\n* attempted to find username/password via \
                     git's `credential.helper` support, but failed",
                );
            } else {
                msg.push_str(
                    "\n* attempted to find username/password via \
                     `credential.helper`, but maybe the found \
                     credentials were incorrect",
                );
            }
        }
        msg.push_str("\n\n");
        msg.push_str("if the git CLI succeeds then `net.git-fetch-with-cli` may help here\n");
        msg.push_str("https://doc.rust-lang.org/cargo/reference/config.html#netgit-fetch-with-cli");
        err = err.context(msg);

        // Otherwise if we didn't even get to the authentication phase them we may
        // have failed to set up a connection, in these cases hint on the
        // `net.git-fetch-with-cli` configuration option.
    } else if let Some(e) = err.downcast_ref::<git2::Error>() {
        match e.class() {
            ErrorClass::Net
            | ErrorClass::Ssl
            | ErrorClass::Submodule
            | ErrorClass::FetchHead
            | ErrorClass::Ssh
            | ErrorClass::Http => {
                let msg = format!(
                    concat!(
                        "network failure seems to have happened\n",
                        "if a proxy or similar is necessary `net.git-fetch-with-cli` may help here\n",
                        "https://doc.rust-lang.org/cargo/reference/config.html#netgit-fetch-with-cli",
                        "{}"
                    ),
                    note_github_pull_request(url).unwrap_or_default()
                );
                err = err.context(msg);
            }
            ErrorClass::Callback => {
                // This unwraps the git2 error. We're using the callback error
                // specifically to convey errors from Rust land through the C
                // callback interface. We don't need the `; class=Callback
                // (26)` that gets tacked on to the git2 error message.
                err = anyhow::format_err!("{}", e.message());
            }
            _ => {}
        }
    }

    Err(err)
}
869
870/// `git reset --hard` to the given `obj` for the `repo`.
871///
872/// The `obj` is a commit-ish to which the head should be moved.
873fn reset(repo: &git2::Repository, obj: &git2::Object<'_>, gctx: &GlobalContext) -> CargoResult<()> {
874 let mut pb = Progress::new("Checkout", gctx);
875 let mut opts = git2::build::CheckoutBuilder::new();
876 opts.progress(|_, cur, max| {
877 drop(pb.tick(cur, max, ""));
878 });
879 debug!("doing reset");
880 repo.reset(obj, git2::ResetType::Hard, Some(&mut opts))?;
881 debug!("reset done");
882 Ok(())
883}
884
/// Prepares the callbacks for fetching a git repository.
///
/// The main purpose of this function is to construct everything before a fetch.
/// This will attempt to set up a progress bar, the authentication for git,
/// the ssh known hosts check, and the network retry mechanism.
///
/// * `git_config` - git configuration handed to the authentication helper.
/// * `url` - remote URL being fetched; also parsed to find an explicit port
///   for the known-hosts check.
/// * `gctx` - source of the `net.ssh` configuration, the progress bar and the
///   retry policy.
/// * `cb` - invoked with fully configured [`git2::FetchOptions`], which the
///   callback uses to perform the actual git fetch. It may be invoked more
///   than once, since the whole operation runs inside the retry helper.
///
/// # Errors
///
/// When the fetch fails and the certificate check recorded an error, that
/// recorded error is returned in preference to the error produced by the
/// authentication/fetch attempt itself.
pub fn with_fetch_options(
    git_config: &git2::Config,
    url: &str,
    gctx: &GlobalContext,
    cb: &mut dyn FnMut(git2::FetchOptions<'_>) -> CargoResult<()>,
) -> CargoResult<()> {
    let mut progress = Progress::new("Fetch", gctx);
    let ssh_config = gctx.net_config()?.ssh.as_ref();
    let config_known_hosts = ssh_config.and_then(|ssh| ssh.known_hosts.as_ref());
    let diagnostic_home_config = gctx.diagnostic_home_config();
    network::retry::with_retry(gctx, || {
        // Hack: libgit2 disallows overriding the error from check_cb since v1.8.0,
        // so we store the error additionally and unwrap it later
        let mut check_cb_result = Ok(());
        let auth_result = with_authentication(gctx, url, git_config, |f| {
            // `None` when the URL does not parse or carries no explicit port.
            let port = Url::parse(url).ok().and_then(|url| url.port());
            let mut last_update = Instant::now();
            let mut rcb = git2::RemoteCallbacks::new();
            // We choose `N=10` here to make a `300ms * 10slots ~= 3000ms`
            // sliding window for tracking the data transfer rate (in bytes/s).
            let mut counter = MetricsCounter::<10>::new(0, last_update);
            rcb.credentials(f);
            rcb.certificate_check(|cert, host| {
                match super::known_hosts::certificate_check(
                    gctx,
                    cert,
                    host,
                    port,
                    config_known_hosts,
                    &diagnostic_home_config,
                ) {
                    Ok(status) => Ok(status),
                    Err(e) => {
                        // Keep the detailed error so it can be surfaced once
                        // the fetch attempt has returned.
                        check_cb_result = Err(e);
                        // This is not really used because it'll be overridden by libgit2
                        // See https://github.com/libgit2/libgit2/commit/9a9f220119d9647a352867b24b0556195cb26548
                        Err(git2::Error::from_str(
                            "invalid or unknown remote ssh hostkey",
                        ))
                    }
                }
            });
            rcb.transfer_progress(|stats| {
                let indexed_deltas = stats.indexed_deltas();
                let msg = if indexed_deltas > 0 {
                    // Resolving deltas.
                    format!(
                        ", ({}/{}) resolving deltas",
                        indexed_deltas,
                        stats.total_deltas()
                    )
                } else {
                    // Receiving objects.
                    //
                    // # Caveat
                    //
                    // Progress bar relies on git2 calling `transfer_progress`
                    // to update its transfer rate, but we cannot guarantee a
                    // periodic call of that callback. Thus if we don't receive
                    // any data for, say, 10 seconds, the rate will get stuck
                    // and never go down to 0B/s.
                    // In the future, we need to find a way to update the rate
                    // even when the callback is not called.
                    let now = Instant::now();
                    // Scrape a `received_bytes` to the counter every 300ms.
                    if now - last_update > Duration::from_millis(300) {
                        counter.add(stats.received_bytes(), now);
                        last_update = now;
                    }
                    let rate = HumanBytes(counter.rate() as u64);
                    format!(", {rate:.2}/s")
                };
                // The boolean handed back to git2 is whether the progress
                // tick succeeded.
                progress
                    .tick(stats.indexed_objects(), stats.total_objects(), &msg)
                    .is_ok()
            });

            // Hand the configured callbacks to the caller, which performs
            // the actual fetch of the url.
            let mut opts = git2::FetchOptions::new();
            opts.remote_callbacks(rcb);
            cb(opts)
        });
        // Prefer the error recorded by the certificate check (if any) over
        // the error reported by the failed attempt.
        if auth_result.is_err() {
            check_cb_result?;
        }
        auth_result?;
        Ok(())
    })
}
983
984/// Attempts to fetch the given git `reference` for a Git repository.
985///
986/// This is the main entry for git clone/fetch. It does the followings:
987///
988/// * Turns [`GitReference`] into refspecs accordingly.
989/// * Dispatches `git fetch` using libgit2, gitoxide, or git CLI.
990///
991/// The `remote_url` argument is the git remote URL where we want to fetch from.
992///
993/// The `remote_kind` argument is a thing for [`-Zgitoxide`] shallow clones
994/// at this time. It could be extended when libgit2 supports shallow clones.
995///
996/// [`-Zgitoxide`]: https://doc.rust-lang.org/nightly/cargo/reference/unstable.html#gitoxide
997pub fn fetch(
998 repo: &mut git2::Repository,
999 remote_url: &str,
1000 manifest_reference: &GitReference,
1001 locked_reference: &GitReference,
1002 gctx: &GlobalContext,
1003 remote_kind: RemoteKind,
1004) -> CargoResult<()> {
1005 if let Some(offline_flag) = gctx.offline_flag() {
1006 anyhow::bail!(
1007 "attempting to update a git repository, but {offline_flag} \
1008 was specified"
1009 )
1010 }
1011
1012 let shallow = remote_kind.to_shallow_setting(repo.is_shallow(), gctx);
1013
1014 // Flag to keep track if the rev is a full commit hash
1015 let mut fast_path_rev: bool = false;
1016
1017 let oid_to_fetch = match github_fast_path(repo, remote_url, locked_reference, gctx) {
1018 Ok(FastPathRev::UpToDate) => return Ok(()),
1019 Ok(FastPathRev::NeedsFetch(rev)) => Some(rev),
1020 Ok(FastPathRev::Indeterminate) => None,
1021 Err(e) => {
1022 debug!("failed to check github {:?}", e);
1023 None
1024 }
1025 };
1026
1027 maybe_gc_repo(repo, gctx)?;
1028
1029 clean_repo_temp_files(repo);
1030
1031 // Translate the reference desired here into an actual list of refspecs
1032 // which need to get fetched. Additionally record if we're fetching tags.
1033 let mut refspecs = Vec::new();
1034 let mut tags = false;
1035 // The `+` symbol on the refspec means to allow a forced (fast-forward)
1036 // update which is needed if there is ever a force push that requires a
1037 // fast-forward.
1038 match locked_reference {
1039 // For branches and tags we can fetch simply one reference and copy it
1040 // locally, no need to fetch other branches/tags.
1041 GitReference::Branch(b) => {
1042 refspecs.push(format!("+refs/heads/{0}:refs/remotes/origin/{0}", b));
1043 }
1044
1045 GitReference::Tag(t) => {
1046 refspecs.push(format!("+refs/tags/{0}:refs/remotes/origin/tags/{0}", t));
1047 }
1048
1049 GitReference::DefaultBranch => {
1050 refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
1051 }
1052
1053 GitReference::Rev(rev) => {
1054 if rev.starts_with("refs/") {
1055 refspecs.push(format!("+{0}:{0}", rev));
1056 } else if let Some(oid_to_fetch) = oid_to_fetch {
1057 fast_path_rev = true;
1058 refspecs.push(format!("+{0}:refs/commit/{0}", oid_to_fetch));
1059 } else if !matches!(shallow, gix::remote::fetch::Shallow::NoChange)
1060 && rev_to_oid(rev).is_some()
1061 {
1062 // There is a specific commit to fetch and we will do so in shallow-mode only
1063 // to not disturb the previous logic.
1064 // Note that with typical settings for shallowing, we will just fetch a single `rev`
1065 // as single commit.
1066 // The reason we write to `refs/remotes/origin/HEAD` is that it's of special significance
1067 // when during `GitReference::resolve()`, but otherwise it shouldn't matter.
1068 refspecs.push(format!("+{0}:refs/remotes/origin/HEAD", rev));
1069 } else if let GitReference::Rev(rev) = manifest_reference
1070 && rev.starts_with("refs/")
1071 {
1072 // If the lockfile has a commit. we can't directly fetch it (unless we're talking
1073 // to GitHub), so we fetch the ref associated with it from the manifest.
1074 refspecs.push(format!("+{0}:{0}", rev));
1075 } else {
1076 // We don't know what the rev will point to. To handle this
1077 // situation we fetch all branches and tags, and then we pray
1078 // it's somewhere in there.
1079 refspecs.push(String::from("+refs/heads/*:refs/remotes/origin/*"));
1080 refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
1081 tags = true;
1082 }
1083 }
1084 }
1085
1086 debug!("doing a fetch for {remote_url}");
1087 let result = if let Some(true) = gctx.net_config()?.git_fetch_with_cli {
1088 fetch_with_cli(repo, remote_url, &refspecs, tags, shallow, gctx)
1089 } else if gctx.cli_unstable().gitoxide.map_or(false, |git| git.fetch) {
1090 fetch_with_gitoxide(repo, remote_url, refspecs, tags, shallow, gctx)
1091 } else {
1092 fetch_with_libgit2(repo, remote_url, refspecs, tags, shallow, gctx)
1093 };
1094
1095 if fast_path_rev {
1096 if let Some(oid) = oid_to_fetch {
1097 return result.with_context(|| format!("revision {} not found", oid));
1098 }
1099 }
1100 result
1101}
1102
1103/// `gitoxide` uses shallow locks to assure consistency when fetching to and to avoid races, and to write
1104/// files atomically.
1105/// Cargo has its own lock files and doesn't need that mechanism for race protection, so a stray lock means
1106/// a signal interrupted a previous shallow fetch and doesn't mean a race is happening.
1107fn has_shallow_lock_file(err: &crate::sources::git::fetch::Error) -> bool {
1108 matches!(
1109 err,
1110 gix::env::collate::fetch::Error::Fetch(gix::remote::fetch::Error::Fetch(
1111 gix::protocol::fetch::Error::LockShallowFile(_)
1112 ))
1113 )
1114}
1115
1116/// Attempts to use `git` CLI installed on the system to fetch a repository,
1117/// when the config value [`net.git-fetch-with-cli`][1] is set.
1118///
1119/// Unfortunately `libgit2` is notably lacking in the realm of authentication
1120/// when compared to the `git` command line. As a result, allow an escape
1121/// hatch for users that would prefer to use `git`-the-CLI for fetching
1122/// repositories instead of `libgit2`-the-library. This should make more
1123/// flavors of authentication possible while also still giving us all the
1124/// speed and portability of using `libgit2`.
1125///
1126/// [1]: https://doc.rust-lang.org/nightly/cargo/reference/config.html#netgit-fetch-with-cli
1127fn fetch_with_cli(
1128 repo: &mut git2::Repository,
1129 url: &str,
1130 refspecs: &[String],
1131 tags: bool,
1132 shallow: gix::remote::fetch::Shallow,
1133 gctx: &GlobalContext,
1134) -> CargoResult<()> {
1135 debug!(target: "git-fetch", backend = "git-cli");
1136
1137 let mut cmd = ProcessBuilder::new("git");
1138 cmd.arg("fetch");
1139 if tags {
1140 cmd.arg("--tags");
1141 } else {
1142 cmd.arg("--no-tags");
1143 }
1144 if let gix::remote::fetch::Shallow::DepthAtRemote(depth) = shallow {
1145 let depth = 0i32.saturating_add_unsigned(depth.get());
1146 cmd.arg(format!("--depth={depth}"));
1147 }
1148 match gctx.shell().verbosity() {
1149 Verbosity::Normal => {}
1150 Verbosity::Verbose => {
1151 cmd.arg("--verbose");
1152 }
1153 Verbosity::Quiet => {
1154 cmd.arg("--quiet");
1155 }
1156 }
1157 cmd.arg("--force") // handle force pushes
1158 .arg("--update-head-ok") // see discussion in #2078
1159 .arg(url)
1160 .args(refspecs)
1161 // If cargo is run by git (for example, the `exec` command in `git
1162 // rebase`), the GIT_DIR is set by git and will point to the wrong
1163 // location. This makes sure GIT_DIR is always the repository path.
1164 .env("GIT_DIR", repo.path())
1165 // The reset of these may not be necessary, but I'm including them
1166 // just to be extra paranoid and avoid any issues.
1167 .env_remove("GIT_WORK_TREE")
1168 .env_remove("GIT_INDEX_FILE")
1169 .env_remove("GIT_OBJECT_DIRECTORY")
1170 .env_remove("GIT_ALTERNATE_OBJECT_DIRECTORIES")
1171 .cwd(repo.path());
1172 gctx.shell()
1173 .verbose(|s| s.status("Running", &cmd.to_string()))?;
1174 network::retry::with_retry(gctx, || {
1175 cmd.exec()
1176 .map_err(|error| GitCliError::new(error, true).into())
1177 })?;
1178
1179 Ok(())
1180}
1181
1182fn fetch_with_gitoxide(
1183 repo: &mut git2::Repository,
1184 remote_url: &str,
1185 refspecs: Vec<String>,
1186 tags: bool,
1187 shallow: gix::remote::fetch::Shallow,
1188 gctx: &GlobalContext,
1189) -> CargoResult<()> {
1190 debug!(target: "git-fetch", backend = "gitoxide");
1191
1192 let git2_repo = repo;
1193 let config_overrides = cargo_config_to_gitoxide_overrides(gctx)?;
1194 let repo_reinitialized = AtomicBool::default();
1195 let res = oxide::with_retry_and_progress(
1196 git2_repo.path(),
1197 gctx,
1198 remote_url,
1199 &|repo_path,
1200 should_interrupt,
1201 mut progress,
1202 url_for_authentication: &mut dyn FnMut(&gix::bstr::BStr)| {
1203 // The `fetch` operation here may fail spuriously due to a corrupt
1204 // repository. It could also fail, however, for a whole slew of other
1205 // reasons (aka network related reasons). We want Cargo to automatically
1206 // recover from corrupt repositories, but we don't want Cargo to stomp
1207 // over other legitimate errors.
1208 //
1209 // Consequently we save off the error of the `fetch` operation and if it
1210 // looks like a "corrupt repo" error then we blow away the repo and try
1211 // again. If it looks like any other kind of error, or if we've already
1212 // blown away the repository, then we want to return the error as-is.
1213 loop {
1214 let res = oxide::open_repo(
1215 repo_path,
1216 config_overrides.clone(),
1217 oxide::OpenMode::ForFetch,
1218 )
1219 .map_err(crate::sources::git::fetch::Error::from)
1220 .and_then(|repo| {
1221 debug!("initiating fetch of {refspecs:?} from {remote_url}");
1222 let url_for_authentication = &mut *url_for_authentication;
1223 let remote = repo
1224 .remote_at(remote_url)?
1225 .with_fetch_tags(if tags {
1226 gix::remote::fetch::Tags::All
1227 } else {
1228 gix::remote::fetch::Tags::Included
1229 })
1230 .with_refspecs(
1231 refspecs.iter().map(|s| s.as_str()),
1232 gix::remote::Direction::Fetch,
1233 )
1234 .map_err(crate::sources::git::fetch::Error::Other)?;
1235 let url = remote
1236 .url(gix::remote::Direction::Fetch)
1237 .expect("set at init")
1238 .to_owned();
1239 let connection = remote.connect(gix::remote::Direction::Fetch)?;
1240 let mut authenticate = connection.configured_credentials(url)?;
1241 let connection = connection.with_credentials(
1242 move |action: gix::protocol::credentials::helper::Action| {
1243 if let Some(url) = action
1244 .context()
1245 .and_then(|gctx| gctx.url.as_ref().filter(|url| *url != remote_url))
1246 {
1247 url_for_authentication(url.as_ref());
1248 }
1249 authenticate(action)
1250 },
1251 );
1252 let outcome = connection
1253 .prepare_fetch(&mut progress, gix::remote::ref_map::Options::default())?
1254 .with_shallow(shallow.clone())
1255 .receive(&mut progress, should_interrupt)?;
1256 Ok(outcome)
1257 });
1258 let err = match res {
1259 Ok(_) => break,
1260 Err(e) => e,
1261 };
1262 debug!("fetch failed: {}", err);
1263
1264 if !repo_reinitialized.load(Ordering::Relaxed)
1265 // We check for errors that could occur if the configuration, refs or odb files are corrupted.
1266 // We don't check for errors related to writing as `gitoxide` is expected to create missing leading
1267 // folder before writing files into it, or else not even open a directory as git repository (which is
1268 // also handled here).
1269 && err.is_corrupted()
1270 || has_shallow_lock_file(&err)
1271 {
1272 repo_reinitialized.store(true, Ordering::Relaxed);
1273 debug!(
1274 "looks like this is a corrupt repository, reinitializing \
1275 and trying again"
1276 );
1277 if oxide::reinitialize(repo_path).is_ok() {
1278 continue;
1279 }
1280 }
1281
1282 return Err(err.into());
1283 }
1284 Ok(())
1285 },
1286 );
1287 if repo_reinitialized.load(Ordering::Relaxed) {
1288 *git2_repo = git2::Repository::open(git2_repo.path())?;
1289 }
1290 res
1291}
1292
1293fn fetch_with_libgit2(
1294 repo: &mut git2::Repository,
1295 remote_url: &str,
1296 refspecs: Vec<String>,
1297 tags: bool,
1298 shallow: gix::remote::fetch::Shallow,
1299 gctx: &GlobalContext,
1300) -> CargoResult<()> {
1301 debug!(target: "git-fetch", backend = "libgit2");
1302
1303 let git_config = git2::Config::open_default()?;
1304 with_fetch_options(&git_config, remote_url, gctx, &mut |mut opts| {
1305 if tags {
1306 opts.download_tags(git2::AutotagOption::All);
1307 }
1308 if let gix::remote::fetch::Shallow::DepthAtRemote(depth) = shallow {
1309 opts.depth(0i32.saturating_add_unsigned(depth.get()));
1310 }
1311 // The `fetch` operation here may fail spuriously due to a corrupt
1312 // repository. It could also fail, however, for a whole slew of other
1313 // reasons (aka network related reasons). We want Cargo to automatically
1314 // recover from corrupt repositories, but we don't want Cargo to stomp
1315 // over other legitimate errors.
1316 //
1317 // Consequently we save off the error of the `fetch` operation and if it
1318 // looks like a "corrupt repo" error then we blow away the repo and try
1319 // again. If it looks like any other kind of error, or if we've already
1320 // blown away the repository, then we want to return the error as-is.
1321 let mut repo_reinitialized = false;
1322 loop {
1323 debug!("initiating fetch of {refspecs:?} from {remote_url}");
1324 let res = repo
1325 .remote_anonymous(remote_url)?
1326 .fetch(&refspecs, Some(&mut opts), None);
1327 let err = match res {
1328 Ok(()) => break,
1329 Err(e) => e,
1330 };
1331 debug!("fetch failed: {}", err);
1332
1333 if !repo_reinitialized && matches!(err.class(), ErrorClass::Reference | ErrorClass::Odb)
1334 {
1335 repo_reinitialized = true;
1336 debug!(
1337 "looks like this is a corrupt repository, reinitializing \
1338 and trying again"
1339 );
1340 if reinitialize(repo).is_ok() {
1341 continue;
1342 }
1343 }
1344
1345 return Err(err.into());
1346 }
1347 Ok(())
1348 })
1349}
1350
1351/// Attempts to `git gc` a repository.
1352///
1353/// Cargo has a bunch of long-lived git repositories in its global cache and
1354/// some, like the index, are updated very frequently. Right now each update
1355/// creates a new "pack file" inside the git database, and over time this can
1356/// cause bad performance and bad current behavior in libgit2.
1357///
1358/// One pathological use case today is where libgit2 opens hundreds of file
1359/// descriptors, getting us dangerously close to blowing out the OS limits of
1360/// how many fds we can have open. This is detailed in [#4403].
1361///
1362/// Instead of trying to be clever about when gc is needed, we just run
1363/// `git gc --auto` and let git figure it out. It checks its own thresholds
1364/// (gc.auto, gc.autoPackLimit) and either does the work or exits quickly.
1365/// If git isn't installed, no worries - we skip it.
1366///
1367/// [#4403]: https://github.com/rust-lang/cargo/issues/4403
1368fn maybe_gc_repo(repo: &mut git2::Repository, gctx: &GlobalContext) -> CargoResult<()> {
1369 // Let git decide whether gc is actually needed based on its own thresholds
1370 // (gc.auto, gc.autoPackLimit). This avoids duplicating git's internal logic
1371 // for deciding when housekeeping is needed.
1372 //
1373 // For testing purposes, __CARGO_PACKFILE_LIMIT can be set to override
1374 // gc.autoPackLimit, which has the same meaning. This lets tests force gc
1375 // to run by setting a low threshold without depending on git's defaults.
1376 let mut cmd = Command::new("git");
1377 if let Ok(limit) = gctx.get_env("__CARGO_PACKFILE_LIMIT") {
1378 cmd.arg(format!("-c gc.autoPackLimit={}", limit));
1379 }
1380 cmd.arg("gc").arg("--auto").current_dir(repo.path());
1381
1382 match cmd.output() {
1383 Ok(out) => {
1384 debug!(
1385 "git-gc --auto status: {}\n\nstdout ---\n{}\nstderr ---\n{}",
1386 out.status,
1387 String::from_utf8_lossy(&out.stdout),
1388 String::from_utf8_lossy(&out.stderr)
1389 );
1390 if out.status.success() {
1391 let new = git2::Repository::open(repo.path())?;
1392 *repo = new;
1393 return Ok(());
1394 }
1395 }
1396 Err(e) => debug!("git-gc --auto failed to spawn: {}", e),
1397 }
1398
1399 // Alright all else failed, let's start over.
1400 reinitialize(repo)
1401}
1402
1403/// Removes temporary files left from previous activity.
1404///
1405/// If libgit2 is interrupted while indexing pack files, it will leave behind
1406/// some temporary files that it doesn't clean up. These can be quite large in
1407/// size, so this tries to clean things up.
1408///
1409/// This intentionally ignores errors. This is only an opportunistic cleaning,
1410/// and we don't really care if there are issues (there's unlikely anything
1411/// that can be done).
1412///
1413/// The git CLI has similar behavior (its temp files look like
1414/// `objects/pack/tmp_pack_9kUSA8`). Those files are normally deleted via `git
1415/// prune` which is run by `git gc`. However, it doesn't know about libgit2's
1416/// filenames, so they never get cleaned up.
1417fn clean_repo_temp_files(repo: &git2::Repository) {
1418 let path = repo.path().join("objects/pack/pack_git2_*");
1419 let Some(pattern) = path.to_str() else {
1420 tracing::warn!("cannot convert {path:?} to a string");
1421 return;
1422 };
1423 let Ok(paths) = glob::glob(pattern) else {
1424 return;
1425 };
1426 for path in paths {
1427 if let Ok(path) = path {
1428 match paths::remove_file(&path) {
1429 Ok(_) => tracing::debug!("removed stale temp git file {path:?}"),
1430 Err(e) => {
1431 tracing::warn!("failed to remove {path:?} while cleaning temp files: {e}")
1432 }
1433 }
1434 }
1435 }
1436}
1437
1438/// Reinitializes a given Git repository. This is useful when a Git repository
1439/// seems corrupted and we want to start over.
1440fn reinitialize(repo: &mut git2::Repository) -> CargoResult<()> {
1441 // Here we want to drop the current repository object pointed to by `repo`,
1442 // so we initialize temporary repository in a sub-folder, blow away the
1443 // existing git folder, and then recreate the git repo. Finally we blow away
1444 // the `tmp` folder we allocated.
1445 let path = repo.path().to_path_buf();
1446 debug!("reinitializing git repo at {:?}", path);
1447 let tmp = path.join("tmp");
1448 let bare = !repo.path().ends_with(".git");
1449 *repo = init(&tmp, false)?;
1450 for entry in path.read_dir()? {
1451 let entry = entry?;
1452 if entry.file_name().to_str() == Some("tmp") {
1453 continue;
1454 }
1455 let path = entry.path();
1456 drop(paths::remove_file(&path).or_else(|_| paths::remove_dir_all(&path)));
1457 }
1458 *repo = init(&path, bare)?;
1459 paths::remove_dir_all(&tmp)?;
1460 Ok(())
1461}
1462
1463/// Initializes a Git repository at `path`.
1464fn init(path: &Path, bare: bool) -> CargoResult<git2::Repository> {
1465 let mut opts = git2::RepositoryInitOptions::new();
1466 // Skip anything related to templates, they just call all sorts of issues as
1467 // we really don't want to use them yet they insist on being used. See #6240
1468 // for an example issue that comes up.
1469 opts.external_template(false);
1470 opts.bare(bare);
1471 Ok(git2::Repository::init_opts(&path, &opts)?)
1472}
1473
/// The result of GitHub fast path check. See [`github_fast_path`] for more.
enum FastPathRev {
    /// The local rev (as resolved in the local repository) is already up to
    /// date with what this rev resolves to on GitHub's server. No fetch is
    /// needed.
    UpToDate,
    /// The following SHA must be fetched in order for the local rev to become
    /// up to date.
    NeedsFetch(Oid),
    /// Don't know whether local rev is up to date. The caller has to fall
    /// back to a regular fetch and see what happens.
    Indeterminate,
}
1486
/// Attempts GitHub's special fast path for testing if we've already got an
/// up-to-date copy of the repository.
///
/// Updating the index is done pretty regularly so we want it to be as fast as
/// possible. For registries hosted on GitHub (like the crates.io index) there's
/// a fast path available to use[^1] to tell us that there's no updates to be
/// made.
///
/// Note that this function should never cause an actual failure because it's
/// just a fast path. As a result, a caller should ignore `Err` returned from
/// this function and move forward on the normal path.
///
/// * `repo` - local repository in which `reference` is resolved.
/// * `url` - remote URL; anything whose host is not `github.com` yields
///   [`FastPathRev::Indeterminate`] without any network access.
/// * `reference` - the reference to check.
/// * `gctx` - provides the shared HTTP handle used for the API request.
///
/// [^1]: <https://developer.github.com/v3/repos/commits/#get-the-sha-1-of-a-commit-reference>
fn github_fast_path(
    repo: &mut git2::Repository,
    url: &str,
    reference: &GitReference,
    gctx: &GlobalContext,
) -> CargoResult<FastPathRev> {
    let url = Url::parse(url)?;
    if !is_github(&url) {
        return Ok(FastPathRev::Indeterminate);
    }

    // `None` when the reference cannot be resolved locally.
    let local_object = resolve_ref(reference, repo).ok();

    // The name handed to GitHub's "commits" endpoint.
    let github_branch_name = match reference {
        GitReference::Branch(branch) => branch,
        GitReference::Tag(tag) => tag,
        GitReference::DefaultBranch => "HEAD",
        GitReference::Rev(rev) => {
            if rev.starts_with("refs/") {
                rev
            } else if looks_like_commit_hash(rev) {
                // `revparse_single` (used by `resolve`) is the only way to turn
                // short hash -> long hash, but it also parses other things,
                // like branch and tag names, which might coincidentally be
                // valid hex.
                //
                // We only return early if `rev` is a prefix of the object found
                // by `revparse_single`. Don't bother talking to GitHub in that
                // case, since commit hashes are permanent. If a commit with the
                // requested hash is already present in the local clone, its
                // contents must be the same as what is on the server for that
                // hash.
                //
                // If `rev` is not found locally by `revparse_single`, we'll
                // need GitHub to resolve it and get a hash. If `rev` is found
                // but is not a short hash of the found object, it's probably a
                // branch and we also need to get a hash from GitHub, in case
                // the branch has moved.
                if let Some(local_object) = local_object {
                    if is_short_hash_of(rev, local_object) {
                        debug!("github fast path already has {local_object}");
                        return Ok(FastPathRev::UpToDate);
                    }
                }
                // If `rev` is a full commit hash, the only thing it can resolve
                // to is itself. Don't bother talking to GitHub in that case
                // either. (This ensures that we always attempt to fetch the
                // commit directly even if we can't reach the GitHub API.)
                if let Some(oid) = rev_to_oid(rev) {
                    debug!("github fast path is already a full commit hash {rev}");
                    return Ok(FastPathRev::NeedsFetch(oid));
                }
                rev
            } else {
                debug!("can't use github fast path with `rev = \"{}\"`", rev);
                return Ok(FastPathRev::Indeterminate);
            }
        }
    };

    // This expects GitHub urls in the form `github.com/user/repo` and nothing
    // else
    let mut pieces = url
        .path_segments()
        .ok_or_else(|| anyhow!("no path segments on url"))?;
    let username = pieces
        .next()
        .ok_or_else(|| anyhow!("couldn't find username"))?;
    let repository = pieces
        .next()
        .ok_or_else(|| anyhow!("couldn't find repository name"))?;
    if pieces.next().is_some() {
        anyhow::bail!("too many segments on URL");
    }

    // Trim off the `.git` from the repository, if present, since that's
    // optional for GitHub and won't work when we try to use the API as well.
    let repository = repository.strip_suffix(".git").unwrap_or(repository);

    let url = format!(
        "https://api.github.com/repos/{}/{}/commits/{}",
        username, repository, github_branch_name,
    );
    let mut handle = gctx.http()?.lock().unwrap();
    debug!("attempting GitHub fast path for {}", url);
    handle.get(true)?;
    handle.url(&url)?;
    handle.useragent("cargo")?;
    handle.follow_location(true)?; // follow redirects
    handle.http_headers({
        let mut headers = List::new();
        // Ask for the bare SHA instead of the full commit payload.
        headers.append("Accept: application/vnd.github.3.sha")?;
        // Send the locally known object along so that an unchanged commit
        // can be answered with `304` (handled below).
        if let Some(local_object) = local_object {
            headers.append(&format!("If-None-Match: \"{}\"", local_object))?;
        }
        headers
    })?;

    let mut response_body = Vec::new();
    let mut transfer = handle.transfer();
    transfer.write_function(|data| {
        response_body.extend_from_slice(data);
        Ok(data.len())
    })?;
    transfer.perform()?;
    drop(transfer); // end borrow of handle so that response_code can be called

    let response_code = handle.response_code()?;
    if response_code == 304 {
        debug!("github fast path up-to-date");
        Ok(FastPathRev::UpToDate)
    } else if response_code == 200
        && let Some(oid_to_fetch) = rev_to_oid(str::from_utf8(&response_body)?)
    {
        // response expected to be a full hash hexstring (40 or 64 chars)
        debug!("github fast path fetch {oid_to_fetch}");
        Ok(FastPathRev::NeedsFetch(oid_to_fetch))
    } else {
        // Usually response_code == 404 if the repository does not exist, and
        // response_code == 422 if exists but GitHub is unable to resolve the
        // requested rev.
        debug!("github fast path bad response code {response_code}");
        Ok(FastPathRev::Indeterminate)
    }
}
1625
1626/// Whether a `url` is one from GitHub.
1627fn is_github(url: &Url) -> bool {
1628 url.host_str() == Some("github.com")
1629}
1630
1631// Give some messages on GitHub PR URL given as is
1632pub(crate) fn note_github_pull_request(url: &str) -> Option<String> {
1633 if let Ok(url) = url.parse::<Url>()
1634 && is_github(&url)
1635 {
1636 let path_segments = url
1637 .path_segments()
1638 .map(|p| p.into_iter().collect::<Vec<_>>())
1639 .unwrap_or_default();
1640 if let [owner, repo, "pull", pr_number, ..] = path_segments[..] {
1641 let repo_url = format!("https://github.com/{owner}/{repo}.git");
1642 let rev = format!("refs/pull/{pr_number}/head");
1643 return Some(format!(
1644 concat!(
1645 "\n\nnote: GitHub url {} is not a repository. \n",
1646 "help: Replace the dependency with \n",
1647 " `git = \"{}\" rev = \"{}\"` \n",
1648 " to specify pull requests as dependencies' revision."
1649 ),
1650 url, repo_url, rev
1651 ));
1652 }
1653 }
1654
1655 None
1656}
1657
/// Whether a `rev` looks like a commit hash: at least 7 characters long and
/// made up of ASCII hex digits only.
fn looks_like_commit_hash(rev: &str) -> bool {
    const MIN_HASH_LEN: usize = 7;

    if rev.len() < MIN_HASH_LEN {
        return false;
    }
    // Any non-ASCII character is encoded with bytes >= 0x80, none of which
    // is a hex digit, so checking bytes matches checking characters.
    rev.bytes().all(|byte| byte.is_ascii_hexdigit())
}
1662
1663/// Whether `rev` is a shorter hash of `oid`.
1664fn is_short_hash_of(rev: &str, oid: Oid) -> bool {
1665 let long_hash = oid.to_string();
1666 match long_hash.get(..rev.len()) {
1667 Some(truncated_long_hash) => truncated_long_hash.eq_ignore_ascii_case(rev),
1668 None => false,
1669 }
1670}
1671
#[cfg(test)]
mod tests {
    use super::absolute_submodule_url;

    /// Table-driven check of `absolute_submodule_url`.
    ///
    /// Each case is `(base_url, submodule_url, expected)`.
    #[test]
    fn test_absolute_submodule_url() {
        let cases = [
            // A submodule URL that is not a relative path is expected back
            // unchanged.
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "git@github.com:rust-lang/cargo.git",
                "git@github.com:rust-lang/cargo.git",
            ),
            // Relative submodule paths against an `ssh://` base are expected
            // to be resolved, with and without a trailing slash on the base.
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "./",
                "ssh://git@gitub.com/rust-lang/cargo/",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "../",
                "ssh://git@gitub.com/rust-lang/",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "./foo",
                "ssh://git@gitub.com/rust-lang/cargo/foo",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo/",
                "./foo",
                "ssh://git@gitub.com/rust-lang/cargo/foo",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo/",
                "../foo",
                "ssh://git@gitub.com/rust-lang/foo",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "../foo",
                "ssh://git@gitub.com/rust-lang/foo",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "../foo/bar/../baz",
                "ssh://git@gitub.com/rust-lang/foo/baz",
            ),
            // SCP-like base with a non-relative submodule URL: again expected
            // back unchanged.
            (
                "git@github.com:rust-lang/cargo.git",
                "ssh://git@gitub.com/rust-lang/cargo",
                "ssh://git@gitub.com/rust-lang/cargo",
            ),
            // Relative submodule paths against an SCP-like base are expected
            // to be appended after a single `/`, without normalizing `.` or
            // `..` components.
            (
                "git@github.com:rust-lang/cargo.git",
                "./",
                "git@github.com:rust-lang/cargo.git/./",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "../",
                "git@github.com:rust-lang/cargo.git/../",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "./foo",
                "git@github.com:rust-lang/cargo.git/./foo",
            ),
            (
                "git@github.com:rust-lang/cargo.git/",
                "./foo",
                "git@github.com:rust-lang/cargo.git/./foo",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "../foo",
                "git@github.com:rust-lang/cargo.git/../foo",
            ),
            (
                "git@github.com:rust-lang/cargo.git/",
                "../foo",
                "git@github.com:rust-lang/cargo.git/../foo",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "../foo/bar/../baz",
                "git@github.com:rust-lang/cargo.git/../foo/bar/../baz",
            ),
        ];

        for (base_url, submodule_url, expected) in cases {
            let url = absolute_submodule_url(base_url, submodule_url).unwrap();
            assert_eq!(
                expected, url,
                "base `{base_url}`; submodule `{submodule_url}`"
            );
        }
    }
}
1770
1771/// Turns a full commit hash revision into an oid.
1772///
1773/// Git object ID is supposed to be a hex string of 20 (SHA1) or 32 (SHA256) bytes.
1774/// Its length must be double to the underlying bytes (40 or 64),
1775/// otherwise libgit2 would happily zero-pad the returned oid.
1776///
1777/// See:
1778///
1779/// * <https://github.com/rust-lang/cargo/issues/13188>
1780/// * <https://github.com/rust-lang/cargo/issues/13968>
1781pub(super) fn rev_to_oid(rev: &str) -> Option<Oid> {
1782 Oid::from_str(rev)
1783 .ok()
1784 .filter(|oid| oid.as_bytes().len() * 2 == rev.len())
1785}