cargo/sources/git/utils.rs
1//! Utilities for handling git repositories, mainly around
2//! authentication/cloning.
3
4use crate::core::{GitReference, SourceId, Verbosity};
5use crate::sources::git::fetch::RemoteKind;
6use crate::sources::git::oxide;
7use crate::sources::git::oxide::cargo_config_to_gitoxide_overrides;
8use crate::sources::git::source::GitSource;
9use crate::sources::source::Source as _;
10use crate::util::HumanBytes;
11use crate::util::errors::{CargoResult, GitCliError};
12use crate::util::{GlobalContext, IntoUrl, MetricsCounter, Progress, network};
13
14use anyhow::{Context as _, anyhow};
15use cargo_util::{ProcessBuilder, paths};
16use curl::easy::List;
17use git2::{ErrorClass, ObjectType, Oid};
18use tracing::{debug, info};
19use url::Url;
20
21use std::borrow::Cow;
22use std::path::{Path, PathBuf};
23use std::process::Command;
24use std::str;
25use std::sync::atomic::{AtomicBool, Ordering};
26use std::time::{Duration, Instant};
27
/// Name of the marker file whose presence in a checkout directory indicates
/// that `git reset` has been done and the checkout is ready to go.
/// See [`GitCheckout::reset`] for why we need this.
const CHECKOUT_READY_LOCK: &str = ".cargo-ok";
31
32/// A short abbreviated OID.
33///
34/// Exists for avoiding extra allocations in [`GitDatabase::to_short_id`].
35pub struct GitShortID(git2::Buf);
36
37impl GitShortID {
38 /// Views the short ID as a `str`.
39 pub fn as_str(&self) -> &str {
40 self.0.as_str().unwrap()
41 }
42}
43
/// A remote repository, identified solely by its URL.
///
/// It gets cloned into a local [`GitDatabase`] via [`GitRemote::checkout`],
/// or paired with an already existing one via [`GitRemote::db_at`].
#[derive(PartialEq, Clone, Debug)]
pub struct GitRemote {
    /// URL to a remote repository.
    url: Url,
}
50
/// A local clone of a remote repository's database. Multiple [`GitCheckout`]s
/// can be cloned from a single [`GitDatabase`].
///
/// Instances are produced by [`GitRemote::checkout`] and [`GitRemote::db_at`].
pub struct GitDatabase {
    /// The remote repository where this database is fetched from.
    remote: GitRemote,
    /// Path to the root of the underlying Git repository on the local filesystem.
    path: PathBuf,
    /// Underlying Git repository instance for this database.
    repo: git2::Repository,
}
61
/// A local checkout of a particular revision from a [`GitDatabase`].
///
/// The checkout borrows the database it was cloned from, so it cannot outlive
/// that database.
pub struct GitCheckout<'a> {
    /// The git database where this checkout is cloned from.
    database: &'a GitDatabase,
    /// Path to the root of the underlying Git repository on the local filesystem.
    path: PathBuf,
    /// The git revision this checkout is for.
    revision: git2::Oid,
    /// Underlying Git repository instance for this checkout.
    repo: git2::Repository,
}
73
74impl GitRemote {
75 /// Creates an instance for a remote repository URL.
76 pub fn new(url: &Url) -> GitRemote {
77 GitRemote { url: url.clone() }
78 }
79
80 /// Gets the remote repository URL.
81 pub fn url(&self) -> &Url {
82 &self.url
83 }
84
85 /// Fetches and checkouts to a reference or a revision from this remote
86 /// into a local path.
87 ///
88 /// This ensures that it gets the up-to-date commit when a named reference
89 /// is given (tag, branch, refs/*). Thus, network connection is involved.
90 ///
91 /// If we have a previous instance of [`GitDatabase`] then fetch into that
92 /// if we can. If that can successfully load our revision then we've
93 /// populated the database with the latest version of `reference`, so
94 /// return that database and the rev we resolve to.
95 pub fn checkout(
96 &self,
97 into: &Path,
98 db: Option<GitDatabase>,
99 reference: &GitReference,
100 gctx: &GlobalContext,
101 ) -> CargoResult<(GitDatabase, git2::Oid)> {
102 if let Some(mut db) = db {
103 fetch(
104 &mut db.repo,
105 self.url.as_str(),
106 reference,
107 gctx,
108 RemoteKind::GitDependency,
109 )
110 .with_context(|| format!("failed to fetch into: {}", into.display()))?;
111
112 if let Some(rev) = resolve_ref(reference, &db.repo).ok() {
113 return Ok((db, rev));
114 }
115 }
116
117 // Otherwise start from scratch to handle corrupt git repositories.
118 // After our fetch (which is interpreted as a clone now) we do the same
119 // resolution to figure out what we cloned.
120 if into.exists() {
121 paths::remove_dir_all(into)?;
122 }
123 paths::create_dir_all(into)?;
124 let mut repo = init(into, true)?;
125 fetch(
126 &mut repo,
127 self.url.as_str(),
128 reference,
129 gctx,
130 RemoteKind::GitDependency,
131 )
132 .with_context(|| format!("failed to clone into: {}", into.display()))?;
133 let rev = resolve_ref(reference, &repo)?;
134
135 Ok((
136 GitDatabase {
137 remote: self.clone(),
138 path: into.to_path_buf(),
139 repo,
140 },
141 rev,
142 ))
143 }
144
145 /// Creates a [`GitDatabase`] of this remote at `db_path`.
146 pub fn db_at(&self, db_path: &Path) -> CargoResult<GitDatabase> {
147 let repo = git2::Repository::open(db_path)?;
148 Ok(GitDatabase {
149 remote: self.clone(),
150 path: db_path.to_path_buf(),
151 repo,
152 })
153 }
154}
155
156impl GitDatabase {
157 /// Checkouts to a revision at `dest`ination from this database.
158 #[tracing::instrument(skip(self, gctx))]
159 pub fn copy_to(
160 &self,
161 rev: git2::Oid,
162 dest: &Path,
163 gctx: &GlobalContext,
164 quiet: bool,
165 ) -> CargoResult<GitCheckout<'_>> {
166 // If the existing checkout exists, and it is fresh, use it.
167 // A non-fresh checkout can happen if the checkout operation was
168 // interrupted. In that case, the checkout gets deleted and a new
169 // clone is created.
170 let checkout = match git2::Repository::open(dest)
171 .ok()
172 .map(|repo| GitCheckout::new(self, rev, repo))
173 .filter(|co| co.is_fresh())
174 {
175 Some(co) => co,
176 None => {
177 let (checkout, guard) = GitCheckout::clone_into(dest, self, rev, gctx)?;
178 checkout.update_submodules(gctx, quiet)?;
179 guard.mark_ok()?;
180 checkout
181 }
182 };
183
184 Ok(checkout)
185 }
186
187 /// Get a short OID for a `revision`, usually 7 chars or more if ambiguous.
188 pub fn to_short_id(&self, revision: git2::Oid) -> CargoResult<GitShortID> {
189 let obj = self.repo.find_object(revision, None)?;
190 Ok(GitShortID(obj.short_id()?))
191 }
192
193 /// Checks if the database contains the object of this `oid`..
194 pub fn contains(&self, oid: git2::Oid) -> bool {
195 self.repo.revparse_single(&oid.to_string()).is_ok()
196 }
197
198 /// [`resolve_ref`]s this reference with this database.
199 pub fn resolve(&self, r: &GitReference) -> CargoResult<git2::Oid> {
200 resolve_ref(r, &self.repo)
201 }
202}
203
204/// Resolves [`GitReference`] to an object ID with objects the `repo` currently has.
205pub fn resolve_ref(gitref: &GitReference, repo: &git2::Repository) -> CargoResult<git2::Oid> {
206 let id = match gitref {
207 // Note that we resolve the named tag here in sync with where it's
208 // fetched into via `fetch` below.
209 GitReference::Tag(s) => (|| -> CargoResult<git2::Oid> {
210 let refname = format!("refs/remotes/origin/tags/{}", s);
211 let id = repo.refname_to_id(&refname)?;
212 let obj = repo.find_object(id, None)?;
213 let obj = obj.peel(ObjectType::Commit)?;
214 Ok(obj.id())
215 })()
216 .with_context(|| format!("failed to find tag `{}`", s))?,
217
218 // Resolve the remote name since that's all we're configuring in
219 // `fetch` below.
220 GitReference::Branch(s) => {
221 let name = format!("origin/{}", s);
222 let b = repo
223 .find_branch(&name, git2::BranchType::Remote)
224 .with_context(|| format!("failed to find branch `{}`", s))?;
225 b.get()
226 .target()
227 .ok_or_else(|| anyhow::format_err!("branch `{}` did not have a target", s))?
228 }
229
230 // We'll be using the HEAD commit
231 GitReference::DefaultBranch => {
232 let head_id = repo.refname_to_id("refs/remotes/origin/HEAD")?;
233 let head = repo.find_object(head_id, None)?;
234 head.peel(ObjectType::Commit)?.id()
235 }
236
237 GitReference::Rev(s) => {
238 let obj = repo.revparse_single(s)?;
239 match obj.as_tag() {
240 Some(tag) => tag.target_id(),
241 None => obj.id(),
242 }
243 }
244 };
245 Ok(id)
246}
247
248impl<'a> GitCheckout<'a> {
249 /// Creates an instance of [`GitCheckout`]. This doesn't imply the checkout
250 /// is done. Use [`GitCheckout::is_fresh`] to check.
251 ///
252 /// * The `database` is where this checkout is from.
253 /// * The `repo` will be the checked out Git repository.
254 fn new(
255 database: &'a GitDatabase,
256 revision: git2::Oid,
257 repo: git2::Repository,
258 ) -> GitCheckout<'a> {
259 let path = repo.workdir().unwrap_or_else(|| repo.path());
260 GitCheckout {
261 path: path.to_path_buf(),
262 database,
263 revision,
264 repo,
265 }
266 }
267
268 /// Gets the remote repository URL.
269 fn remote_url(&self) -> &Url {
270 &self.database.remote.url()
271 }
272
273 /// Clone a repo for a `revision` into a local path from a `database`.
274 /// This is a filesystem-to-filesystem clone.
275 fn clone_into(
276 into: &Path,
277 database: &'a GitDatabase,
278 revision: git2::Oid,
279 gctx: &GlobalContext,
280 ) -> CargoResult<(GitCheckout<'a>, CheckoutGuard)> {
281 let dirname = into.parent().unwrap();
282 paths::create_dir_all(&dirname)?;
283 if into.exists() {
284 paths::remove_dir_all(into)?;
285 }
286
287 // we're doing a local filesystem-to-filesystem clone so there should
288 // be no need to respect global configuration options, so pass in
289 // an empty instance of `git2::Config` below.
290 let git_config = git2::Config::new()?;
291
292 // Clone the repository, but make sure we use the "local" option in
293 // libgit2 which will attempt to use hardlinks to set up the database.
294 // This should speed up the clone operation quite a bit if it works.
295 //
296 // Note that we still use the same fetch options because while we don't
297 // need authentication information we may want progress bars and such.
298 let url = database.path.into_url()?;
299 let mut repo = None;
300 with_fetch_options(&git_config, url.as_str(), gctx, &mut |fopts| {
301 let mut checkout = git2::build::CheckoutBuilder::new();
302 checkout.dry_run(); // we'll do this below during a `reset`
303
304 let r = git2::build::RepoBuilder::new()
305 // use hard links and/or copy the database, we're doing a
306 // filesystem clone so this'll speed things up quite a bit.
307 .clone_local(git2::build::CloneLocal::Local)
308 .with_checkout(checkout)
309 .fetch_options(fopts)
310 .clone(url.as_str(), into)?;
311 // `git2` doesn't seem to handle shallow repos correctly when doing
312 // a local clone. Fortunately all that's needed is the copy of the
313 // one file that defines the shallow boundary, the commits which
314 // have their parents omitted as part of the shallow clone.
315 //
316 // TODO(git2): remove this when git2 supports shallow clone correctly
317 if database.repo.is_shallow() {
318 std::fs::copy(
319 database.repo.path().join("shallow"),
320 r.path().join("shallow"),
321 )?;
322 }
323 repo = Some(r);
324 Ok(())
325 })?;
326 let repo = repo.unwrap();
327
328 let checkout = GitCheckout::new(database, revision, repo);
329 let guard = checkout.reset(gctx)?;
330 Ok((checkout, guard))
331 }
332
333 /// Checks if the `HEAD` of this checkout points to the expected revision.
334 fn is_fresh(&self) -> bool {
335 match self.repo.revparse_single("HEAD") {
336 Ok(ref head) if head.id() == self.revision => {
337 // See comments in reset() for why we check this
338 self.path.join(CHECKOUT_READY_LOCK).exists()
339 }
340 _ => false,
341 }
342 }
343
344 /// Similar to [`reset()`]. This roughly performs `git reset --hard` to the
345 /// revision of this checkout, with additional interrupt protection by a
346 /// dummy file [`CHECKOUT_READY_LOCK`].
347 ///
348 /// If we're interrupted while performing a `git reset` (e.g., we die
349 /// because of a signal) Cargo needs to be sure to try to check out this
350 /// repo again on the next go-round.
351 ///
352 /// To enable this we have a dummy file in our checkout, [`.cargo-ok`],
353 /// which if present means that the repo has been successfully reset and is
354 /// ready to go. Hence if we start to do a reset, we make sure this file
355 /// *doesn't* exist. The caller of [`reset`] has an option to perform additional operations
356 /// (e.g. submodule update) before marking the check-out as ready.
357 ///
358 /// [`.cargo-ok`]: CHECKOUT_READY_LOCK
359 fn reset(&self, gctx: &GlobalContext) -> CargoResult<CheckoutGuard> {
360 let guard = CheckoutGuard::guard(&self.path);
361 info!("reset {} to {}", self.repo.path().display(), self.revision);
362
363 // Ensure libgit2 won't mess with newlines when we vendor.
364 if let Ok(mut git_config) = self.repo.config() {
365 git_config.set_bool("core.autocrlf", false)?;
366 }
367
368 let object = self.repo.find_object(self.revision, None)?;
369 reset(&self.repo, &object, gctx)?;
370
371 Ok(guard)
372 }
373
374 /// Like `git submodule update --recursive` but for this git checkout.
375 ///
376 /// This function respects `submodule.<name>.update = none`[^1] git config.
377 /// Submodules set to `none` won't be fetched.
378 ///
379 /// [^1]: <https://git-scm.com/docs/git-submodule#Documentation/git-submodule.txt-none>
380 fn update_submodules(&self, gctx: &GlobalContext, quiet: bool) -> CargoResult<()> {
381 return update_submodules(&self.repo, gctx, quiet, self.remote_url().as_str());
382
383 /// Recursive helper for [`GitCheckout::update_submodules`].
384 fn update_submodules(
385 repo: &git2::Repository,
386 gctx: &GlobalContext,
387 quiet: bool,
388 parent_remote_url: &str,
389 ) -> CargoResult<()> {
390 debug!("update submodules for: {:?}", repo.workdir().unwrap());
391
392 for mut child in repo.submodules()? {
393 update_submodule(repo, &mut child, gctx, quiet, parent_remote_url).with_context(
394 || {
395 format!(
396 "failed to update submodule `{}`",
397 child.name().unwrap_or("")
398 )
399 },
400 )?;
401 }
402 Ok(())
403 }
404
405 /// Update a single Git submodule, and recurse into its submodules.
406 fn update_submodule(
407 parent: &git2::Repository,
408 child: &mut git2::Submodule<'_>,
409 gctx: &GlobalContext,
410 quiet: bool,
411 parent_remote_url: &str,
412 ) -> CargoResult<()> {
413 child.init(false)?;
414
415 let child_url_str = child.url().ok_or_else(|| {
416 anyhow::format_err!("non-utf8 url for submodule {:?}?", child.path())
417 })?;
418
419 // Skip the submodule if the config says not to update it.
420 if child.update_strategy() == git2::SubmoduleUpdate::None {
421 gctx.shell().status(
422 "Skipping",
423 format!(
424 "git submodule `{}` due to update strategy in .gitmodules",
425 child_url_str
426 ),
427 )?;
428 return Ok(());
429 }
430
431 let child_remote_url = absolute_submodule_url(parent_remote_url, child_url_str)?;
432
433 // A submodule which is listed in .gitmodules but not actually
434 // checked out will not have a head id, so we should ignore it.
435 let Some(head) = child.head_id() else {
436 return Ok(());
437 };
438
439 // If the submodule hasn't been checked out yet, we need to
440 // clone it. If it has been checked out and the head is the same
441 // as the submodule's head, then we can skip an update and keep
442 // recursing.
443 let head_and_repo = child.open().and_then(|repo| {
444 let target = repo.head()?.target();
445 Ok((target, repo))
446 });
447 let repo = match head_and_repo {
448 Ok((head, repo)) => {
449 if child.head_id() == head {
450 return update_submodules(&repo, gctx, quiet, &child_remote_url);
451 }
452 repo
453 }
454 Err(..) => {
455 let path = parent.workdir().unwrap().join(child.path());
456 let _ = paths::remove_dir_all(&path);
457 init(&path, false)?
458 }
459 };
460 // Fetch submodule database and checkout to target revision
461 let reference = GitReference::Rev(head.to_string());
462
463 // GitSource created from SourceId without git precise will result to
464 // locked_rev being Deferred and fetch_db always try to fetch if online
465 let source_id = SourceId::for_git(&child_remote_url.into_url()?, reference)?
466 .with_git_precise(Some(head.to_string()));
467
468 let mut source = GitSource::new(source_id, gctx)?;
469 source.set_quiet(quiet);
470
471 let (db, actual_rev) = source.fetch_db(true).with_context(|| {
472 let name = child.name().unwrap_or("");
473 format!("failed to fetch submodule `{name}` from {child_remote_url}",)
474 })?;
475 db.copy_to(actual_rev, repo.path(), gctx, quiet)?;
476 Ok(())
477 }
478 }
479}
480
481/// See [`GitCheckout::reset`] for rationale on this type.
482#[must_use]
483struct CheckoutGuard {
484 ok_file: PathBuf,
485}
486
487impl CheckoutGuard {
488 fn guard(path: &Path) -> Self {
489 let ok_file = path.join(CHECKOUT_READY_LOCK);
490 let _ = paths::remove_file(&ok_file);
491 Self { ok_file }
492 }
493
494 fn mark_ok(self) -> CargoResult<()> {
495 let _ = paths::create(self.ok_file)?;
496 Ok(())
497 }
498}
499
500/// Constructs an absolute URL for a child submodule URL with its parent base URL.
501///
502/// Git only assumes a submodule URL is a relative path if it starts with `./`
503/// or `../` [^1]. To fetch the correct repo, we need to construct an absolute
504/// submodule URL.
505///
506/// At this moment it comes with some limitations:
507///
508/// * GitHub doesn't accept non-normalized URLs with relative paths.
509/// (`ssh://git@github.com/rust-lang/cargo.git/relative/..` is invalid)
510/// * `url` crate cannot parse SCP-like URLs.
511/// (`git@github.com:rust-lang/cargo.git` is not a valid WHATWG URL)
512///
513/// To overcome these, this patch always tries [`Url::parse`] first to normalize
514/// the path. If it couldn't, append the relative path as the last resort and
515/// pray the remote git service supports non-normalized URLs.
516///
517/// See also rust-lang/cargo#12404 and rust-lang/cargo#12295.
518///
519/// [^1]: <https://git-scm.com/docs/git-submodule>
520fn absolute_submodule_url<'s>(base_url: &str, submodule_url: &'s str) -> CargoResult<Cow<'s, str>> {
521 let absolute_url = if ["./", "../"].iter().any(|p| submodule_url.starts_with(p)) {
522 match Url::parse(base_url) {
523 Ok(mut base_url) => {
524 let path = base_url.path();
525 if !path.ends_with('/') {
526 base_url.set_path(&format!("{path}/"));
527 }
528 let absolute_url = base_url.join(submodule_url).with_context(|| {
529 format!(
530 "failed to parse relative child submodule url `{submodule_url}` \
531 using parent base url `{base_url}`"
532 )
533 })?;
534 Cow::from(absolute_url.to_string())
535 }
536 Err(_) => {
537 let mut absolute_url = base_url.to_string();
538 if !absolute_url.ends_with('/') {
539 absolute_url.push('/');
540 }
541 absolute_url.push_str(submodule_url);
542 Cow::from(absolute_url)
543 }
544 }
545 } else {
546 Cow::from(submodule_url)
547 };
548
549 Ok(absolute_url)
550}
551
/// Prepare the authentication callbacks for cloning a git repository.
///
/// The main purpose of this function is to construct the "authentication
/// callback" which is used to clone a repository. This callback will attempt to
/// find the right authentication on the system (without user input) and will
/// guide libgit2 in doing so.
///
/// The callback is provided `allowed` types of credentials, and we try to do as
/// much as possible based on that:
///
/// * Prioritize SSH keys from the local ssh agent as they're likely the most
///   reliable. The username comes from the credential callback when one is
///   given. Otherwise we try, in order, the username known to the git
///   credential helper, the `USER`/`USERNAME` environment variable, and
///   finally the generic user of `git`.
///
/// * If a username/password is allowed, then we fallback to git2-rs's
///   implementation of the credential helper. This is what is configured
///   with `credential.helper` in git, and is the interface for the macOS
///   keychain, for example.
///
/// * After the above two have failed, we just kinda grapple attempting to
///   return *something*.
///
/// If any form of authentication fails, libgit2 will repeatedly ask us for
/// credentials until we give it a reason to not do so. To ensure we don't
/// just sit here looping forever we keep track of authentications we've
/// attempted and we don't try the same ones again.
///
/// `f` is invoked with the credential callback and performs the actual git
/// operation; it may be invoked several times, once per username attempt.
/// On success the value produced by `f` is returned. On failure the error
/// from the last invocation of `f` is returned, with extra context attached
/// describing what was attempted.
fn with_authentication<T, F>(
    gctx: &GlobalContext,
    url: &str,
    cfg: &git2::Config,
    mut f: F,
) -> CargoResult<T>
where
    F: FnMut(&mut git2::Credentials<'_>) -> CargoResult<T>,
{
    let mut cred_helper = git2::CredentialHelper::new(url);
    cred_helper.config(cfg);

    // Bookkeeping shared with the credential callbacks below. It drives both
    // the retry logic and the error message built at the end.
    let mut ssh_username_requested = false;
    let mut cred_helper_bad = None;
    let mut ssh_agent_attempts = Vec::new();
    let mut any_attempts = false;
    let mut tried_sshkey = false;
    let mut url_attempt = None;

    let orig_url = url;
    let mut res = f(&mut |url, username, allowed| {
        any_attempts = true;
        // Remember the URL we were asked about if it differs from the one
        // we started with, so that the error message can mention it.
        if url != orig_url {
            url_attempt = Some(url.to_string());
        }
        // libgit2's "USERNAME" authentication actually means that it's just
        // asking us for a username to keep going. This is currently only really
        // used for SSH authentication and isn't really an authentication type.
        // The logic currently looks like:
        //
        //      let user = ...;
        //      if (user.is_null())
        //          user = callback(USERNAME, null, ...);
        //
        //      callback(SSH_KEY, user, ...)
        //
        // So if we're being called here then we know that (a) we're using ssh
        // authentication and (b) no username was specified in the URL that
        // we're trying to clone. We need to guess an appropriate username here,
        // but that may involve a few attempts. Unfortunately we can't switch
        // usernames during one authentication session with libgit2, so to
        // handle this we bail out of this authentication session after setting
        // the flag `ssh_username_requested`, and then we handle this below.
        if allowed.contains(git2::CredentialType::USERNAME) {
            debug_assert!(username.is_none());
            ssh_username_requested = true;
            return Err(git2::Error::from_str("gonna try usernames later"));
        }

        // An "SSH_KEY" authentication indicates that we need some sort of SSH
        // authentication. This can currently either come from the ssh-agent
        // process or from a raw in-memory SSH key. Cargo only supports using
        // ssh-agent currently.
        //
        // If we get called with this then the only way that should be possible
        // is if a username is specified in the URL itself (e.g., `username` is
        // Some), hence the unwrap() here. We try custom usernames down below.
        if allowed.contains(git2::CredentialType::SSH_KEY) && !tried_sshkey {
            // If ssh-agent authentication fails, libgit2 will keep
            // calling this callback asking for other authentication
            // methods to try. Make sure we only try ssh-agent once,
            // to avoid looping forever.
            tried_sshkey = true;
            let username = username.unwrap();
            debug_assert!(!ssh_username_requested);
            ssh_agent_attempts.push(username.to_string());
            return git2::Cred::ssh_key_from_agent(username);
        }

        // Sometimes libgit2 will ask for a username/password in plaintext. This
        // is where Cargo would have an interactive prompt if we supported it,
        // but we currently don't! Right now the only way we support fetching a
        // plaintext password is through the `credential.helper` support, so
        // fetch that here.
        //
        // If ssh-agent authentication fails, libgit2 will keep calling this
        // callback asking for other authentication methods to try. Check
        // cred_helper_bad to make sure we only try the git credential helper
        // once, to avoid looping forever.
        if allowed.contains(git2::CredentialType::USER_PASS_PLAINTEXT) && cred_helper_bad.is_none()
        {
            let r = git2::Cred::credential_helper(cfg, url, username);
            cred_helper_bad = Some(r.is_err());
            return r;
        }

        // I'm... not sure what the DEFAULT kind of authentication is, but seems
        // easy to support?
        if allowed.contains(git2::CredentialType::DEFAULT) {
            return git2::Cred::default();
        }

        // Whelp, we tried our best
        Err(git2::Error::from_str("no authentication methods succeeded"))
    });

    // Ok, so if it looks like we're going to be doing ssh authentication, we
    // want to try a few different usernames as one wasn't specified in the URL
    // for us to use. In order, we'll try:
    //
    // * A credential helper's username for this URL, if available.
    // * This account's username.
    // * "git"
    //
    // We have to restart the authentication session each time (due to
    // constraints in libssh2 I guess? maybe this is inherent to ssh?), so we
    // call our callback, `f`, in a loop here.
    if ssh_username_requested {
        debug_assert!(res.is_err());
        // Candidates are popped from the back, so they are pushed in the
        // reverse of the order listed above.
        let mut attempts = vec![String::from("git")];
        if let Ok(s) = gctx.get_env("USER").or_else(|_| gctx.get_env("USERNAME")) {
            attempts.push(s.to_string());
        }
        if let Some(ref s) = cred_helper.username {
            attempts.push(s.clone());
        }

        while let Some(s) = attempts.pop() {
            // We should get `USERNAME` first, where we just return our attempt,
            // and then after that we should get `SSH_KEY`. If the first attempt
            // fails we'll get called again, but we don't have another option so
            // we bail out.
            //
            // Note that this counter shadows the `attempts` list of usernames
            // for the rest of the loop body; it counts `SSH_KEY` requests.
            let mut attempts = 0;
            res = f(&mut |_url, username, allowed| {
                if allowed.contains(git2::CredentialType::USERNAME) {
                    return git2::Cred::username(&s);
                }
                if allowed.contains(git2::CredentialType::SSH_KEY) {
                    debug_assert_eq!(Some(&s[..]), username);
                    attempts += 1;
                    if attempts == 1 {
                        ssh_agent_attempts.push(s.to_string());
                        return git2::Cred::ssh_key_from_agent(&s);
                    }
                }
                Err(git2::Error::from_str("no authentication methods succeeded"))
            });

            // If we made two attempts then that means:
            //
            // 1. A username was requested, we returned `s`.
            // 2. An ssh key was requested, we returned to look up `s` in the
            //    ssh agent.
            // 3. For whatever reason that lookup failed, so we were asked again
            //    for another mode of authentication.
            //
            // Essentially, if `attempts == 2` then in theory the only error was
            // that this username failed to authenticate (e.g., no other network
            // errors happened). Otherwise something else is funny so we bail
            // out.
            if attempts != 2 {
                break;
            }
        }
    }
    let mut err = match res {
        Ok(e) => return Ok(e),
        Err(e) => e,
    };

    // In the case of an authentication failure (where we tried something) then
    // we try to give a more helpful error message about precisely what we
    // tried.
    if any_attempts {
        let mut msg = "failed to authenticate when downloading \
                       repository"
            .to_string();

        if let Some(attempt) = &url_attempt {
            if url != attempt {
                msg.push_str(": ");
                msg.push_str(attempt);
            }
        }
        msg.push('\n');
        if !ssh_agent_attempts.is_empty() {
            let names = ssh_agent_attempts
                .iter()
                .map(|s| format!("`{}`", s))
                .collect::<Vec<_>>()
                .join(", ");
            msg.push_str(&format!(
                "\n* attempted ssh-agent authentication, but \
                 no usernames succeeded: {}",
                names
            ));
        }
        // `Some(true)` means the credential helper itself failed, while
        // `Some(false)` means it produced credentials that were then rejected.
        if let Some(failed_cred_helper) = cred_helper_bad {
            if failed_cred_helper {
                msg.push_str(
                    "\n* attempted to find username/password via \
                     git's `credential.helper` support, but failed",
                );
            } else {
                msg.push_str(
                    "\n* attempted to find username/password via \
                     `credential.helper`, but maybe the found \
                     credentials were incorrect",
                );
            }
        }
        msg.push_str("\n\n");
        msg.push_str("if the git CLI succeeds then `net.git-fetch-with-cli` may help here\n");
        msg.push_str("https://doc.rust-lang.org/cargo/reference/config.html#netgit-fetch-with-cli");
        err = err.context(msg);

        // Otherwise if we didn't even get to the authentication phase then we may
        // have failed to set up a connection, in these cases hint on the
        // `net.git-fetch-with-cli` configuration option.
    } else if let Some(e) = err.downcast_ref::<git2::Error>() {
        match e.class() {
            ErrorClass::Net
            | ErrorClass::Ssl
            | ErrorClass::Submodule
            | ErrorClass::FetchHead
            | ErrorClass::Ssh
            | ErrorClass::Http => {
                let msg = format!(
                    concat!(
                        "network failure seems to have happened\n",
                        "if a proxy or similar is necessary `net.git-fetch-with-cli` may help here\n",
                        "https://doc.rust-lang.org/cargo/reference/config.html#netgit-fetch-with-cli",
                        "{}"
                    ),
                    note_github_pull_request(url).unwrap_or_default()
                );
                err = err.context(msg);
            }
            ErrorClass::Callback => {
                // This unwraps the git2 error. We're using the callback error
                // specifically to convey errors from Rust land through the C
                // callback interface. We don't need the `; class=Callback
                // (26)` that gets tacked on to the git2 error message.
                err = anyhow::format_err!("{}", e.message());
            }
            _ => {}
        }
    }

    Err(err)
}
820
821/// `git reset --hard` to the given `obj` for the `repo`.
822///
823/// The `obj` is a commit-ish to which the head should be moved.
824fn reset(repo: &git2::Repository, obj: &git2::Object<'_>, gctx: &GlobalContext) -> CargoResult<()> {
825 let mut pb = Progress::new("Checkout", gctx);
826 let mut opts = git2::build::CheckoutBuilder::new();
827 opts.progress(|_, cur, max| {
828 drop(pb.tick(cur, max, ""));
829 });
830 debug!("doing reset");
831 repo.reset(obj, git2::ResetType::Hard, Some(&mut opts))?;
832 debug!("reset done");
833 Ok(())
834}
835
/// Prepares the callbacks for fetching a git repository.
///
/// The main purpose of this function is to construct everything before a fetch.
/// This will attempt to setup a progress bar, the authentication for git,
/// ssh known hosts check, and the network retry mechanism.
///
/// The callback `cb` is provided with fetch options, which can be used by the
/// actual git fetch. Since the whole operation runs under the network retry
/// mechanism and the authentication retry loop, `cb` may be invoked more
/// than once.
///
/// When the operation fails and the ssh host key check recorded an error,
/// that host key error is the one returned.
pub fn with_fetch_options(
    git_config: &git2::Config,
    url: &str,
    gctx: &GlobalContext,
    cb: &mut dyn FnMut(git2::FetchOptions<'_>) -> CargoResult<()>,
) -> CargoResult<()> {
    let mut progress = Progress::new("Fetch", gctx);
    let ssh_config = gctx.net_config()?.ssh.as_ref();
    let config_known_hosts = ssh_config.and_then(|ssh| ssh.known_hosts.as_ref());
    let diagnostic_home_config = gctx.diagnostic_home_config();
    network::retry::with_retry(gctx, || {
        // Hack: libgit2 disallows overriding the error from check_cb since v1.8.0,
        // so we store the error additionally and unwrap it later
        let mut check_cb_result = Ok(());
        let auth_result = with_authentication(gctx, url, git_config, |f| {
            // The port is only known when `url` parses as a URL with an
            // explicit port; otherwise `None` is passed to the host check.
            let port = Url::parse(url).ok().and_then(|url| url.port());
            let mut last_update = Instant::now();
            let mut rcb = git2::RemoteCallbacks::new();
            // We choose `N=10` here to make a `300ms * 10slots ~= 3000ms`
            // sliding window for tracking the data transfer rate (in bytes/s).
            let mut counter = MetricsCounter::<10>::new(0, last_update);
            rcb.credentials(f);
            rcb.certificate_check(|cert, host| {
                match super::known_hosts::certificate_check(
                    gctx,
                    cert,
                    host,
                    port,
                    config_known_hosts,
                    &diagnostic_home_config,
                ) {
                    Ok(status) => Ok(status),
                    Err(e) => {
                        check_cb_result = Err(e);
                        // This is not really used because it'll be overridden by libgit2
                        // See https://github.com/libgit2/libgit2/commit/9a9f220119d9647a352867b24b0556195cb26548
                        Err(git2::Error::from_str(
                            "invalid or unknown remote ssh hostkey",
                        ))
                    }
                }
            });
            rcb.transfer_progress(|stats| {
                let indexed_deltas = stats.indexed_deltas();
                let msg = if indexed_deltas > 0 {
                    // Resolving deltas.
                    format!(
                        ", ({}/{}) resolving deltas",
                        indexed_deltas,
                        stats.total_deltas()
                    )
                } else {
                    // Receiving objects.
                    //
                    // # Caveat
                    //
                    // Progress bar relies on git2 calling `transfer_progress`
                    // to update its transfer rate, but we cannot guarantee a
                    // periodic call of that callback. Thus if we don't receive
                    // any data for, say, 10 seconds, the rate will get stuck
                    // and never go down to 0B/s.
                    // In the future, we need to find a way to update the rate
                    // even when the callback is not called.
                    let now = Instant::now();
                    // Scrape a `received_bytes` to the counter every 300ms.
                    if now - last_update > Duration::from_millis(300) {
                        counter.add(stats.received_bytes(), now);
                        last_update = now;
                    }
                    let rate = HumanBytes(counter.rate() as u64);
                    format!(", {rate:.2}/s")
                };
                // The callback reports whether updating the progress bar
                // succeeded.
                progress
                    .tick(stats.indexed_objects(), stats.total_objects(), &msg)
                    .is_ok()
            });

            // Hand the fully configured fetch options over to the caller,
            // which performs the actual fetch.
            let mut opts = git2::FetchOptions::new();
            opts.remote_callbacks(rcb);
            cb(opts)
        });
        // Prefer the stored host key check error over the error reported by
        // the failed operation, see the hack described above.
        if auth_result.is_err() {
            check_cb_result?;
        }
        auth_result?;
        Ok(())
    })
}
934
935/// Attempts to fetch the given git `reference` for a Git repository.
936///
937/// This is the main entry for git clone/fetch. It does the followings:
938///
939/// * Turns [`GitReference`] into refspecs accordingly.
940/// * Dispatches `git fetch` using libgit2, gitoxide, or git CLI.
941///
942/// The `remote_url` argument is the git remote URL where we want to fetch from.
943///
944/// The `remote_kind` argument is a thing for [`-Zgitoxide`] shallow clones
945/// at this time. It could be extended when libgit2 supports shallow clones.
946///
947/// [`-Zgitoxide`]: https://doc.rust-lang.org/nightly/cargo/reference/unstable.html#gitoxide
948pub fn fetch(
949 repo: &mut git2::Repository,
950 remote_url: &str,
951 reference: &GitReference,
952 gctx: &GlobalContext,
953 remote_kind: RemoteKind,
954) -> CargoResult<()> {
955 if let Some(offline_flag) = gctx.offline_flag() {
956 anyhow::bail!(
957 "attempting to update a git repository, but {offline_flag} \
958 was specified"
959 )
960 }
961
962 let shallow = remote_kind.to_shallow_setting(repo.is_shallow(), gctx);
963
964 // Flag to keep track if the rev is a full commit hash
965 let mut fast_path_rev: bool = false;
966
967 let oid_to_fetch = match github_fast_path(repo, remote_url, reference, gctx) {
968 Ok(FastPathRev::UpToDate) => return Ok(()),
969 Ok(FastPathRev::NeedsFetch(rev)) => Some(rev),
970 Ok(FastPathRev::Indeterminate) => None,
971 Err(e) => {
972 debug!("failed to check github {:?}", e);
973 None
974 }
975 };
976
977 maybe_gc_repo(repo, gctx)?;
978
979 clean_repo_temp_files(repo);
980
981 // Translate the reference desired here into an actual list of refspecs
982 // which need to get fetched. Additionally record if we're fetching tags.
983 let mut refspecs = Vec::new();
984 let mut tags = false;
985 // The `+` symbol on the refspec means to allow a forced (fast-forward)
986 // update which is needed if there is ever a force push that requires a
987 // fast-forward.
988 match reference {
989 // For branches and tags we can fetch simply one reference and copy it
990 // locally, no need to fetch other branches/tags.
991 GitReference::Branch(b) => {
992 refspecs.push(format!("+refs/heads/{0}:refs/remotes/origin/{0}", b));
993 }
994
995 GitReference::Tag(t) => {
996 refspecs.push(format!("+refs/tags/{0}:refs/remotes/origin/tags/{0}", t));
997 }
998
999 GitReference::DefaultBranch => {
1000 refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
1001 }
1002
1003 GitReference::Rev(rev) => {
1004 if rev.starts_with("refs/") {
1005 refspecs.push(format!("+{0}:{0}", rev));
1006 } else if let Some(oid_to_fetch) = oid_to_fetch {
1007 fast_path_rev = true;
1008 refspecs.push(format!("+{0}:refs/commit/{0}", oid_to_fetch));
1009 } else if !matches!(shallow, gix::remote::fetch::Shallow::NoChange)
1010 && rev_to_oid(rev).is_some()
1011 {
1012 // There is a specific commit to fetch and we will do so in shallow-mode only
1013 // to not disturb the previous logic.
1014 // Note that with typical settings for shallowing, we will just fetch a single `rev`
1015 // as single commit.
1016 // The reason we write to `refs/remotes/origin/HEAD` is that it's of special significance
1017 // when during `GitReference::resolve()`, but otherwise it shouldn't matter.
1018 refspecs.push(format!("+{0}:refs/remotes/origin/HEAD", rev));
1019 } else {
1020 // We don't know what the rev will point to. To handle this
1021 // situation we fetch all branches and tags, and then we pray
1022 // it's somewhere in there.
1023 refspecs.push(String::from("+refs/heads/*:refs/remotes/origin/*"));
1024 refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
1025 tags = true;
1026 }
1027 }
1028 }
1029
1030 debug!("doing a fetch for {remote_url}");
1031 let result = if let Some(true) = gctx.net_config()?.git_fetch_with_cli {
1032 fetch_with_cli(repo, remote_url, &refspecs, tags, shallow, gctx)
1033 } else if gctx.cli_unstable().gitoxide.map_or(false, |git| git.fetch) {
1034 fetch_with_gitoxide(repo, remote_url, refspecs, tags, shallow, gctx)
1035 } else {
1036 fetch_with_libgit2(repo, remote_url, refspecs, tags, shallow, gctx)
1037 };
1038
1039 if fast_path_rev {
1040 if let Some(oid) = oid_to_fetch {
1041 return result.with_context(|| format!("revision {} not found", oid));
1042 }
1043 }
1044 result
1045}
1046
1047/// `gitoxide` uses shallow locks to assure consistency when fetching to and to avoid races, and to write
1048/// files atomically.
1049/// Cargo has its own lock files and doesn't need that mechanism for race protection, so a stray lock means
1050/// a signal interrupted a previous shallow fetch and doesn't mean a race is happening.
1051fn has_shallow_lock_file(err: &crate::sources::git::fetch::Error) -> bool {
1052 matches!(
1053 err,
1054 gix::env::collate::fetch::Error::Fetch(gix::remote::fetch::Error::Fetch(
1055 gix::protocol::fetch::Error::LockShallowFile(_)
1056 ))
1057 )
1058}
1059
1060/// Attempts to use `git` CLI installed on the system to fetch a repository,
1061/// when the config value [`net.git-fetch-with-cli`][1] is set.
1062///
1063/// Unfortunately `libgit2` is notably lacking in the realm of authentication
1064/// when compared to the `git` command line. As a result, allow an escape
1065/// hatch for users that would prefer to use `git`-the-CLI for fetching
1066/// repositories instead of `libgit2`-the-library. This should make more
1067/// flavors of authentication possible while also still giving us all the
1068/// speed and portability of using `libgit2`.
1069///
1070/// [1]: https://doc.rust-lang.org/nightly/cargo/reference/config.html#netgit-fetch-with-cli
1071fn fetch_with_cli(
1072 repo: &mut git2::Repository,
1073 url: &str,
1074 refspecs: &[String],
1075 tags: bool,
1076 shallow: gix::remote::fetch::Shallow,
1077 gctx: &GlobalContext,
1078) -> CargoResult<()> {
1079 debug!(target: "git-fetch", backend = "git-cli");
1080
1081 let mut cmd = ProcessBuilder::new("git");
1082 cmd.arg("fetch");
1083 if tags {
1084 cmd.arg("--tags");
1085 } else {
1086 cmd.arg("--no-tags");
1087 }
1088 if let gix::remote::fetch::Shallow::DepthAtRemote(depth) = shallow {
1089 let depth = 0i32.saturating_add_unsigned(depth.get());
1090 cmd.arg(format!("--depth={depth}"));
1091 }
1092 match gctx.shell().verbosity() {
1093 Verbosity::Normal => {}
1094 Verbosity::Verbose => {
1095 cmd.arg("--verbose");
1096 }
1097 Verbosity::Quiet => {
1098 cmd.arg("--quiet");
1099 }
1100 }
1101 cmd.arg("--force") // handle force pushes
1102 .arg("--update-head-ok") // see discussion in #2078
1103 .arg(url)
1104 .args(refspecs)
1105 // If cargo is run by git (for example, the `exec` command in `git
1106 // rebase`), the GIT_DIR is set by git and will point to the wrong
1107 // location. This makes sure GIT_DIR is always the repository path.
1108 .env("GIT_DIR", repo.path())
1109 // The reset of these may not be necessary, but I'm including them
1110 // just to be extra paranoid and avoid any issues.
1111 .env_remove("GIT_WORK_TREE")
1112 .env_remove("GIT_INDEX_FILE")
1113 .env_remove("GIT_OBJECT_DIRECTORY")
1114 .env_remove("GIT_ALTERNATE_OBJECT_DIRECTORIES")
1115 .cwd(repo.path());
1116 gctx.shell()
1117 .verbose(|s| s.status("Running", &cmd.to_string()))?;
1118 network::retry::with_retry(gctx, || {
1119 cmd.exec()
1120 .map_err(|error| GitCliError::new(error, true).into())
1121 })?;
1122
1123 Ok(())
1124}
1125
/// Fetches `refspecs` from `remote_url` into `repo` using `gitoxide`.
///
/// The fetch itself runs inside [`oxide::with_retry_and_progress`], operating
/// on the on-disk repository located at `repo.path()`.
///
/// * `tags` - when `true` all tags are fetched, otherwise only the tags
///   gitoxide considers "included".
/// * `shallow` - the shallow setting handed to gitoxide for this fetch.
///
/// If the on-disk repository had to be reinitialized while fetching, the
/// `git2` handle behind `repo` is reopened before returning so that it does
/// not keep referring to the repository that was blown away.
fn fetch_with_gitoxide(
    repo: &mut git2::Repository,
    remote_url: &str,
    refspecs: Vec<String>,
    tags: bool,
    shallow: gix::remote::fetch::Shallow,
    gctx: &GlobalContext,
) -> CargoResult<()> {
    debug!(target: "git-fetch", backend = "gitoxide");

    // Renamed so that `repo` is free to name the gitoxide repository below.
    let git2_repo = repo;
    let config_overrides = cargo_config_to_gitoxide_overrides(gctx)?;
    // An atomic is used because the flag is written from within the closure
    // that is passed by shared reference, and read again after it returns.
    let repo_reinitialized = AtomicBool::default();
    let res = oxide::with_retry_and_progress(
        git2_repo.path(),
        gctx,
        remote_url,
        &|repo_path,
          should_interrupt,
          mut progress,
          url_for_authentication: &mut dyn FnMut(&gix::bstr::BStr)| {
            // The `fetch` operation here may fail spuriously due to a corrupt
            // repository. It could also fail, however, for a whole slew of other
            // reasons (aka network related reasons). We want Cargo to automatically
            // recover from corrupt repositories, but we don't want Cargo to stomp
            // over other legitimate errors.
            //
            // Consequently we save off the error of the `fetch` operation and if it
            // looks like a "corrupt repo" error then we blow away the repo and try
            // again. If it looks like any other kind of error, or if we've already
            // blown away the repository, then we want to return the error as-is.
            loop {
                let res = oxide::open_repo(
                    repo_path,
                    config_overrides.clone(),
                    oxide::OpenMode::ForFetch,
                )
                .map_err(crate::sources::git::fetch::Error::from)
                .and_then(|repo| {
                    debug!("initiating fetch of {refspecs:?} from {remote_url}");
                    // Reborrow so the callback can be moved into the
                    // credentials closure on every loop iteration.
                    let url_for_authentication = &mut *url_for_authentication;
                    let remote = repo
                        .remote_at(remote_url)?
                        .with_fetch_tags(if tags {
                            gix::remote::fetch::Tags::All
                        } else {
                            gix::remote::fetch::Tags::Included
                        })
                        .with_refspecs(
                            refspecs.iter().map(|s| s.as_str()),
                            gix::remote::Direction::Fetch,
                        )
                        .map_err(crate::sources::git::fetch::Error::Other)?;
                    let url = remote
                        .url(gix::remote::Direction::Fetch)
                        .expect("set at init")
                        .to_owned();
                    let connection = remote.connect(gix::remote::Direction::Fetch)?;
                    let mut authenticate = connection.configured_credentials(url)?;
                    let connection = connection.with_credentials(
                        move |action: gix::protocol::credentials::helper::Action| {
                            // Report the URL credentials are requested for, but
                            // only when it differs from `remote_url`.
                            // Note: the closure parameter named `gctx` below is
                            // the credential context and shadows the outer
                            // `GlobalContext` only within that closure.
                            if let Some(url) = action
                                .context()
                                .and_then(|gctx| gctx.url.as_ref().filter(|url| *url != remote_url))
                            {
                                url_for_authentication(url.as_ref());
                            }
                            authenticate(action)
                        },
                    );
                    let outcome = connection
                        .prepare_fetch(&mut progress, gix::remote::ref_map::Options::default())?
                        .with_shallow(shallow.clone())
                        .receive(&mut progress, should_interrupt)?;
                    Ok(outcome)
                });
                let err = match res {
                    Ok(_) => break,
                    Err(e) => e,
                };
                debug!("fetch failed: {}", err);

                // `&&` binds tighter than `||`: a corruption error triggers a
                // reinitialization only once, while a stray shallow lock file
                // triggers one regardless of earlier reinitializations.
                if !repo_reinitialized.load(Ordering::Relaxed)
                        // We check for errors that could occur if the configuration, refs or odb files are corrupted.
                        // We don't check for errors related to writing as `gitoxide` is expected to create missing leading
                        // folder before writing files into it, or else not even open a directory as git repository (which is
                        // also handled here).
                        && err.is_corrupted()
                    || has_shallow_lock_file(&err)
                {
                    repo_reinitialized.store(true, Ordering::Relaxed);
                    debug!(
                        "looks like this is a corrupt repository, reinitializing \
                     and trying again"
                    );
                    if oxide::reinitialize(repo_path).is_ok() {
                        continue;
                    }
                }

                return Err(err.into());
            }
            Ok(())
        },
    );
    // The repository on disk was recreated, so reopen the `git2` handle too.
    if repo_reinitialized.load(Ordering::Relaxed) {
        *git2_repo = git2::Repository::open(git2_repo.path())?;
    }
    res
}
1236
1237fn fetch_with_libgit2(
1238 repo: &mut git2::Repository,
1239 remote_url: &str,
1240 refspecs: Vec<String>,
1241 tags: bool,
1242 shallow: gix::remote::fetch::Shallow,
1243 gctx: &GlobalContext,
1244) -> CargoResult<()> {
1245 debug!(target: "git-fetch", backend = "libgit2");
1246
1247 let git_config = git2::Config::open_default()?;
1248 with_fetch_options(&git_config, remote_url, gctx, &mut |mut opts| {
1249 if tags {
1250 opts.download_tags(git2::AutotagOption::All);
1251 }
1252 if let gix::remote::fetch::Shallow::DepthAtRemote(depth) = shallow {
1253 opts.depth(0i32.saturating_add_unsigned(depth.get()));
1254 }
1255 // The `fetch` operation here may fail spuriously due to a corrupt
1256 // repository. It could also fail, however, for a whole slew of other
1257 // reasons (aka network related reasons). We want Cargo to automatically
1258 // recover from corrupt repositories, but we don't want Cargo to stomp
1259 // over other legitimate errors.
1260 //
1261 // Consequently we save off the error of the `fetch` operation and if it
1262 // looks like a "corrupt repo" error then we blow away the repo and try
1263 // again. If it looks like any other kind of error, or if we've already
1264 // blown away the repository, then we want to return the error as-is.
1265 let mut repo_reinitialized = false;
1266 loop {
1267 debug!("initiating fetch of {refspecs:?} from {remote_url}");
1268 let res = repo
1269 .remote_anonymous(remote_url)?
1270 .fetch(&refspecs, Some(&mut opts), None);
1271 let err = match res {
1272 Ok(()) => break,
1273 Err(e) => e,
1274 };
1275 debug!("fetch failed: {}", err);
1276
1277 if !repo_reinitialized && matches!(err.class(), ErrorClass::Reference | ErrorClass::Odb)
1278 {
1279 repo_reinitialized = true;
1280 debug!(
1281 "looks like this is a corrupt repository, reinitializing \
1282 and trying again"
1283 );
1284 if reinitialize(repo).is_ok() {
1285 continue;
1286 }
1287 }
1288
1289 return Err(err.into());
1290 }
1291 Ok(())
1292 })
1293}
1294
1295/// Attempts to `git gc` a repository.
1296///
1297/// Cargo has a bunch of long-lived git repositories in its global cache and
1298/// some, like the index, are updated very frequently. Right now each update
1299/// creates a new "pack file" inside the git database, and over time this can
1300/// cause bad performance and bad current behavior in libgit2.
1301///
1302/// One pathological use case today is where libgit2 opens hundreds of file
1303/// descriptors, getting us dangerously close to blowing out the OS limits of
1304/// how many fds we can have open. This is detailed in [#4403].
1305///
1306/// To try to combat this problem we attempt a `git gc` here. Note, though, that
1307/// we may not even have `git` installed on the system! As a result we
1308/// opportunistically try a `git gc` when the pack directory looks too big, and
1309/// failing that we just blow away the repository and start over.
1310///
1311/// In theory this shouldn't be too expensive compared to the network request
1312/// we're about to issue.
1313///
1314/// [#4403]: https://github.com/rust-lang/cargo/issues/4403
1315fn maybe_gc_repo(repo: &mut git2::Repository, gctx: &GlobalContext) -> CargoResult<()> {
1316 // Here we arbitrarily declare that if you have more than 100 files in your
1317 // `pack` folder that we need to do a gc.
1318 let entries = match repo.path().join("objects/pack").read_dir() {
1319 Ok(e) => e.count(),
1320 Err(_) => {
1321 debug!("skipping gc as pack dir appears gone");
1322 return Ok(());
1323 }
1324 };
1325 let max = gctx
1326 .get_env("__CARGO_PACKFILE_LIMIT")
1327 .ok()
1328 .and_then(|s| s.parse::<usize>().ok())
1329 .unwrap_or(100);
1330 if entries < max {
1331 debug!("skipping gc as there's only {} pack files", entries);
1332 return Ok(());
1333 }
1334
1335 // First up, try a literal `git gc` by shelling out to git. This is pretty
1336 // likely to fail though as we may not have `git` installed. Note that
1337 // libgit2 doesn't currently implement the gc operation, so there's no
1338 // equivalent there.
1339 match Command::new("git")
1340 .arg("gc")
1341 .current_dir(repo.path())
1342 .output()
1343 {
1344 Ok(out) => {
1345 debug!(
1346 "git-gc status: {}\n\nstdout ---\n{}\nstderr ---\n{}",
1347 out.status,
1348 String::from_utf8_lossy(&out.stdout),
1349 String::from_utf8_lossy(&out.stderr)
1350 );
1351 if out.status.success() {
1352 let new = git2::Repository::open(repo.path())?;
1353 *repo = new;
1354 return Ok(());
1355 }
1356 }
1357 Err(e) => debug!("git-gc failed to spawn: {}", e),
1358 }
1359
1360 // Alright all else failed, let's start over.
1361 reinitialize(repo)
1362}
1363
1364/// Removes temporary files left from previous activity.
1365///
1366/// If libgit2 is interrupted while indexing pack files, it will leave behind
1367/// some temporary files that it doesn't clean up. These can be quite large in
1368/// size, so this tries to clean things up.
1369///
1370/// This intentionally ignores errors. This is only an opportunistic cleaning,
1371/// and we don't really care if there are issues (there's unlikely anything
1372/// that can be done).
1373///
1374/// The git CLI has similar behavior (its temp files look like
1375/// `objects/pack/tmp_pack_9kUSA8`). Those files are normally deleted via `git
1376/// prune` which is run by `git gc`. However, it doesn't know about libgit2's
1377/// filenames, so they never get cleaned up.
1378fn clean_repo_temp_files(repo: &git2::Repository) {
1379 let path = repo.path().join("objects/pack/pack_git2_*");
1380 let Some(pattern) = path.to_str() else {
1381 tracing::warn!("cannot convert {path:?} to a string");
1382 return;
1383 };
1384 let Ok(paths) = glob::glob(pattern) else {
1385 return;
1386 };
1387 for path in paths {
1388 if let Ok(path) = path {
1389 match paths::remove_file(&path) {
1390 Ok(_) => tracing::debug!("removed stale temp git file {path:?}"),
1391 Err(e) => {
1392 tracing::warn!("failed to remove {path:?} while cleaning temp files: {e}")
1393 }
1394 }
1395 }
1396 }
1397}
1398
1399/// Reinitializes a given Git repository. This is useful when a Git repository
1400/// seems corrupted and we want to start over.
1401fn reinitialize(repo: &mut git2::Repository) -> CargoResult<()> {
1402 // Here we want to drop the current repository object pointed to by `repo`,
1403 // so we initialize temporary repository in a sub-folder, blow away the
1404 // existing git folder, and then recreate the git repo. Finally we blow away
1405 // the `tmp` folder we allocated.
1406 let path = repo.path().to_path_buf();
1407 debug!("reinitializing git repo at {:?}", path);
1408 let tmp = path.join("tmp");
1409 let bare = !repo.path().ends_with(".git");
1410 *repo = init(&tmp, false)?;
1411 for entry in path.read_dir()? {
1412 let entry = entry?;
1413 if entry.file_name().to_str() == Some("tmp") {
1414 continue;
1415 }
1416 let path = entry.path();
1417 drop(paths::remove_file(&path).or_else(|_| paths::remove_dir_all(&path)));
1418 }
1419 *repo = init(&path, bare)?;
1420 paths::remove_dir_all(&tmp)?;
1421 Ok(())
1422}
1423
1424/// Initializes a Git repository at `path`.
1425fn init(path: &Path, bare: bool) -> CargoResult<git2::Repository> {
1426 let mut opts = git2::RepositoryInitOptions::new();
1427 // Skip anything related to templates, they just call all sorts of issues as
1428 // we really don't want to use them yet they insist on being used. See #6240
1429 // for an example issue that comes up.
1430 opts.external_template(false);
1431 opts.bare(bare);
1432 Ok(git2::Repository::init_opts(&path, &opts)?)
1433}
1434
/// The result of GitHub fast path check. See [`github_fast_path`] for more.
///
/// [`fetch`] uses the outcome to decide whether a fetch can be skipped
/// entirely, narrowed down to a single commit, or must go the regular way.
enum FastPathRev {
    /// The local rev (determined by `reference.resolve(repo)`) is already up to
    /// date with what this rev resolves to on GitHub's server.
    UpToDate,
    /// The following SHA must be fetched in order for the local rev to become
    /// up to date.
    NeedsFetch(Oid),
    /// Don't know whether local rev is up to date. We'll fetch _all_ branches
    /// and tags from the server and see what happens.
    Indeterminate,
}
1447
1448/// Attempts GitHub's special fast path for testing if we've already got an
1449/// up-to-date copy of the repository.
1450///
1451/// Updating the index is done pretty regularly so we want it to be as fast as
1452/// possible. For registries hosted on GitHub (like the crates.io index) there's
1453/// a fast path available to use[^1] to tell us that there's no updates to be
1454/// made.
1455///
1456/// Note that this function should never cause an actual failure because it's
1457/// just a fast path. As a result, a caller should ignore `Err` returned from
1458/// this function and move forward on the normal path.
1459///
1460/// [^1]: <https://developer.github.com/v3/repos/commits/#get-the-sha-1-of-a-commit-reference>
1461fn github_fast_path(
1462 repo: &mut git2::Repository,
1463 url: &str,
1464 reference: &GitReference,
1465 gctx: &GlobalContext,
1466) -> CargoResult<FastPathRev> {
1467 let url = Url::parse(url)?;
1468 if !is_github(&url) {
1469 return Ok(FastPathRev::Indeterminate);
1470 }
1471
1472 let local_object = resolve_ref(reference, repo).ok();
1473
1474 let github_branch_name = match reference {
1475 GitReference::Branch(branch) => branch,
1476 GitReference::Tag(tag) => tag,
1477 GitReference::DefaultBranch => "HEAD",
1478 GitReference::Rev(rev) => {
1479 if rev.starts_with("refs/") {
1480 rev
1481 } else if looks_like_commit_hash(rev) {
1482 // `revparse_single` (used by `resolve`) is the only way to turn
1483 // short hash -> long hash, but it also parses other things,
1484 // like branch and tag names, which might coincidentally be
1485 // valid hex.
1486 //
1487 // We only return early if `rev` is a prefix of the object found
1488 // by `revparse_single`. Don't bother talking to GitHub in that
1489 // case, since commit hashes are permanent. If a commit with the
1490 // requested hash is already present in the local clone, its
1491 // contents must be the same as what is on the server for that
1492 // hash.
1493 //
1494 // If `rev` is not found locally by `revparse_single`, we'll
1495 // need GitHub to resolve it and get a hash. If `rev` is found
1496 // but is not a short hash of the found object, it's probably a
1497 // branch and we also need to get a hash from GitHub, in case
1498 // the branch has moved.
1499 if let Some(local_object) = local_object {
1500 if is_short_hash_of(rev, local_object) {
1501 debug!("github fast path already has {local_object}");
1502 return Ok(FastPathRev::UpToDate);
1503 }
1504 }
1505 // If `rev` is a full commit hash, the only thing it can resolve
1506 // to is itself. Don't bother talking to GitHub in that case
1507 // either. (This ensures that we always attempt to fetch the
1508 // commit directly even if we can't reach the GitHub API.)
1509 if let Some(oid) = rev_to_oid(rev) {
1510 debug!("github fast path is already a full commit hash {rev}");
1511 return Ok(FastPathRev::NeedsFetch(oid));
1512 }
1513 rev
1514 } else {
1515 debug!("can't use github fast path with `rev = \"{}\"`", rev);
1516 return Ok(FastPathRev::Indeterminate);
1517 }
1518 }
1519 };
1520
1521 // This expects GitHub urls in the form `github.com/user/repo` and nothing
1522 // else
1523 let mut pieces = url
1524 .path_segments()
1525 .ok_or_else(|| anyhow!("no path segments on url"))?;
1526 let username = pieces
1527 .next()
1528 .ok_or_else(|| anyhow!("couldn't find username"))?;
1529 let repository = pieces
1530 .next()
1531 .ok_or_else(|| anyhow!("couldn't find repository name"))?;
1532 if pieces.next().is_some() {
1533 anyhow::bail!("too many segments on URL");
1534 }
1535
1536 // Trim off the `.git` from the repository, if present, since that's
1537 // optional for GitHub and won't work when we try to use the API as well.
1538 let repository = repository.strip_suffix(".git").unwrap_or(repository);
1539
1540 let url = format!(
1541 "https://api.github.com/repos/{}/{}/commits/{}",
1542 username, repository, github_branch_name,
1543 );
1544 let mut handle = gctx.http()?.lock().unwrap();
1545 debug!("attempting GitHub fast path for {}", url);
1546 handle.get(true)?;
1547 handle.url(&url)?;
1548 handle.useragent("cargo")?;
1549 handle.follow_location(true)?; // follow redirects
1550 handle.http_headers({
1551 let mut headers = List::new();
1552 headers.append("Accept: application/vnd.github.3.sha")?;
1553 if let Some(local_object) = local_object {
1554 headers.append(&format!("If-None-Match: \"{}\"", local_object))?;
1555 }
1556 headers
1557 })?;
1558
1559 let mut response_body = Vec::new();
1560 let mut transfer = handle.transfer();
1561 transfer.write_function(|data| {
1562 response_body.extend_from_slice(data);
1563 Ok(data.len())
1564 })?;
1565 transfer.perform()?;
1566 drop(transfer); // end borrow of handle so that response_code can be called
1567
1568 let response_code = handle.response_code()?;
1569 if response_code == 304 {
1570 debug!("github fast path up-to-date");
1571 Ok(FastPathRev::UpToDate)
1572 } else if response_code == 200
1573 && let Some(oid_to_fetch) = rev_to_oid(str::from_utf8(&response_body)?)
1574 {
1575 // response expected to be a full hash hexstring (40 or 64 chars)
1576 debug!("github fast path fetch {oid_to_fetch}");
1577 Ok(FastPathRev::NeedsFetch(oid_to_fetch))
1578 } else {
1579 // Usually response_code == 404 if the repository does not exist, and
1580 // response_code == 422 if exists but GitHub is unable to resolve the
1581 // requested rev.
1582 debug!("github fast path bad response code {response_code}");
1583 Ok(FastPathRev::Indeterminate)
1584 }
1585}
1586
1587/// Whether a `url` is one from GitHub.
1588fn is_github(url: &Url) -> bool {
1589 url.host_str() == Some("github.com")
1590}
1591
1592// Give some messages on GitHub PR URL given as is
1593pub(crate) fn note_github_pull_request(url: &str) -> Option<String> {
1594 if let Ok(url) = url.parse::<Url>()
1595 && is_github(&url)
1596 {
1597 let path_segments = url
1598 .path_segments()
1599 .map(|p| p.into_iter().collect::<Vec<_>>())
1600 .unwrap_or_default();
1601 if let [owner, repo, "pull", pr_number, ..] = path_segments[..] {
1602 let repo_url = format!("https://github.com/{owner}/{repo}.git");
1603 let rev = format!("refs/pull/{pr_number}/head");
1604 return Some(format!(
1605 concat!(
1606 "\n\nnote: GitHub url {} is not a repository. \n",
1607 "help: Replace the dependency with \n",
1608 " `git = \"{}\" rev = \"{}\"` \n",
1609 " to specify pull requests as dependencies' revision."
1610 ),
1611 url, repo_url, rev
1612 ));
1613 }
1614 }
1615
1616 None
1617}
1618
/// Tells whether `rev` could be a commit hash: at least 7 bytes long and made
/// up of ASCII hex digits only.
fn looks_like_commit_hash(rev: &str) -> bool {
    if rev.len() < 7 {
        return false;
    }
    // Any byte of a non-ASCII character is >= 0x80 and thus never a hex digit.
    rev.bytes().all(|byte| byte.is_ascii_hexdigit())
}
1623
1624/// Whether `rev` is a shorter hash of `oid`.
1625fn is_short_hash_of(rev: &str, oid: Oid) -> bool {
1626 let long_hash = oid.to_string();
1627 match long_hash.get(..rev.len()) {
1628 Some(truncated_long_hash) => truncated_long_hash.eq_ignore_ascii_case(rev),
1629 None => false,
1630 }
1631}
1632
#[cfg(test)]
mod tests {
    use super::absolute_submodule_url;

    /// `(base URL, submodule URL, expected absolute URL)` triples.
    const CASES: &[(&str, &str, &str)] = &[
        // Base given as an `ssh://` URL.
        (
            "ssh://git@gitub.com/rust-lang/cargo",
            "git@github.com:rust-lang/cargo.git",
            "git@github.com:rust-lang/cargo.git",
        ),
        (
            "ssh://git@gitub.com/rust-lang/cargo",
            "./",
            "ssh://git@gitub.com/rust-lang/cargo/",
        ),
        (
            "ssh://git@gitub.com/rust-lang/cargo",
            "../",
            "ssh://git@gitub.com/rust-lang/",
        ),
        (
            "ssh://git@gitub.com/rust-lang/cargo",
            "./foo",
            "ssh://git@gitub.com/rust-lang/cargo/foo",
        ),
        (
            "ssh://git@gitub.com/rust-lang/cargo/",
            "./foo",
            "ssh://git@gitub.com/rust-lang/cargo/foo",
        ),
        (
            "ssh://git@gitub.com/rust-lang/cargo/",
            "../foo",
            "ssh://git@gitub.com/rust-lang/foo",
        ),
        (
            "ssh://git@gitub.com/rust-lang/cargo",
            "../foo",
            "ssh://git@gitub.com/rust-lang/foo",
        ),
        (
            "ssh://git@gitub.com/rust-lang/cargo",
            "../foo/bar/../baz",
            "ssh://git@gitub.com/rust-lang/foo/baz",
        ),
        // Base given in the scp-like `user@host:path` form.
        (
            "git@github.com:rust-lang/cargo.git",
            "ssh://git@gitub.com/rust-lang/cargo",
            "ssh://git@gitub.com/rust-lang/cargo",
        ),
        (
            "git@github.com:rust-lang/cargo.git",
            "./",
            "git@github.com:rust-lang/cargo.git/./",
        ),
        (
            "git@github.com:rust-lang/cargo.git",
            "../",
            "git@github.com:rust-lang/cargo.git/../",
        ),
        (
            "git@github.com:rust-lang/cargo.git",
            "./foo",
            "git@github.com:rust-lang/cargo.git/./foo",
        ),
        (
            "git@github.com:rust-lang/cargo.git/",
            "./foo",
            "git@github.com:rust-lang/cargo.git/./foo",
        ),
        (
            "git@github.com:rust-lang/cargo.git",
            "../foo",
            "git@github.com:rust-lang/cargo.git/../foo",
        ),
        (
            "git@github.com:rust-lang/cargo.git/",
            "../foo",
            "git@github.com:rust-lang/cargo.git/../foo",
        ),
        (
            "git@github.com:rust-lang/cargo.git",
            "../foo/bar/../baz",
            "git@github.com:rust-lang/cargo.git/../foo/bar/../baz",
        ),
    ];

    /// Checks every entry of the case table against `absolute_submodule_url`.
    #[test]
    fn test_absolute_submodule_url() {
        for &(base_url, submodule_url, expected) in CASES {
            let url = absolute_submodule_url(base_url, submodule_url).unwrap();
            assert_eq!(
                expected, url,
                "base `{base_url}`; submodule `{submodule_url}`"
            );
        }
    }
}
1731
1732/// Turns a full commit hash revision into an oid.
1733///
1734/// Git object ID is supposed to be a hex string of 20 (SHA1) or 32 (SHA256) bytes.
1735/// Its length must be double to the underlying bytes (40 or 64),
1736/// otherwise libgit2 would happily zero-pad the returned oid.
1737///
1738/// See:
1739///
1740/// * <https://github.com/rust-lang/cargo/issues/13188>
1741/// * <https://github.com/rust-lang/cargo/issues/13968>
1742pub(super) fn rev_to_oid(rev: &str) -> Option<Oid> {
1743 Oid::from_str(rev)
1744 .ok()
1745 .filter(|oid| oid.as_bytes().len() * 2 == rev.len())
1746}