cargo/sources/git/utils.rs
1//! Utilities for handling git repositories, mainly around
2//! authentication/cloning.
3
4use crate::core::{GitReference, SourceId, Verbosity};
5use crate::sources::git::fetch::RemoteKind;
6use crate::sources::git::oxide;
7use crate::sources::git::oxide::cargo_config_to_gitoxide_overrides;
8use crate::sources::git::source::GitSource;
9use crate::sources::source::Source as _;
10use crate::util::HumanBytes;
11use crate::util::errors::{CargoResult, GitCliError};
12use crate::util::{GlobalContext, IntoUrl, MetricsCounter, Progress, network};
13use anyhow::{Context as _, anyhow};
14use cargo_util::{ProcessBuilder, paths};
15use curl::easy::List;
16use git2::{ErrorClass, ObjectType, Oid};
17use serde::Serialize;
18use serde::ser;
19use std::borrow::Cow;
20use std::fmt;
21use std::path::{Path, PathBuf};
22use std::process::Command;
23use std::str;
24use std::sync::atomic::{AtomicBool, Ordering};
25use std::time::{Duration, Instant};
26use tracing::{debug, info};
27use url::Url;
28
/// Name of the marker file whose presence indicates that `git reset` has
/// completed and the repo checkout is ready to go. See
/// [`GitCheckout::reset`] for why this marker is needed.
const CHECKOUT_READY_LOCK: &str = ".cargo-ok";
32
33fn serialize_str<T, S>(t: &T, s: S) -> Result<S::Ok, S::Error>
34where
35 T: fmt::Display,
36 S: ser::Serializer,
37{
38 s.collect_str(t)
39}
40
41/// A short abbreviated OID.
42///
43/// Exists for avoiding extra allocations in [`GitDatabase::to_short_id`].
44pub struct GitShortID(git2::Buf);
45
46impl GitShortID {
47 /// Views the short ID as a `str`.
48 pub fn as_str(&self) -> &str {
49 self.0.as_str().unwrap()
50 }
51}
52
53/// A remote repository. It gets cloned into a local [`GitDatabase`].
54#[derive(PartialEq, Clone, Debug, Serialize)]
55pub struct GitRemote {
56 /// URL to a remote repository.
57 #[serde(serialize_with = "serialize_str")]
58 url: Url,
59}
60
61/// A local clone of a remote repository's database. Multiple [`GitCheckout`]s
62/// can be cloned from a single [`GitDatabase`].
63pub struct GitDatabase {
64 /// The remote repository where this database is fetched from.
65 remote: GitRemote,
66 /// Path to the root of the underlying Git repository on the local filesystem.
67 path: PathBuf,
68 /// Underlying Git repository instance for this database.
69 repo: git2::Repository,
70}
71
72/// A local checkout of a particular revision from a [`GitDatabase`].
73pub struct GitCheckout<'a> {
74 /// The git database where this checkout is cloned from.
75 database: &'a GitDatabase,
76 /// Path to the root of the underlying Git repository on the local filesystem.
77 path: PathBuf,
78 /// The git revision this checkout is for.
79 revision: git2::Oid,
80 /// Underlying Git repository instance for this checkout.
81 repo: git2::Repository,
82}
83
84impl GitRemote {
85 /// Creates an instance for a remote repository URL.
86 pub fn new(url: &Url) -> GitRemote {
87 GitRemote { url: url.clone() }
88 }
89
90 /// Gets the remote repository URL.
91 pub fn url(&self) -> &Url {
92 &self.url
93 }
94
95 /// Fetches and checkouts to a reference or a revision from this remote
96 /// into a local path.
97 ///
98 /// This ensures that it gets the up-to-date commit when a named reference
99 /// is given (tag, branch, refs/*). Thus, network connection is involved.
100 ///
101 /// If we have a previous instance of [`GitDatabase`] then fetch into that
102 /// if we can. If that can successfully load our revision then we've
103 /// populated the database with the latest version of `reference`, so
104 /// return that database and the rev we resolve to.
105 pub fn checkout(
106 &self,
107 into: &Path,
108 db: Option<GitDatabase>,
109 reference: &GitReference,
110 gctx: &GlobalContext,
111 ) -> CargoResult<(GitDatabase, git2::Oid)> {
112 if let Some(mut db) = db {
113 fetch(
114 &mut db.repo,
115 self.url.as_str(),
116 reference,
117 gctx,
118 RemoteKind::GitDependency,
119 )
120 .with_context(|| format!("failed to fetch into: {}", into.display()))?;
121
122 if let Some(rev) = resolve_ref(reference, &db.repo).ok() {
123 return Ok((db, rev));
124 }
125 }
126
127 // Otherwise start from scratch to handle corrupt git repositories.
128 // After our fetch (which is interpreted as a clone now) we do the same
129 // resolution to figure out what we cloned.
130 if into.exists() {
131 paths::remove_dir_all(into)?;
132 }
133 paths::create_dir_all(into)?;
134 let mut repo = init(into, true)?;
135 fetch(
136 &mut repo,
137 self.url.as_str(),
138 reference,
139 gctx,
140 RemoteKind::GitDependency,
141 )
142 .with_context(|| format!("failed to clone into: {}", into.display()))?;
143 let rev = resolve_ref(reference, &repo)?;
144
145 Ok((
146 GitDatabase {
147 remote: self.clone(),
148 path: into.to_path_buf(),
149 repo,
150 },
151 rev,
152 ))
153 }
154
155 /// Creates a [`GitDatabase`] of this remote at `db_path`.
156 pub fn db_at(&self, db_path: &Path) -> CargoResult<GitDatabase> {
157 let repo = git2::Repository::open(db_path)?;
158 Ok(GitDatabase {
159 remote: self.clone(),
160 path: db_path.to_path_buf(),
161 repo,
162 })
163 }
164}
165
166impl GitDatabase {
167 /// Checkouts to a revision at `dest`ination from this database.
168 #[tracing::instrument(skip(self, gctx))]
169 pub fn copy_to(
170 &self,
171 rev: git2::Oid,
172 dest: &Path,
173 gctx: &GlobalContext,
174 quiet: bool,
175 ) -> CargoResult<GitCheckout<'_>> {
176 // If the existing checkout exists, and it is fresh, use it.
177 // A non-fresh checkout can happen if the checkout operation was
178 // interrupted. In that case, the checkout gets deleted and a new
179 // clone is created.
180 let checkout = match git2::Repository::open(dest)
181 .ok()
182 .map(|repo| GitCheckout::new(self, rev, repo))
183 .filter(|co| co.is_fresh())
184 {
185 Some(co) => co,
186 None => {
187 let (checkout, guard) = GitCheckout::clone_into(dest, self, rev, gctx)?;
188 checkout.update_submodules(gctx, quiet)?;
189 guard.mark_ok()?;
190 checkout
191 }
192 };
193
194 Ok(checkout)
195 }
196
197 /// Get a short OID for a `revision`, usually 7 chars or more if ambiguous.
198 pub fn to_short_id(&self, revision: git2::Oid) -> CargoResult<GitShortID> {
199 let obj = self.repo.find_object(revision, None)?;
200 Ok(GitShortID(obj.short_id()?))
201 }
202
203 /// Checks if the database contains the object of this `oid`..
204 pub fn contains(&self, oid: git2::Oid) -> bool {
205 self.repo.revparse_single(&oid.to_string()).is_ok()
206 }
207
208 /// [`resolve_ref`]s this reference with this database.
209 pub fn resolve(&self, r: &GitReference) -> CargoResult<git2::Oid> {
210 resolve_ref(r, &self.repo)
211 }
212}
213
214/// Resolves [`GitReference`] to an object ID with objects the `repo` currently has.
215pub fn resolve_ref(gitref: &GitReference, repo: &git2::Repository) -> CargoResult<git2::Oid> {
216 let id = match gitref {
217 // Note that we resolve the named tag here in sync with where it's
218 // fetched into via `fetch` below.
219 GitReference::Tag(s) => (|| -> CargoResult<git2::Oid> {
220 let refname = format!("refs/remotes/origin/tags/{}", s);
221 let id = repo.refname_to_id(&refname)?;
222 let obj = repo.find_object(id, None)?;
223 let obj = obj.peel(ObjectType::Commit)?;
224 Ok(obj.id())
225 })()
226 .with_context(|| format!("failed to find tag `{}`", s))?,
227
228 // Resolve the remote name since that's all we're configuring in
229 // `fetch` below.
230 GitReference::Branch(s) => {
231 let name = format!("origin/{}", s);
232 let b = repo
233 .find_branch(&name, git2::BranchType::Remote)
234 .with_context(|| format!("failed to find branch `{}`", s))?;
235 b.get()
236 .target()
237 .ok_or_else(|| anyhow::format_err!("branch `{}` did not have a target", s))?
238 }
239
240 // We'll be using the HEAD commit
241 GitReference::DefaultBranch => {
242 let head_id = repo.refname_to_id("refs/remotes/origin/HEAD")?;
243 let head = repo.find_object(head_id, None)?;
244 head.peel(ObjectType::Commit)?.id()
245 }
246
247 GitReference::Rev(s) => {
248 let obj = repo.revparse_single(s)?;
249 match obj.as_tag() {
250 Some(tag) => tag.target_id(),
251 None => obj.id(),
252 }
253 }
254 };
255 Ok(id)
256}
257
258impl<'a> GitCheckout<'a> {
259 /// Creates an instance of [`GitCheckout`]. This doesn't imply the checkout
260 /// is done. Use [`GitCheckout::is_fresh`] to check.
261 ///
262 /// * The `database` is where this checkout is from.
263 /// * The `repo` will be the checked out Git repository.
264 fn new(
265 database: &'a GitDatabase,
266 revision: git2::Oid,
267 repo: git2::Repository,
268 ) -> GitCheckout<'a> {
269 let path = repo.workdir().unwrap_or_else(|| repo.path());
270 GitCheckout {
271 path: path.to_path_buf(),
272 database,
273 revision,
274 repo,
275 }
276 }
277
278 /// Gets the remote repository URL.
279 fn remote_url(&self) -> &Url {
280 &self.database.remote.url()
281 }
282
283 /// Clone a repo for a `revision` into a local path from a `database`.
284 /// This is a filesystem-to-filesystem clone.
285 fn clone_into(
286 into: &Path,
287 database: &'a GitDatabase,
288 revision: git2::Oid,
289 gctx: &GlobalContext,
290 ) -> CargoResult<(GitCheckout<'a>, CheckoutGuard)> {
291 let dirname = into.parent().unwrap();
292 paths::create_dir_all(&dirname)?;
293 if into.exists() {
294 paths::remove_dir_all(into)?;
295 }
296
297 // we're doing a local filesystem-to-filesystem clone so there should
298 // be no need to respect global configuration options, so pass in
299 // an empty instance of `git2::Config` below.
300 let git_config = git2::Config::new()?;
301
302 // Clone the repository, but make sure we use the "local" option in
303 // libgit2 which will attempt to use hardlinks to set up the database.
304 // This should speed up the clone operation quite a bit if it works.
305 //
306 // Note that we still use the same fetch options because while we don't
307 // need authentication information we may want progress bars and such.
308 let url = database.path.into_url()?;
309 let mut repo = None;
310 with_fetch_options(&git_config, url.as_str(), gctx, &mut |fopts| {
311 let mut checkout = git2::build::CheckoutBuilder::new();
312 checkout.dry_run(); // we'll do this below during a `reset`
313
314 let r = git2::build::RepoBuilder::new()
315 // use hard links and/or copy the database, we're doing a
316 // filesystem clone so this'll speed things up quite a bit.
317 .clone_local(git2::build::CloneLocal::Local)
318 .with_checkout(checkout)
319 .fetch_options(fopts)
320 .clone(url.as_str(), into)?;
321 // `git2` doesn't seem to handle shallow repos correctly when doing
322 // a local clone. Fortunately all that's needed is the copy of the
323 // one file that defines the shallow boundary, the commits which
324 // have their parents omitted as part of the shallow clone.
325 //
326 // TODO(git2): remove this when git2 supports shallow clone correctly
327 if database.repo.is_shallow() {
328 std::fs::copy(
329 database.repo.path().join("shallow"),
330 r.path().join("shallow"),
331 )?;
332 }
333 repo = Some(r);
334 Ok(())
335 })?;
336 let repo = repo.unwrap();
337
338 let checkout = GitCheckout::new(database, revision, repo);
339 let guard = checkout.reset(gctx)?;
340 Ok((checkout, guard))
341 }
342
343 /// Checks if the `HEAD` of this checkout points to the expected revision.
344 fn is_fresh(&self) -> bool {
345 match self.repo.revparse_single("HEAD") {
346 Ok(ref head) if head.id() == self.revision => {
347 // See comments in reset() for why we check this
348 self.path.join(CHECKOUT_READY_LOCK).exists()
349 }
350 _ => false,
351 }
352 }
353
354 /// Similar to [`reset()`]. This roughly performs `git reset --hard` to the
355 /// revision of this checkout, with additional interrupt protection by a
356 /// dummy file [`CHECKOUT_READY_LOCK`].
357 ///
358 /// If we're interrupted while performing a `git reset` (e.g., we die
359 /// because of a signal) Cargo needs to be sure to try to check out this
360 /// repo again on the next go-round.
361 ///
362 /// To enable this we have a dummy file in our checkout, [`.cargo-ok`],
363 /// which if present means that the repo has been successfully reset and is
364 /// ready to go. Hence if we start to do a reset, we make sure this file
365 /// *doesn't* exist. The caller of [`reset`] has an option to perform additional operations
366 /// (e.g. submodule update) before marking the check-out as ready.
367 ///
368 /// [`.cargo-ok`]: CHECKOUT_READY_LOCK
369 fn reset(&self, gctx: &GlobalContext) -> CargoResult<CheckoutGuard> {
370 let guard = CheckoutGuard::guard(&self.path);
371 info!("reset {} to {}", self.repo.path().display(), self.revision);
372
373 // Ensure libgit2 won't mess with newlines when we vendor.
374 if let Ok(mut git_config) = self.repo.config() {
375 git_config.set_bool("core.autocrlf", false)?;
376 }
377
378 let object = self.repo.find_object(self.revision, None)?;
379 reset(&self.repo, &object, gctx)?;
380
381 Ok(guard)
382 }
383
384 /// Like `git submodule update --recursive` but for this git checkout.
385 ///
386 /// This function respects `submodule.<name>.update = none`[^1] git config.
387 /// Submodules set to `none` won't be fetched.
388 ///
389 /// [^1]: <https://git-scm.com/docs/git-submodule#Documentation/git-submodule.txt-none>
390 fn update_submodules(&self, gctx: &GlobalContext, quiet: bool) -> CargoResult<()> {
391 return update_submodules(&self.repo, gctx, quiet, self.remote_url().as_str());
392
393 /// Recursive helper for [`GitCheckout::update_submodules`].
394 fn update_submodules(
395 repo: &git2::Repository,
396 gctx: &GlobalContext,
397 quiet: bool,
398 parent_remote_url: &str,
399 ) -> CargoResult<()> {
400 debug!("update submodules for: {:?}", repo.workdir().unwrap());
401
402 for mut child in repo.submodules()? {
403 update_submodule(repo, &mut child, gctx, quiet, parent_remote_url).with_context(
404 || {
405 format!(
406 "failed to update submodule `{}`",
407 child.name().unwrap_or("")
408 )
409 },
410 )?;
411 }
412 Ok(())
413 }
414
415 /// Update a single Git submodule, and recurse into its submodules.
416 fn update_submodule(
417 parent: &git2::Repository,
418 child: &mut git2::Submodule<'_>,
419 gctx: &GlobalContext,
420 quiet: bool,
421 parent_remote_url: &str,
422 ) -> CargoResult<()> {
423 child.init(false)?;
424
425 let child_url_str = child.url().ok_or_else(|| {
426 anyhow::format_err!("non-utf8 url for submodule {:?}?", child.path())
427 })?;
428
429 // Skip the submodule if the config says not to update it.
430 if child.update_strategy() == git2::SubmoduleUpdate::None {
431 gctx.shell().status(
432 "Skipping",
433 format!(
434 "git submodule `{}` due to update strategy in .gitmodules",
435 child_url_str
436 ),
437 )?;
438 return Ok(());
439 }
440
441 let child_remote_url = absolute_submodule_url(parent_remote_url, child_url_str)?;
442
443 // A submodule which is listed in .gitmodules but not actually
444 // checked out will not have a head id, so we should ignore it.
445 let Some(head) = child.head_id() else {
446 return Ok(());
447 };
448
449 // If the submodule hasn't been checked out yet, we need to
450 // clone it. If it has been checked out and the head is the same
451 // as the submodule's head, then we can skip an update and keep
452 // recursing.
453 let head_and_repo = child.open().and_then(|repo| {
454 let target = repo.head()?.target();
455 Ok((target, repo))
456 });
457 let repo = match head_and_repo {
458 Ok((head, repo)) => {
459 if child.head_id() == head {
460 return update_submodules(&repo, gctx, quiet, &child_remote_url);
461 }
462 repo
463 }
464 Err(..) => {
465 let path = parent.workdir().unwrap().join(child.path());
466 let _ = paths::remove_dir_all(&path);
467 init(&path, false)?
468 }
469 };
470 // Fetch submodule database and checkout to target revision
471 let reference = GitReference::Rev(head.to_string());
472
473 // GitSource created from SourceId without git precise will result to
474 // locked_rev being Deferred and fetch_db always try to fetch if online
475 let source_id = SourceId::for_git(&child_remote_url.into_url()?, reference)?
476 .with_git_precise(Some(head.to_string()));
477
478 let mut source = GitSource::new(source_id, gctx)?;
479 source.set_quiet(quiet);
480
481 let (db, actual_rev) = source.fetch_db(true).with_context(|| {
482 let name = child.name().unwrap_or("");
483 format!("failed to fetch submodule `{name}` from {child_remote_url}",)
484 })?;
485 db.copy_to(actual_rev, repo.path(), gctx, quiet)?;
486 Ok(())
487 }
488 }
489}
490
491/// See [`GitCheckout::reset`] for rationale on this type.
492#[must_use]
493struct CheckoutGuard {
494 ok_file: PathBuf,
495}
496
497impl CheckoutGuard {
498 fn guard(path: &Path) -> Self {
499 let ok_file = path.join(CHECKOUT_READY_LOCK);
500 let _ = paths::remove_file(&ok_file);
501 Self { ok_file }
502 }
503
504 fn mark_ok(self) -> CargoResult<()> {
505 let _ = paths::create(self.ok_file)?;
506 Ok(())
507 }
508}
509
510/// Constructs an absolute URL for a child submodule URL with its parent base URL.
511///
512/// Git only assumes a submodule URL is a relative path if it starts with `./`
513/// or `../` [^1]. To fetch the correct repo, we need to construct an absolute
514/// submodule URL.
515///
516/// At this moment it comes with some limitations:
517///
518/// * GitHub doesn't accept non-normalized URLs with relative paths.
519/// (`ssh://git@github.com/rust-lang/cargo.git/relative/..` is invalid)
520/// * `url` crate cannot parse SCP-like URLs.
521/// (`git@github.com:rust-lang/cargo.git` is not a valid WHATWG URL)
522///
523/// To overcome these, this patch always tries [`Url::parse`] first to normalize
524/// the path. If it couldn't, append the relative path as the last resort and
525/// pray the remote git service supports non-normalized URLs.
526///
527/// See also rust-lang/cargo#12404 and rust-lang/cargo#12295.
528///
529/// [^1]: <https://git-scm.com/docs/git-submodule>
530fn absolute_submodule_url<'s>(base_url: &str, submodule_url: &'s str) -> CargoResult<Cow<'s, str>> {
531 let absolute_url = if ["./", "../"].iter().any(|p| submodule_url.starts_with(p)) {
532 match Url::parse(base_url) {
533 Ok(mut base_url) => {
534 let path = base_url.path();
535 if !path.ends_with('/') {
536 base_url.set_path(&format!("{path}/"));
537 }
538 let absolute_url = base_url.join(submodule_url).with_context(|| {
539 format!(
540 "failed to parse relative child submodule url `{submodule_url}` \
541 using parent base url `{base_url}`"
542 )
543 })?;
544 Cow::from(absolute_url.to_string())
545 }
546 Err(_) => {
547 let mut absolute_url = base_url.to_string();
548 if !absolute_url.ends_with('/') {
549 absolute_url.push('/');
550 }
551 absolute_url.push_str(submodule_url);
552 Cow::from(absolute_url)
553 }
554 }
555 } else {
556 Cow::from(submodule_url)
557 };
558
559 Ok(absolute_url)
560}
561
562/// Prepare the authentication callbacks for cloning a git repository.
563///
564/// The main purpose of this function is to construct the "authentication
565/// callback" which is used to clone a repository. This callback will attempt to
566/// find the right authentication on the system (without user input) and will
567/// guide libgit2 in doing so.
568///
569/// The callback is provided `allowed` types of credentials, and we try to do as
570/// much as possible based on that:
571///
572/// * Prioritize SSH keys from the local ssh agent as they're likely the most
573/// reliable. The username here is prioritized from the credential
574/// callback, then from whatever is configured in git itself, and finally
575/// we fall back to the generic user of `git`.
576///
577/// * If a username/password is allowed, then we fallback to git2-rs's
578/// implementation of the credential helper. This is what is configured
579/// with `credential.helper` in git, and is the interface for the macOS
580/// keychain, for example.
581///
582/// * After the above two have failed, we just kinda grapple attempting to
583/// return *something*.
584///
585/// If any form of authentication fails, libgit2 will repeatedly ask us for
586/// credentials until we give it a reason to not do so. To ensure we don't
587/// just sit here looping forever we keep track of authentications we've
588/// attempted and we don't try the same ones again.
589fn with_authentication<T, F>(
590 gctx: &GlobalContext,
591 url: &str,
592 cfg: &git2::Config,
593 mut f: F,
594) -> CargoResult<T>
595where
596 F: FnMut(&mut git2::Credentials<'_>) -> CargoResult<T>,
597{
598 let mut cred_helper = git2::CredentialHelper::new(url);
599 cred_helper.config(cfg);
600
601 let mut ssh_username_requested = false;
602 let mut cred_helper_bad = None;
603 let mut ssh_agent_attempts = Vec::new();
604 let mut any_attempts = false;
605 let mut tried_sshkey = false;
606 let mut url_attempt = None;
607
608 let orig_url = url;
609 let mut res = f(&mut |url, username, allowed| {
610 any_attempts = true;
611 if url != orig_url {
612 url_attempt = Some(url.to_string());
613 }
614 // libgit2's "USERNAME" authentication actually means that it's just
615 // asking us for a username to keep going. This is currently only really
616 // used for SSH authentication and isn't really an authentication type.
617 // The logic currently looks like:
618 //
619 // let user = ...;
620 // if (user.is_null())
621 // user = callback(USERNAME, null, ...);
622 //
623 // callback(SSH_KEY, user, ...)
624 //
625 // So if we're being called here then we know that (a) we're using ssh
626 // authentication and (b) no username was specified in the URL that
627 // we're trying to clone. We need to guess an appropriate username here,
628 // but that may involve a few attempts. Unfortunately we can't switch
629 // usernames during one authentication session with libgit2, so to
630 // handle this we bail out of this authentication session after setting
631 // the flag `ssh_username_requested`, and then we handle this below.
632 if allowed.contains(git2::CredentialType::USERNAME) {
633 debug_assert!(username.is_none());
634 ssh_username_requested = true;
635 return Err(git2::Error::from_str("gonna try usernames later"));
636 }
637
638 // An "SSH_KEY" authentication indicates that we need some sort of SSH
639 // authentication. This can currently either come from the ssh-agent
640 // process or from a raw in-memory SSH key. Cargo only supports using
641 // ssh-agent currently.
642 //
643 // If we get called with this then the only way that should be possible
644 // is if a username is specified in the URL itself (e.g., `username` is
645 // Some), hence the unwrap() here. We try custom usernames down below.
646 if allowed.contains(git2::CredentialType::SSH_KEY) && !tried_sshkey {
647 // If ssh-agent authentication fails, libgit2 will keep
648 // calling this callback asking for other authentication
649 // methods to try. Make sure we only try ssh-agent once,
650 // to avoid looping forever.
651 tried_sshkey = true;
652 let username = username.unwrap();
653 debug_assert!(!ssh_username_requested);
654 ssh_agent_attempts.push(username.to_string());
655 return git2::Cred::ssh_key_from_agent(username);
656 }
657
658 // Sometimes libgit2 will ask for a username/password in plaintext. This
659 // is where Cargo would have an interactive prompt if we supported it,
660 // but we currently don't! Right now the only way we support fetching a
661 // plaintext password is through the `credential.helper` support, so
662 // fetch that here.
663 //
664 // If ssh-agent authentication fails, libgit2 will keep calling this
665 // callback asking for other authentication methods to try. Check
666 // cred_helper_bad to make sure we only try the git credential helper
667 // once, to avoid looping forever.
668 if allowed.contains(git2::CredentialType::USER_PASS_PLAINTEXT) && cred_helper_bad.is_none()
669 {
670 let r = git2::Cred::credential_helper(cfg, url, username);
671 cred_helper_bad = Some(r.is_err());
672 return r;
673 }
674
675 // I'm... not sure what the DEFAULT kind of authentication is, but seems
676 // easy to support?
677 if allowed.contains(git2::CredentialType::DEFAULT) {
678 return git2::Cred::default();
679 }
680
681 // Whelp, we tried our best
682 Err(git2::Error::from_str("no authentication methods succeeded"))
683 });
684
685 // Ok, so if it looks like we're going to be doing ssh authentication, we
686 // want to try a few different usernames as one wasn't specified in the URL
687 // for us to use. In order, we'll try:
688 //
689 // * A credential helper's username for this URL, if available.
690 // * This account's username.
691 // * "git"
692 //
693 // We have to restart the authentication session each time (due to
694 // constraints in libssh2 I guess? maybe this is inherent to ssh?), so we
695 // call our callback, `f`, in a loop here.
696 if ssh_username_requested {
697 debug_assert!(res.is_err());
698 let mut attempts = vec![String::from("git")];
699 if let Ok(s) = gctx.get_env("USER").or_else(|_| gctx.get_env("USERNAME")) {
700 attempts.push(s.to_string());
701 }
702 if let Some(ref s) = cred_helper.username {
703 attempts.push(s.clone());
704 }
705
706 while let Some(s) = attempts.pop() {
707 // We should get `USERNAME` first, where we just return our attempt,
708 // and then after that we should get `SSH_KEY`. If the first attempt
709 // fails we'll get called again, but we don't have another option so
710 // we bail out.
711 let mut attempts = 0;
712 res = f(&mut |_url, username, allowed| {
713 if allowed.contains(git2::CredentialType::USERNAME) {
714 return git2::Cred::username(&s);
715 }
716 if allowed.contains(git2::CredentialType::SSH_KEY) {
717 debug_assert_eq!(Some(&s[..]), username);
718 attempts += 1;
719 if attempts == 1 {
720 ssh_agent_attempts.push(s.to_string());
721 return git2::Cred::ssh_key_from_agent(&s);
722 }
723 }
724 Err(git2::Error::from_str("no authentication methods succeeded"))
725 });
726
727 // If we made two attempts then that means:
728 //
729 // 1. A username was requested, we returned `s`.
730 // 2. An ssh key was requested, we returned to look up `s` in the
731 // ssh agent.
732 // 3. For whatever reason that lookup failed, so we were asked again
733 // for another mode of authentication.
734 //
735 // Essentially, if `attempts == 2` then in theory the only error was
736 // that this username failed to authenticate (e.g., no other network
737 // errors happened). Otherwise something else is funny so we bail
738 // out.
739 if attempts != 2 {
740 break;
741 }
742 }
743 }
744 let mut err = match res {
745 Ok(e) => return Ok(e),
746 Err(e) => e,
747 };
748
749 // In the case of an authentication failure (where we tried something) then
750 // we try to give a more helpful error message about precisely what we
751 // tried.
752 if any_attempts {
753 let mut msg = "failed to authenticate when downloading \
754 repository"
755 .to_string();
756
757 if let Some(attempt) = &url_attempt {
758 if url != attempt {
759 msg.push_str(": ");
760 msg.push_str(attempt);
761 }
762 }
763 msg.push('\n');
764 if !ssh_agent_attempts.is_empty() {
765 let names = ssh_agent_attempts
766 .iter()
767 .map(|s| format!("`{}`", s))
768 .collect::<Vec<_>>()
769 .join(", ");
770 msg.push_str(&format!(
771 "\n* attempted ssh-agent authentication, but \
772 no usernames succeeded: {}",
773 names
774 ));
775 }
776 if let Some(failed_cred_helper) = cred_helper_bad {
777 if failed_cred_helper {
778 msg.push_str(
779 "\n* attempted to find username/password via \
780 git's `credential.helper` support, but failed",
781 );
782 } else {
783 msg.push_str(
784 "\n* attempted to find username/password via \
785 `credential.helper`, but maybe the found \
786 credentials were incorrect",
787 );
788 }
789 }
790 msg.push_str("\n\n");
791 msg.push_str("if the git CLI succeeds then `net.git-fetch-with-cli` may help here\n");
792 msg.push_str("https://doc.rust-lang.org/cargo/reference/config.html#netgit-fetch-with-cli");
793 err = err.context(msg);
794
795 // Otherwise if we didn't even get to the authentication phase them we may
796 // have failed to set up a connection, in these cases hint on the
797 // `net.git-fetch-with-cli` configuration option.
798 } else if let Some(e) = err.downcast_ref::<git2::Error>() {
799 match e.class() {
800 ErrorClass::Net
801 | ErrorClass::Ssl
802 | ErrorClass::Submodule
803 | ErrorClass::FetchHead
804 | ErrorClass::Ssh
805 | ErrorClass::Http => {
806 let msg = format!(
807 concat!(
808 "network failure seems to have happened\n",
809 "if a proxy or similar is necessary `net.git-fetch-with-cli` may help here\n",
810 "https://doc.rust-lang.org/cargo/reference/config.html#netgit-fetch-with-cli",
811 "{}"
812 ),
813 note_github_pull_request(url).unwrap_or_default()
814 );
815 err = err.context(msg);
816 }
817 ErrorClass::Callback => {
818 // This unwraps the git2 error. We're using the callback error
819 // specifically to convey errors from Rust land through the C
820 // callback interface. We don't need the `; class=Callback
821 // (26)` that gets tacked on to the git2 error message.
822 err = anyhow::format_err!("{}", e.message());
823 }
824 _ => {}
825 }
826 }
827
828 Err(err)
829}
830
831/// `git reset --hard` to the given `obj` for the `repo`.
832///
833/// The `obj` is a commit-ish to which the head should be moved.
834fn reset(repo: &git2::Repository, obj: &git2::Object<'_>, gctx: &GlobalContext) -> CargoResult<()> {
835 let mut pb = Progress::new("Checkout", gctx);
836 let mut opts = git2::build::CheckoutBuilder::new();
837 opts.progress(|_, cur, max| {
838 drop(pb.tick(cur, max, ""));
839 });
840 debug!("doing reset");
841 repo.reset(obj, git2::ResetType::Hard, Some(&mut opts))?;
842 debug!("reset done");
843 Ok(())
844}
845
846/// Prepares the callbacks for fetching a git repository.
847///
848/// The main purpose of this function is to construct everything before a fetch.
849/// This will attempt to setup a progress bar, the authentication for git,
850/// ssh known hosts check, and the network retry mechanism.
851///
852/// The callback is provided a fetch options, which can be used by the actual
853/// git fetch.
854pub fn with_fetch_options(
855 git_config: &git2::Config,
856 url: &str,
857 gctx: &GlobalContext,
858 cb: &mut dyn FnMut(git2::FetchOptions<'_>) -> CargoResult<()>,
859) -> CargoResult<()> {
860 let mut progress = Progress::new("Fetch", gctx);
861 let ssh_config = gctx.net_config()?.ssh.as_ref();
862 let config_known_hosts = ssh_config.and_then(|ssh| ssh.known_hosts.as_ref());
863 let diagnostic_home_config = gctx.diagnostic_home_config();
864 network::retry::with_retry(gctx, || {
865 // Hack: libgit2 disallows overriding the error from check_cb since v1.8.0,
866 // so we store the error additionally and unwrap it later
867 let mut check_cb_result = Ok(());
868 let auth_result = with_authentication(gctx, url, git_config, |f| {
869 let port = Url::parse(url).ok().and_then(|url| url.port());
870 let mut last_update = Instant::now();
871 let mut rcb = git2::RemoteCallbacks::new();
872 // We choose `N=10` here to make a `300ms * 10slots ~= 3000ms`
873 // sliding window for tracking the data transfer rate (in bytes/s).
874 let mut counter = MetricsCounter::<10>::new(0, last_update);
875 rcb.credentials(f);
876 rcb.certificate_check(|cert, host| {
877 match super::known_hosts::certificate_check(
878 gctx,
879 cert,
880 host,
881 port,
882 config_known_hosts,
883 &diagnostic_home_config,
884 ) {
885 Ok(status) => Ok(status),
886 Err(e) => {
887 check_cb_result = Err(e);
888 // This is not really used because it'll be overridden by libgit2
889 // See https://github.com/libgit2/libgit2/commit/9a9f220119d9647a352867b24b0556195cb26548
890 Err(git2::Error::from_str(
891 "invalid or unknown remote ssh hostkey",
892 ))
893 }
894 }
895 });
896 rcb.transfer_progress(|stats| {
897 let indexed_deltas = stats.indexed_deltas();
898 let msg = if indexed_deltas > 0 {
899 // Resolving deltas.
900 format!(
901 ", ({}/{}) resolving deltas",
902 indexed_deltas,
903 stats.total_deltas()
904 )
905 } else {
906 // Receiving objects.
907 //
908 // # Caveat
909 //
910 // Progress bar relies on git2 calling `transfer_progress`
911 // to update its transfer rate, but we cannot guarantee a
912 // periodic call of that callback. Thus if we don't receive
913 // any data for, say, 10 seconds, the rate will get stuck
914 // and never go down to 0B/s.
915 // In the future, we need to find away to update the rate
916 // even when the callback is not called.
917 let now = Instant::now();
918 // Scrape a `received_bytes` to the counter every 300ms.
919 if now - last_update > Duration::from_millis(300) {
920 counter.add(stats.received_bytes(), now);
921 last_update = now;
922 }
923 let rate = HumanBytes(counter.rate() as u64);
924 format!(", {rate:.2}/s")
925 };
926 progress
927 .tick(stats.indexed_objects(), stats.total_objects(), &msg)
928 .is_ok()
929 });
930
931 // Create a local anonymous remote in the repository to fetch the
932 // url
933 let mut opts = git2::FetchOptions::new();
934 opts.remote_callbacks(rcb);
935 cb(opts)
936 });
937 if auth_result.is_err() {
938 check_cb_result?;
939 }
940 auth_result?;
941 Ok(())
942 })
943}
944
945/// Attempts to fetch the given git `reference` for a Git repository.
946///
947/// This is the main entry for git clone/fetch. It does the followings:
948///
949/// * Turns [`GitReference`] into refspecs accordingly.
950/// * Dispatches `git fetch` using libgit2, gitoxide, or git CLI.
951///
952/// The `remote_url` argument is the git remote URL where we want to fetch from.
953///
954/// The `remote_kind` argument is a thing for [`-Zgitoxide`] shallow clones
955/// at this time. It could be extended when libgit2 supports shallow clones.
956///
957/// [`-Zgitoxide`]: https://doc.rust-lang.org/nightly/cargo/reference/unstable.html#gitoxide
958pub fn fetch(
959 repo: &mut git2::Repository,
960 remote_url: &str,
961 reference: &GitReference,
962 gctx: &GlobalContext,
963 remote_kind: RemoteKind,
964) -> CargoResult<()> {
965 if let Some(offline_flag) = gctx.offline_flag() {
966 anyhow::bail!(
967 "attempting to update a git repository, but {offline_flag} \
968 was specified"
969 )
970 }
971
972 let shallow = remote_kind.to_shallow_setting(repo.is_shallow(), gctx);
973
974 // Flag to keep track if the rev is a full commit hash
975 let mut fast_path_rev: bool = false;
976
977 let oid_to_fetch = match github_fast_path(repo, remote_url, reference, gctx) {
978 Ok(FastPathRev::UpToDate) => return Ok(()),
979 Ok(FastPathRev::NeedsFetch(rev)) => Some(rev),
980 Ok(FastPathRev::Indeterminate) => None,
981 Err(e) => {
982 debug!("failed to check github {:?}", e);
983 None
984 }
985 };
986
987 maybe_gc_repo(repo, gctx)?;
988
989 clean_repo_temp_files(repo);
990
991 // Translate the reference desired here into an actual list of refspecs
992 // which need to get fetched. Additionally record if we're fetching tags.
993 let mut refspecs = Vec::new();
994 let mut tags = false;
995 // The `+` symbol on the refspec means to allow a forced (fast-forward)
996 // update which is needed if there is ever a force push that requires a
997 // fast-forward.
998 match reference {
999 // For branches and tags we can fetch simply one reference and copy it
1000 // locally, no need to fetch other branches/tags.
1001 GitReference::Branch(b) => {
1002 refspecs.push(format!("+refs/heads/{0}:refs/remotes/origin/{0}", b));
1003 }
1004
1005 GitReference::Tag(t) => {
1006 refspecs.push(format!("+refs/tags/{0}:refs/remotes/origin/tags/{0}", t));
1007 }
1008
1009 GitReference::DefaultBranch => {
1010 refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
1011 }
1012
1013 GitReference::Rev(rev) => {
1014 if rev.starts_with("refs/") {
1015 refspecs.push(format!("+{0}:{0}", rev));
1016 } else if let Some(oid_to_fetch) = oid_to_fetch {
1017 fast_path_rev = true;
1018 refspecs.push(format!("+{0}:refs/commit/{0}", oid_to_fetch));
1019 } else if !matches!(shallow, gix::remote::fetch::Shallow::NoChange)
1020 && rev.parse::<Oid>().is_ok()
1021 {
1022 // There is a specific commit to fetch and we will do so in shallow-mode only
1023 // to not disturb the previous logic.
1024 // Note that with typical settings for shallowing, we will just fetch a single `rev`
1025 // as single commit.
1026 // The reason we write to `refs/remotes/origin/HEAD` is that it's of special significance
1027 // when during `GitReference::resolve()`, but otherwise it shouldn't matter.
1028 refspecs.push(format!("+{0}:refs/remotes/origin/HEAD", rev));
1029 } else {
1030 // We don't know what the rev will point to. To handle this
1031 // situation we fetch all branches and tags, and then we pray
1032 // it's somewhere in there.
1033 refspecs.push(String::from("+refs/heads/*:refs/remotes/origin/*"));
1034 refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
1035 tags = true;
1036 }
1037 }
1038 }
1039
1040 debug!("doing a fetch for {remote_url}");
1041 let result = if let Some(true) = gctx.net_config()?.git_fetch_with_cli {
1042 fetch_with_cli(repo, remote_url, &refspecs, tags, shallow, gctx)
1043 } else if gctx.cli_unstable().gitoxide.map_or(false, |git| git.fetch) {
1044 fetch_with_gitoxide(repo, remote_url, refspecs, tags, shallow, gctx)
1045 } else {
1046 fetch_with_libgit2(repo, remote_url, refspecs, tags, shallow, gctx)
1047 };
1048
1049 if fast_path_rev {
1050 if let Some(oid) = oid_to_fetch {
1051 return result.with_context(|| format!("revision {} not found", oid));
1052 }
1053 }
1054 result
1055}
1056
1057/// `gitoxide` uses shallow locks to assure consistency when fetching to and to avoid races, and to write
1058/// files atomically.
1059/// Cargo has its own lock files and doesn't need that mechanism for race protection, so a stray lock means
1060/// a signal interrupted a previous shallow fetch and doesn't mean a race is happening.
1061fn has_shallow_lock_file(err: &crate::sources::git::fetch::Error) -> bool {
1062 matches!(
1063 err,
1064 gix::env::collate::fetch::Error::Fetch(gix::remote::fetch::Error::Fetch(
1065 gix::protocol::fetch::Error::LockShallowFile(_)
1066 ))
1067 )
1068}
1069
1070/// Attempts to use `git` CLI installed on the system to fetch a repository,
1071/// when the config value [`net.git-fetch-with-cli`][1] is set.
1072///
1073/// Unfortunately `libgit2` is notably lacking in the realm of authentication
1074/// when compared to the `git` command line. As a result, allow an escape
1075/// hatch for users that would prefer to use `git`-the-CLI for fetching
1076/// repositories instead of `libgit2`-the-library. This should make more
1077/// flavors of authentication possible while also still giving us all the
1078/// speed and portability of using `libgit2`.
1079///
1080/// [1]: https://doc.rust-lang.org/nightly/cargo/reference/config.html#netgit-fetch-with-cli
1081fn fetch_with_cli(
1082 repo: &mut git2::Repository,
1083 url: &str,
1084 refspecs: &[String],
1085 tags: bool,
1086 shallow: gix::remote::fetch::Shallow,
1087 gctx: &GlobalContext,
1088) -> CargoResult<()> {
1089 debug!(target: "git-fetch", backend = "git-cli");
1090
1091 let mut cmd = ProcessBuilder::new("git");
1092 cmd.arg("fetch");
1093 if tags {
1094 cmd.arg("--tags");
1095 } else {
1096 cmd.arg("--no-tags");
1097 }
1098 if let gix::remote::fetch::Shallow::DepthAtRemote(depth) = shallow {
1099 let depth = 0i32.saturating_add_unsigned(depth.get());
1100 cmd.arg(format!("--depth={depth}"));
1101 }
1102 match gctx.shell().verbosity() {
1103 Verbosity::Normal => {}
1104 Verbosity::Verbose => {
1105 cmd.arg("--verbose");
1106 }
1107 Verbosity::Quiet => {
1108 cmd.arg("--quiet");
1109 }
1110 }
1111 cmd.arg("--force") // handle force pushes
1112 .arg("--update-head-ok") // see discussion in #2078
1113 .arg(url)
1114 .args(refspecs)
1115 // If cargo is run by git (for example, the `exec` command in `git
1116 // rebase`), the GIT_DIR is set by git and will point to the wrong
1117 // location. This makes sure GIT_DIR is always the repository path.
1118 .env("GIT_DIR", repo.path())
1119 // The reset of these may not be necessary, but I'm including them
1120 // just to be extra paranoid and avoid any issues.
1121 .env_remove("GIT_WORK_TREE")
1122 .env_remove("GIT_INDEX_FILE")
1123 .env_remove("GIT_OBJECT_DIRECTORY")
1124 .env_remove("GIT_ALTERNATE_OBJECT_DIRECTORIES")
1125 .cwd(repo.path());
1126 gctx.shell()
1127 .verbose(|s| s.status("Running", &cmd.to_string()))?;
1128 network::retry::with_retry(gctx, || {
1129 cmd.exec()
1130 .map_err(|error| GitCliError::new(error, true).into())
1131 })?;
1132
1133 Ok(())
1134}
1135
1136fn fetch_with_gitoxide(
1137 repo: &mut git2::Repository,
1138 remote_url: &str,
1139 refspecs: Vec<String>,
1140 tags: bool,
1141 shallow: gix::remote::fetch::Shallow,
1142 gctx: &GlobalContext,
1143) -> CargoResult<()> {
1144 debug!(target: "git-fetch", backend = "gitoxide");
1145
1146 let git2_repo = repo;
1147 let config_overrides = cargo_config_to_gitoxide_overrides(gctx)?;
1148 let repo_reinitialized = AtomicBool::default();
1149 let res = oxide::with_retry_and_progress(
1150 git2_repo.path(),
1151 gctx,
1152 remote_url,
1153 &|repo_path,
1154 should_interrupt,
1155 mut progress,
1156 url_for_authentication: &mut dyn FnMut(&gix::bstr::BStr)| {
1157 // The `fetch` operation here may fail spuriously due to a corrupt
1158 // repository. It could also fail, however, for a whole slew of other
1159 // reasons (aka network related reasons). We want Cargo to automatically
1160 // recover from corrupt repositories, but we don't want Cargo to stomp
1161 // over other legitimate errors.
1162 //
1163 // Consequently we save off the error of the `fetch` operation and if it
1164 // looks like a "corrupt repo" error then we blow away the repo and try
1165 // again. If it looks like any other kind of error, or if we've already
1166 // blown away the repository, then we want to return the error as-is.
1167 loop {
1168 let res = oxide::open_repo(
1169 repo_path,
1170 config_overrides.clone(),
1171 oxide::OpenMode::ForFetch,
1172 )
1173 .map_err(crate::sources::git::fetch::Error::from)
1174 .and_then(|repo| {
1175 debug!("initiating fetch of {refspecs:?} from {remote_url}");
1176 let url_for_authentication = &mut *url_for_authentication;
1177 let remote = repo
1178 .remote_at(remote_url)?
1179 .with_fetch_tags(if tags {
1180 gix::remote::fetch::Tags::All
1181 } else {
1182 gix::remote::fetch::Tags::Included
1183 })
1184 .with_refspecs(
1185 refspecs.iter().map(|s| s.as_str()),
1186 gix::remote::Direction::Fetch,
1187 )
1188 .map_err(crate::sources::git::fetch::Error::Other)?;
1189 let url = remote
1190 .url(gix::remote::Direction::Fetch)
1191 .expect("set at init")
1192 .to_owned();
1193 let connection = remote.connect(gix::remote::Direction::Fetch)?;
1194 let mut authenticate = connection.configured_credentials(url)?;
1195 let connection = connection.with_credentials(
1196 move |action: gix::protocol::credentials::helper::Action| {
1197 if let Some(url) = action
1198 .context()
1199 .and_then(|gctx| gctx.url.as_ref().filter(|url| *url != remote_url))
1200 {
1201 url_for_authentication(url.as_ref());
1202 }
1203 authenticate(action)
1204 },
1205 );
1206 let outcome = connection
1207 .prepare_fetch(&mut progress, gix::remote::ref_map::Options::default())?
1208 .with_shallow(shallow.clone())
1209 .receive(&mut progress, should_interrupt)?;
1210 Ok(outcome)
1211 });
1212 let err = match res {
1213 Ok(_) => break,
1214 Err(e) => e,
1215 };
1216 debug!("fetch failed: {}", err);
1217
1218 if !repo_reinitialized.load(Ordering::Relaxed)
1219 // We check for errors that could occur if the configuration, refs or odb files are corrupted.
1220 // We don't check for errors related to writing as `gitoxide` is expected to create missing leading
1221 // folder before writing files into it, or else not even open a directory as git repository (which is
1222 // also handled here).
1223 && err.is_corrupted()
1224 || has_shallow_lock_file(&err)
1225 {
1226 repo_reinitialized.store(true, Ordering::Relaxed);
1227 debug!(
1228 "looks like this is a corrupt repository, reinitializing \
1229 and trying again"
1230 );
1231 if oxide::reinitialize(repo_path).is_ok() {
1232 continue;
1233 }
1234 }
1235
1236 return Err(err.into());
1237 }
1238 Ok(())
1239 },
1240 );
1241 if repo_reinitialized.load(Ordering::Relaxed) {
1242 *git2_repo = git2::Repository::open(git2_repo.path())?;
1243 }
1244 res
1245}
1246
1247fn fetch_with_libgit2(
1248 repo: &mut git2::Repository,
1249 remote_url: &str,
1250 refspecs: Vec<String>,
1251 tags: bool,
1252 shallow: gix::remote::fetch::Shallow,
1253 gctx: &GlobalContext,
1254) -> CargoResult<()> {
1255 debug!(target: "git-fetch", backend = "libgit2");
1256
1257 let git_config = git2::Config::open_default()?;
1258 with_fetch_options(&git_config, remote_url, gctx, &mut |mut opts| {
1259 if tags {
1260 opts.download_tags(git2::AutotagOption::All);
1261 }
1262 if let gix::remote::fetch::Shallow::DepthAtRemote(depth) = shallow {
1263 opts.depth(0i32.saturating_add_unsigned(depth.get()));
1264 }
1265 // The `fetch` operation here may fail spuriously due to a corrupt
1266 // repository. It could also fail, however, for a whole slew of other
1267 // reasons (aka network related reasons). We want Cargo to automatically
1268 // recover from corrupt repositories, but we don't want Cargo to stomp
1269 // over other legitimate errors.
1270 //
1271 // Consequently we save off the error of the `fetch` operation and if it
1272 // looks like a "corrupt repo" error then we blow away the repo and try
1273 // again. If it looks like any other kind of error, or if we've already
1274 // blown away the repository, then we want to return the error as-is.
1275 let mut repo_reinitialized = false;
1276 loop {
1277 debug!("initiating fetch of {refspecs:?} from {remote_url}");
1278 let res = repo
1279 .remote_anonymous(remote_url)?
1280 .fetch(&refspecs, Some(&mut opts), None);
1281 let err = match res {
1282 Ok(()) => break,
1283 Err(e) => e,
1284 };
1285 debug!("fetch failed: {}", err);
1286
1287 if !repo_reinitialized && matches!(err.class(), ErrorClass::Reference | ErrorClass::Odb)
1288 {
1289 repo_reinitialized = true;
1290 debug!(
1291 "looks like this is a corrupt repository, reinitializing \
1292 and trying again"
1293 );
1294 if reinitialize(repo).is_ok() {
1295 continue;
1296 }
1297 }
1298
1299 return Err(err.into());
1300 }
1301 Ok(())
1302 })
1303}
1304
1305/// Attempts to `git gc` a repository.
1306///
1307/// Cargo has a bunch of long-lived git repositories in its global cache and
1308/// some, like the index, are updated very frequently. Right now each update
1309/// creates a new "pack file" inside the git database, and over time this can
1310/// cause bad performance and bad current behavior in libgit2.
1311///
1312/// One pathological use case today is where libgit2 opens hundreds of file
1313/// descriptors, getting us dangerously close to blowing out the OS limits of
1314/// how many fds we can have open. This is detailed in [#4403].
1315///
1316/// To try to combat this problem we attempt a `git gc` here. Note, though, that
1317/// we may not even have `git` installed on the system! As a result we
1318/// opportunistically try a `git gc` when the pack directory looks too big, and
1319/// failing that we just blow away the repository and start over.
1320///
1321/// In theory this shouldn't be too expensive compared to the network request
1322/// we're about to issue.
1323///
1324/// [#4403]: https://github.com/rust-lang/cargo/issues/4403
1325fn maybe_gc_repo(repo: &mut git2::Repository, gctx: &GlobalContext) -> CargoResult<()> {
1326 // Here we arbitrarily declare that if you have more than 100 files in your
1327 // `pack` folder that we need to do a gc.
1328 let entries = match repo.path().join("objects/pack").read_dir() {
1329 Ok(e) => e.count(),
1330 Err(_) => {
1331 debug!("skipping gc as pack dir appears gone");
1332 return Ok(());
1333 }
1334 };
1335 let max = gctx
1336 .get_env("__CARGO_PACKFILE_LIMIT")
1337 .ok()
1338 .and_then(|s| s.parse::<usize>().ok())
1339 .unwrap_or(100);
1340 if entries < max {
1341 debug!("skipping gc as there's only {} pack files", entries);
1342 return Ok(());
1343 }
1344
1345 // First up, try a literal `git gc` by shelling out to git. This is pretty
1346 // likely to fail though as we may not have `git` installed. Note that
1347 // libgit2 doesn't currently implement the gc operation, so there's no
1348 // equivalent there.
1349 match Command::new("git")
1350 .arg("gc")
1351 .current_dir(repo.path())
1352 .output()
1353 {
1354 Ok(out) => {
1355 debug!(
1356 "git-gc status: {}\n\nstdout ---\n{}\nstderr ---\n{}",
1357 out.status,
1358 String::from_utf8_lossy(&out.stdout),
1359 String::from_utf8_lossy(&out.stderr)
1360 );
1361 if out.status.success() {
1362 let new = git2::Repository::open(repo.path())?;
1363 *repo = new;
1364 return Ok(());
1365 }
1366 }
1367 Err(e) => debug!("git-gc failed to spawn: {}", e),
1368 }
1369
1370 // Alright all else failed, let's start over.
1371 reinitialize(repo)
1372}
1373
1374/// Removes temporary files left from previous activity.
1375///
1376/// If libgit2 is interrupted while indexing pack files, it will leave behind
1377/// some temporary files that it doesn't clean up. These can be quite large in
1378/// size, so this tries to clean things up.
1379///
1380/// This intentionally ignores errors. This is only an opportunistic cleaning,
1381/// and we don't really care if there are issues (there's unlikely anything
1382/// that can be done).
1383///
1384/// The git CLI has similar behavior (its temp files look like
1385/// `objects/pack/tmp_pack_9kUSA8`). Those files are normally deleted via `git
1386/// prune` which is run by `git gc`. However, it doesn't know about libgit2's
1387/// filenames, so they never get cleaned up.
1388fn clean_repo_temp_files(repo: &git2::Repository) {
1389 let path = repo.path().join("objects/pack/pack_git2_*");
1390 let Some(pattern) = path.to_str() else {
1391 tracing::warn!("cannot convert {path:?} to a string");
1392 return;
1393 };
1394 let Ok(paths) = glob::glob(pattern) else {
1395 return;
1396 };
1397 for path in paths {
1398 if let Ok(path) = path {
1399 match paths::remove_file(&path) {
1400 Ok(_) => tracing::debug!("removed stale temp git file {path:?}"),
1401 Err(e) => {
1402 tracing::warn!("failed to remove {path:?} while cleaning temp files: {e}")
1403 }
1404 }
1405 }
1406 }
1407}
1408
1409/// Reinitializes a given Git repository. This is useful when a Git repository
1410/// seems corrupted and we want to start over.
1411fn reinitialize(repo: &mut git2::Repository) -> CargoResult<()> {
1412 // Here we want to drop the current repository object pointed to by `repo`,
1413 // so we initialize temporary repository in a sub-folder, blow away the
1414 // existing git folder, and then recreate the git repo. Finally we blow away
1415 // the `tmp` folder we allocated.
1416 let path = repo.path().to_path_buf();
1417 debug!("reinitializing git repo at {:?}", path);
1418 let tmp = path.join("tmp");
1419 let bare = !repo.path().ends_with(".git");
1420 *repo = init(&tmp, false)?;
1421 for entry in path.read_dir()? {
1422 let entry = entry?;
1423 if entry.file_name().to_str() == Some("tmp") {
1424 continue;
1425 }
1426 let path = entry.path();
1427 drop(paths::remove_file(&path).or_else(|_| paths::remove_dir_all(&path)));
1428 }
1429 *repo = init(&path, bare)?;
1430 paths::remove_dir_all(&tmp)?;
1431 Ok(())
1432}
1433
1434/// Initializes a Git repository at `path`.
1435fn init(path: &Path, bare: bool) -> CargoResult<git2::Repository> {
1436 let mut opts = git2::RepositoryInitOptions::new();
1437 // Skip anything related to templates, they just call all sorts of issues as
1438 // we really don't want to use them yet they insist on being used. See #6240
1439 // for an example issue that comes up.
1440 opts.external_template(false);
1441 opts.bare(bare);
1442 Ok(git2::Repository::init_opts(&path, &opts)?)
1443}
1444
1445/// The result of GitHub fast path check. See [`github_fast_path`] for more.
1446enum FastPathRev {
1447 /// The local rev (determined by `reference.resolve(repo)`) is already up to
1448 /// date with what this rev resolves to on GitHub's server.
1449 UpToDate,
1450 /// The following SHA must be fetched in order for the local rev to become
1451 /// up to date.
1452 NeedsFetch(Oid),
1453 /// Don't know whether local rev is up to date. We'll fetch _all_ branches
1454 /// and tags from the server and see what happens.
1455 Indeterminate,
1456}
1457
1458/// Attempts GitHub's special fast path for testing if we've already got an
1459/// up-to-date copy of the repository.
1460///
1461/// Updating the index is done pretty regularly so we want it to be as fast as
1462/// possible. For registries hosted on GitHub (like the crates.io index) there's
1463/// a fast path available to use[^1] to tell us that there's no updates to be
1464/// made.
1465///
1466/// Note that this function should never cause an actual failure because it's
1467/// just a fast path. As a result, a caller should ignore `Err` returned from
1468/// this function and move forward on the normal path.
1469///
1470/// [^1]: <https://developer.github.com/v3/repos/commits/#get-the-sha-1-of-a-commit-reference>
1471fn github_fast_path(
1472 repo: &mut git2::Repository,
1473 url: &str,
1474 reference: &GitReference,
1475 gctx: &GlobalContext,
1476) -> CargoResult<FastPathRev> {
1477 let url = Url::parse(url)?;
1478 if !is_github(&url) {
1479 return Ok(FastPathRev::Indeterminate);
1480 }
1481
1482 let local_object = resolve_ref(reference, repo).ok();
1483
1484 let github_branch_name = match reference {
1485 GitReference::Branch(branch) => branch,
1486 GitReference::Tag(tag) => tag,
1487 GitReference::DefaultBranch => "HEAD",
1488 GitReference::Rev(rev) => {
1489 if rev.starts_with("refs/") {
1490 rev
1491 } else if looks_like_commit_hash(rev) {
1492 // `revparse_single` (used by `resolve`) is the only way to turn
1493 // short hash -> long hash, but it also parses other things,
1494 // like branch and tag names, which might coincidentally be
1495 // valid hex.
1496 //
1497 // We only return early if `rev` is a prefix of the object found
1498 // by `revparse_single`. Don't bother talking to GitHub in that
1499 // case, since commit hashes are permanent. If a commit with the
1500 // requested hash is already present in the local clone, its
1501 // contents must be the same as what is on the server for that
1502 // hash.
1503 //
1504 // If `rev` is not found locally by `revparse_single`, we'll
1505 // need GitHub to resolve it and get a hash. If `rev` is found
1506 // but is not a short hash of the found object, it's probably a
1507 // branch and we also need to get a hash from GitHub, in case
1508 // the branch has moved.
1509 if let Some(local_object) = local_object {
1510 if is_short_hash_of(rev, local_object) {
1511 debug!("github fast path already has {local_object}");
1512 return Ok(FastPathRev::UpToDate);
1513 }
1514 }
1515 // If `rev` is a full commit hash, the only thing it can resolve
1516 // to is itself. Don't bother talking to GitHub in that case
1517 // either. (This ensures that we always attempt to fetch the
1518 // commit directly even if we can't reach the GitHub API.)
1519 if let Some(oid) = rev_to_oid(rev) {
1520 debug!("github fast path is already a full commit hash {rev}");
1521 return Ok(FastPathRev::NeedsFetch(oid));
1522 }
1523 rev
1524 } else {
1525 debug!("can't use github fast path with `rev = \"{}\"`", rev);
1526 return Ok(FastPathRev::Indeterminate);
1527 }
1528 }
1529 };
1530
1531 // This expects GitHub urls in the form `github.com/user/repo` and nothing
1532 // else
1533 let mut pieces = url
1534 .path_segments()
1535 .ok_or_else(|| anyhow!("no path segments on url"))?;
1536 let username = pieces
1537 .next()
1538 .ok_or_else(|| anyhow!("couldn't find username"))?;
1539 let repository = pieces
1540 .next()
1541 .ok_or_else(|| anyhow!("couldn't find repository name"))?;
1542 if pieces.next().is_some() {
1543 anyhow::bail!("too many segments on URL");
1544 }
1545
1546 // Trim off the `.git` from the repository, if present, since that's
1547 // optional for GitHub and won't work when we try to use the API as well.
1548 let repository = repository.strip_suffix(".git").unwrap_or(repository);
1549
1550 let url = format!(
1551 "https://api.github.com/repos/{}/{}/commits/{}",
1552 username, repository, github_branch_name,
1553 );
1554 let mut handle = gctx.http()?.lock().unwrap();
1555 debug!("attempting GitHub fast path for {}", url);
1556 handle.get(true)?;
1557 handle.url(&url)?;
1558 handle.useragent("cargo")?;
1559 handle.follow_location(true)?; // follow redirects
1560 handle.http_headers({
1561 let mut headers = List::new();
1562 headers.append("Accept: application/vnd.github.3.sha")?;
1563 if let Some(local_object) = local_object {
1564 headers.append(&format!("If-None-Match: \"{}\"", local_object))?;
1565 }
1566 headers
1567 })?;
1568
1569 let mut response_body = Vec::new();
1570 let mut transfer = handle.transfer();
1571 transfer.write_function(|data| {
1572 response_body.extend_from_slice(data);
1573 Ok(data.len())
1574 })?;
1575 transfer.perform()?;
1576 drop(transfer); // end borrow of handle so that response_code can be called
1577
1578 let response_code = handle.response_code()?;
1579 if response_code == 304 {
1580 debug!("github fast path up-to-date");
1581 Ok(FastPathRev::UpToDate)
1582 } else if response_code == 200 {
1583 let oid_to_fetch = str::from_utf8(&response_body)?.parse::<Oid>()?;
1584 debug!("github fast path fetch {oid_to_fetch}");
1585 Ok(FastPathRev::NeedsFetch(oid_to_fetch))
1586 } else {
1587 // Usually response_code == 404 if the repository does not exist, and
1588 // response_code == 422 if exists but GitHub is unable to resolve the
1589 // requested rev.
1590 debug!("github fast path bad response code {response_code}");
1591 Ok(FastPathRev::Indeterminate)
1592 }
1593}
1594
1595/// Whether a `url` is one from GitHub.
1596fn is_github(url: &Url) -> bool {
1597 url.host_str() == Some("github.com")
1598}
1599
1600// Give some messages on GitHub PR URL given as is
1601pub(crate) fn note_github_pull_request(url: &str) -> Option<String> {
1602 if let Ok(url) = url.parse::<Url>()
1603 && is_github(&url)
1604 {
1605 let path_segments = url
1606 .path_segments()
1607 .map(|p| p.into_iter().collect::<Vec<_>>())
1608 .unwrap_or_default();
1609 if let [owner, repo, "pull", pr_number, ..] = path_segments[..] {
1610 let repo_url = format!("https://github.com/{owner}/{repo}.git");
1611 let rev = format!("refs/pull/{pr_number}/head");
1612 return Some(format!(
1613 concat!(
1614 "\n\nnote: GitHub url {} is not a repository. \n",
1615 "help: Replace the dependency with \n",
1616 " `git = \"{}\" rev = \"{}\"` \n",
1617 " to specify pull requests as dependencies' revision."
1618 ),
1619 url, repo_url, rev
1620 ));
1621 }
1622 }
1623
1624 None
1625}
1626
/// Whether `rev` could be a commit hash: at least 7 bytes long and made up
/// of ASCII hex digits only.
fn looks_like_commit_hash(rev: &str) -> bool {
    if rev.len() < 7 {
        return false;
    }
    rev.bytes().all(|byte| byte.is_ascii_hexdigit())
}
1631
1632/// Whether `rev` is a shorter hash of `oid`.
1633fn is_short_hash_of(rev: &str, oid: Oid) -> bool {
1634 let long_hash = oid.to_string();
1635 match long_hash.get(..rev.len()) {
1636 Some(truncated_long_hash) => truncated_long_hash.eq_ignore_ascii_case(rev),
1637 None => false,
1638 }
1639}
1640
#[cfg(test)]
mod tests {
    use super::absolute_submodule_url;

    /// Each entry is `(base url, submodule url, expected absolute url)`.
    const CASES: &[(&str, &str, &str)] = &[
        (
            "ssh://git@gitub.com/rust-lang/cargo",
            "git@github.com:rust-lang/cargo.git",
            "git@github.com:rust-lang/cargo.git",
        ),
        (
            "ssh://git@gitub.com/rust-lang/cargo",
            "./",
            "ssh://git@gitub.com/rust-lang/cargo/",
        ),
        (
            "ssh://git@gitub.com/rust-lang/cargo",
            "../",
            "ssh://git@gitub.com/rust-lang/",
        ),
        (
            "ssh://git@gitub.com/rust-lang/cargo",
            "./foo",
            "ssh://git@gitub.com/rust-lang/cargo/foo",
        ),
        (
            "ssh://git@gitub.com/rust-lang/cargo/",
            "./foo",
            "ssh://git@gitub.com/rust-lang/cargo/foo",
        ),
        (
            "ssh://git@gitub.com/rust-lang/cargo/",
            "../foo",
            "ssh://git@gitub.com/rust-lang/foo",
        ),
        (
            "ssh://git@gitub.com/rust-lang/cargo",
            "../foo",
            "ssh://git@gitub.com/rust-lang/foo",
        ),
        (
            "ssh://git@gitub.com/rust-lang/cargo",
            "../foo/bar/../baz",
            "ssh://git@gitub.com/rust-lang/foo/baz",
        ),
        (
            "git@github.com:rust-lang/cargo.git",
            "ssh://git@gitub.com/rust-lang/cargo",
            "ssh://git@gitub.com/rust-lang/cargo",
        ),
        (
            "git@github.com:rust-lang/cargo.git",
            "./",
            "git@github.com:rust-lang/cargo.git/./",
        ),
        (
            "git@github.com:rust-lang/cargo.git",
            "../",
            "git@github.com:rust-lang/cargo.git/../",
        ),
        (
            "git@github.com:rust-lang/cargo.git",
            "./foo",
            "git@github.com:rust-lang/cargo.git/./foo",
        ),
        (
            "git@github.com:rust-lang/cargo.git/",
            "./foo",
            "git@github.com:rust-lang/cargo.git/./foo",
        ),
        (
            "git@github.com:rust-lang/cargo.git",
            "../foo",
            "git@github.com:rust-lang/cargo.git/../foo",
        ),
        (
            "git@github.com:rust-lang/cargo.git/",
            "../foo",
            "git@github.com:rust-lang/cargo.git/../foo",
        ),
        (
            "git@github.com:rust-lang/cargo.git",
            "../foo/bar/../baz",
            "git@github.com:rust-lang/cargo.git/../foo/bar/../baz",
        ),
    ];

    /// Checks every case of the table against `absolute_submodule_url`.
    #[test]
    fn test_absolute_submodule_url() {
        for &(base_url, submodule_url, expected) in CASES {
            let url = absolute_submodule_url(base_url, submodule_url).unwrap();
            assert_eq!(
                expected, url,
                "base `{base_url}`; submodule `{submodule_url}`"
            );
        }
    }
}
1739
1740/// Turns a full commit hash revision into an oid.
1741///
1742/// Git object ID is supposed to be a hex string of 20 (SHA1) or 32 (SHA256) bytes.
1743/// Its length must be double to the underlying bytes (40 or 64),
1744/// otherwise libgit2 would happily zero-pad the returned oid.
1745///
1746/// See:
1747///
1748/// * <https://github.com/rust-lang/cargo/issues/13188>
1749/// * <https://github.com/rust-lang/cargo/issues/13968>
1750pub(super) fn rev_to_oid(rev: &str) -> Option<Oid> {
1751 Oid::from_str(rev)
1752 .ok()
1753 .filter(|oid| oid.as_bytes().len() * 2 == rev.len())
1754}