cargo/sources/git/utils.rs
1//! Utilities for handling git repositories, mainly around
2//! authentication/cloning.
3
4use crate::core::{GitReference, Verbosity};
5use crate::sources::git::fetch::RemoteKind;
6use crate::sources::git::oxide;
7use crate::sources::git::oxide::cargo_config_to_gitoxide_overrides;
8use crate::util::HumanBytes;
9use crate::util::errors::{CargoResult, GitCliError};
10use crate::util::{GlobalContext, IntoUrl, MetricsCounter, Progress, network};
11use anyhow::{Context as _, anyhow};
12use cargo_util::{ProcessBuilder, paths};
13use curl::easy::List;
14use git2::{ErrorClass, ObjectType, Oid};
15use serde::Serialize;
16use serde::ser;
17use std::borrow::Cow;
18use std::fmt;
19use std::path::{Path, PathBuf};
20use std::process::Command;
21use std::str;
22use std::sync::atomic::{AtomicBool, Ordering};
23use std::time::{Duration, Instant};
24use tracing::{debug, info};
25use url::Url;
26
/// Name of the marker file that, when present, indicates `git reset` has
/// completed and the repo checkout is ready to go. See [`GitCheckout::reset`]
/// for why we need this.
29const CHECKOUT_READY_LOCK: &str = ".cargo-ok";
30
31fn serialize_str<T, S>(t: &T, s: S) -> Result<S::Ok, S::Error>
32where
33 T: fmt::Display,
34 S: ser::Serializer,
35{
36 s.collect_str(t)
37}
38
39/// A short abbreviated OID.
40///
41/// Exists for avoiding extra allocations in [`GitDatabase::to_short_id`].
42pub struct GitShortID(git2::Buf);
43
44impl GitShortID {
45 /// Views the short ID as a `str`.
46 pub fn as_str(&self) -> &str {
47 self.0.as_str().unwrap()
48 }
49}
50
51/// A remote repository. It gets cloned into a local [`GitDatabase`].
52#[derive(PartialEq, Clone, Debug, Serialize)]
53pub struct GitRemote {
54 /// URL to a remote repository.
55 #[serde(serialize_with = "serialize_str")]
56 url: Url,
57}
58
59/// A local clone of a remote repository's database. Multiple [`GitCheckout`]s
60/// can be cloned from a single [`GitDatabase`].
61pub struct GitDatabase {
62 /// The remote repository where this database is fetched from.
63 remote: GitRemote,
64 /// Path to the root of the underlying Git repository on the local filesystem.
65 path: PathBuf,
66 /// Underlying Git repository instance for this database.
67 repo: git2::Repository,
68}
69
70/// A local checkout of a particular revision from a [`GitDatabase`].
71pub struct GitCheckout<'a> {
72 /// The git database where this checkout is cloned from.
73 database: &'a GitDatabase,
74 /// Path to the root of the underlying Git repository on the local filesystem.
75 path: PathBuf,
76 /// The git revision this checkout is for.
77 revision: git2::Oid,
78 /// Underlying Git repository instance for this checkout.
79 repo: git2::Repository,
80}
81
82impl GitRemote {
83 /// Creates an instance for a remote repository URL.
84 pub fn new(url: &Url) -> GitRemote {
85 GitRemote { url: url.clone() }
86 }
87
88 /// Gets the remote repository URL.
89 pub fn url(&self) -> &Url {
90 &self.url
91 }
92
93 /// Fetches and checkouts to a reference or a revision from this remote
94 /// into a local path.
95 ///
96 /// This ensures that it gets the up-to-date commit when a named reference
97 /// is given (tag, branch, refs/*). Thus, network connection is involved.
98 ///
99 /// If we have a previous instance of [`GitDatabase`] then fetch into that
100 /// if we can. If that can successfully load our revision then we've
101 /// populated the database with the latest version of `reference`, so
102 /// return that database and the rev we resolve to.
103 pub fn checkout(
104 &self,
105 into: &Path,
106 db: Option<GitDatabase>,
107 reference: &GitReference,
108 gctx: &GlobalContext,
109 ) -> CargoResult<(GitDatabase, git2::Oid)> {
110 if let Some(mut db) = db {
111 fetch(
112 &mut db.repo,
113 self.url.as_str(),
114 reference,
115 gctx,
116 RemoteKind::GitDependency,
117 )
118 .with_context(|| format!("failed to fetch into: {}", into.display()))?;
119
120 if let Some(rev) = resolve_ref(reference, &db.repo).ok() {
121 return Ok((db, rev));
122 }
123 }
124
125 // Otherwise start from scratch to handle corrupt git repositories.
126 // After our fetch (which is interpreted as a clone now) we do the same
127 // resolution to figure out what we cloned.
128 if into.exists() {
129 paths::remove_dir_all(into)?;
130 }
131 paths::create_dir_all(into)?;
132 let mut repo = init(into, true)?;
133 fetch(
134 &mut repo,
135 self.url.as_str(),
136 reference,
137 gctx,
138 RemoteKind::GitDependency,
139 )
140 .with_context(|| format!("failed to clone into: {}", into.display()))?;
141 let rev = resolve_ref(reference, &repo)?;
142
143 Ok((
144 GitDatabase {
145 remote: self.clone(),
146 path: into.to_path_buf(),
147 repo,
148 },
149 rev,
150 ))
151 }
152
153 /// Creates a [`GitDatabase`] of this remote at `db_path`.
154 pub fn db_at(&self, db_path: &Path) -> CargoResult<GitDatabase> {
155 let repo = git2::Repository::open(db_path)?;
156 Ok(GitDatabase {
157 remote: self.clone(),
158 path: db_path.to_path_buf(),
159 repo,
160 })
161 }
162}
163
164impl GitDatabase {
165 /// Checkouts to a revision at `dest`ination from this database.
166 #[tracing::instrument(skip(self, gctx))]
167 pub fn copy_to(
168 &self,
169 rev: git2::Oid,
170 dest: &Path,
171 gctx: &GlobalContext,
172 ) -> CargoResult<GitCheckout<'_>> {
173 // If the existing checkout exists, and it is fresh, use it.
174 // A non-fresh checkout can happen if the checkout operation was
175 // interrupted. In that case, the checkout gets deleted and a new
176 // clone is created.
177 let checkout = match git2::Repository::open(dest)
178 .ok()
179 .map(|repo| GitCheckout::new(self, rev, repo))
180 .filter(|co| co.is_fresh())
181 {
182 Some(co) => co,
183 None => {
184 let (checkout, guard) = GitCheckout::clone_into(dest, self, rev, gctx)?;
185 checkout.update_submodules(gctx)?;
186 guard.mark_ok()?;
187 checkout
188 }
189 };
190
191 Ok(checkout)
192 }
193
194 /// Get a short OID for a `revision`, usually 7 chars or more if ambiguous.
195 pub fn to_short_id(&self, revision: git2::Oid) -> CargoResult<GitShortID> {
196 let obj = self.repo.find_object(revision, None)?;
197 Ok(GitShortID(obj.short_id()?))
198 }
199
200 /// Checks if the database contains the object of this `oid`..
201 pub fn contains(&self, oid: git2::Oid) -> bool {
202 self.repo.revparse_single(&oid.to_string()).is_ok()
203 }
204
205 /// [`resolve_ref`]s this reference with this database.
206 pub fn resolve(&self, r: &GitReference) -> CargoResult<git2::Oid> {
207 resolve_ref(r, &self.repo)
208 }
209}
210
211/// Resolves [`GitReference`] to an object ID with objects the `repo` currently has.
212pub fn resolve_ref(gitref: &GitReference, repo: &git2::Repository) -> CargoResult<git2::Oid> {
213 let id = match gitref {
214 // Note that we resolve the named tag here in sync with where it's
215 // fetched into via `fetch` below.
216 GitReference::Tag(s) => (|| -> CargoResult<git2::Oid> {
217 let refname = format!("refs/remotes/origin/tags/{}", s);
218 let id = repo.refname_to_id(&refname)?;
219 let obj = repo.find_object(id, None)?;
220 let obj = obj.peel(ObjectType::Commit)?;
221 Ok(obj.id())
222 })()
223 .with_context(|| format!("failed to find tag `{}`", s))?,
224
225 // Resolve the remote name since that's all we're configuring in
226 // `fetch` below.
227 GitReference::Branch(s) => {
228 let name = format!("origin/{}", s);
229 let b = repo
230 .find_branch(&name, git2::BranchType::Remote)
231 .with_context(|| format!("failed to find branch `{}`", s))?;
232 b.get()
233 .target()
234 .ok_or_else(|| anyhow::format_err!("branch `{}` did not have a target", s))?
235 }
236
237 // We'll be using the HEAD commit
238 GitReference::DefaultBranch => {
239 let head_id = repo.refname_to_id("refs/remotes/origin/HEAD")?;
240 let head = repo.find_object(head_id, None)?;
241 head.peel(ObjectType::Commit)?.id()
242 }
243
244 GitReference::Rev(s) => {
245 let obj = repo.revparse_single(s)?;
246 match obj.as_tag() {
247 Some(tag) => tag.target_id(),
248 None => obj.id(),
249 }
250 }
251 };
252 Ok(id)
253}
254
255impl<'a> GitCheckout<'a> {
256 /// Creates an instance of [`GitCheckout`]. This doesn't imply the checkout
257 /// is done. Use [`GitCheckout::is_fresh`] to check.
258 ///
259 /// * The `database` is where this checkout is from.
260 /// * The `repo` will be the checked out Git repository.
261 fn new(
262 database: &'a GitDatabase,
263 revision: git2::Oid,
264 repo: git2::Repository,
265 ) -> GitCheckout<'a> {
266 let path = repo.workdir().unwrap_or_else(|| repo.path());
267 GitCheckout {
268 path: path.to_path_buf(),
269 database,
270 revision,
271 repo,
272 }
273 }
274
275 /// Gets the remote repository URL.
276 fn remote_url(&self) -> &Url {
277 &self.database.remote.url()
278 }
279
280 /// Clone a repo for a `revision` into a local path from a `database`.
281 /// This is a filesystem-to-filesystem clone.
282 fn clone_into(
283 into: &Path,
284 database: &'a GitDatabase,
285 revision: git2::Oid,
286 gctx: &GlobalContext,
287 ) -> CargoResult<(GitCheckout<'a>, CheckoutGuard)> {
288 let dirname = into.parent().unwrap();
289 paths::create_dir_all(&dirname)?;
290 if into.exists() {
291 paths::remove_dir_all(into)?;
292 }
293
294 // we're doing a local filesystem-to-filesystem clone so there should
295 // be no need to respect global configuration options, so pass in
296 // an empty instance of `git2::Config` below.
297 let git_config = git2::Config::new()?;
298
299 // Clone the repository, but make sure we use the "local" option in
300 // libgit2 which will attempt to use hardlinks to set up the database.
301 // This should speed up the clone operation quite a bit if it works.
302 //
303 // Note that we still use the same fetch options because while we don't
304 // need authentication information we may want progress bars and such.
305 let url = database.path.into_url()?;
306 let mut repo = None;
307 with_fetch_options(&git_config, url.as_str(), gctx, &mut |fopts| {
308 let mut checkout = git2::build::CheckoutBuilder::new();
309 checkout.dry_run(); // we'll do this below during a `reset`
310
311 let r = git2::build::RepoBuilder::new()
312 // use hard links and/or copy the database, we're doing a
313 // filesystem clone so this'll speed things up quite a bit.
314 .clone_local(git2::build::CloneLocal::Local)
315 .with_checkout(checkout)
316 .fetch_options(fopts)
317 .clone(url.as_str(), into)?;
318 // `git2` doesn't seem to handle shallow repos correctly when doing
319 // a local clone. Fortunately all that's needed is the copy of the
320 // one file that defines the shallow boundary, the commits which
321 // have their parents omitted as part of the shallow clone.
322 //
323 // TODO(git2): remove this when git2 supports shallow clone correctly
324 if database.repo.is_shallow() {
325 std::fs::copy(
326 database.repo.path().join("shallow"),
327 r.path().join("shallow"),
328 )?;
329 }
330 repo = Some(r);
331 Ok(())
332 })?;
333 let repo = repo.unwrap();
334
335 let checkout = GitCheckout::new(database, revision, repo);
336 let guard = checkout.reset(gctx)?;
337 Ok((checkout, guard))
338 }
339
340 /// Checks if the `HEAD` of this checkout points to the expected revision.
341 fn is_fresh(&self) -> bool {
342 match self.repo.revparse_single("HEAD") {
343 Ok(ref head) if head.id() == self.revision => {
344 // See comments in reset() for why we check this
345 self.path.join(CHECKOUT_READY_LOCK).exists()
346 }
347 _ => false,
348 }
349 }
350
351 /// Similar to [`reset()`]. This roughly performs `git reset --hard` to the
352 /// revision of this checkout, with additional interrupt protection by a
353 /// dummy file [`CHECKOUT_READY_LOCK`].
354 ///
355 /// If we're interrupted while performing a `git reset` (e.g., we die
356 /// because of a signal) Cargo needs to be sure to try to check out this
357 /// repo again on the next go-round.
358 ///
359 /// To enable this we have a dummy file in our checkout, [`.cargo-ok`],
360 /// which if present means that the repo has been successfully reset and is
361 /// ready to go. Hence if we start to do a reset, we make sure this file
362 /// *doesn't* exist. The caller of [`reset`] has an option to perform additional operations
363 /// (e.g. submodule update) before marking the check-out as ready.
364 ///
365 /// [`.cargo-ok`]: CHECKOUT_READY_LOCK
366 fn reset(&self, gctx: &GlobalContext) -> CargoResult<CheckoutGuard> {
367 let guard = CheckoutGuard::guard(&self.path);
368 info!("reset {} to {}", self.repo.path().display(), self.revision);
369
370 // Ensure libgit2 won't mess with newlines when we vendor.
371 if let Ok(mut git_config) = self.repo.config() {
372 git_config.set_bool("core.autocrlf", false)?;
373 }
374
375 let object = self.repo.find_object(self.revision, None)?;
376 reset(&self.repo, &object, gctx)?;
377
378 Ok(guard)
379 }
380
381 /// Like `git submodule update --recursive` but for this git checkout.
382 ///
383 /// This function respects `submodule.<name>.update = none`[^1] git config.
384 /// Submodules set to `none` won't be fetched.
385 ///
386 /// [^1]: <https://git-scm.com/docs/git-submodule#Documentation/git-submodule.txt-none>
387 fn update_submodules(&self, gctx: &GlobalContext) -> CargoResult<()> {
388 return update_submodules(&self.repo, gctx, self.remote_url().as_str());
389
390 /// Recursive helper for [`GitCheckout::update_submodules`].
391 fn update_submodules(
392 repo: &git2::Repository,
393 gctx: &GlobalContext,
394 parent_remote_url: &str,
395 ) -> CargoResult<()> {
396 debug!("update submodules for: {:?}", repo.workdir().unwrap());
397
398 for mut child in repo.submodules()? {
399 update_submodule(repo, &mut child, gctx, parent_remote_url).with_context(|| {
400 format!(
401 "failed to update submodule `{}`",
402 child.name().unwrap_or("")
403 )
404 })?;
405 }
406 Ok(())
407 }
408
409 /// Update a single Git submodule, and recurse into its submodules.
410 fn update_submodule(
411 parent: &git2::Repository,
412 child: &mut git2::Submodule<'_>,
413 gctx: &GlobalContext,
414 parent_remote_url: &str,
415 ) -> CargoResult<()> {
416 child.init(false)?;
417
418 let child_url_str = child.url().ok_or_else(|| {
419 anyhow::format_err!("non-utf8 url for submodule {:?}?", child.path())
420 })?;
421
422 // Skip the submodule if the config says not to update it.
423 if child.update_strategy() == git2::SubmoduleUpdate::None {
424 gctx.shell().status(
425 "Skipping",
426 format!(
427 "git submodule `{}` due to update strategy in .gitmodules",
428 child_url_str
429 ),
430 )?;
431 return Ok(());
432 }
433
434 let child_remote_url = absolute_submodule_url(parent_remote_url, child_url_str)?;
435
436 // A submodule which is listed in .gitmodules but not actually
437 // checked out will not have a head id, so we should ignore it.
438 let Some(head) = child.head_id() else {
439 return Ok(());
440 };
441
442 // If the submodule hasn't been checked out yet, we need to
443 // clone it. If it has been checked out and the head is the same
444 // as the submodule's head, then we can skip an update and keep
445 // recursing.
446 let head_and_repo = child.open().and_then(|repo| {
447 let target = repo.head()?.target();
448 Ok((target, repo))
449 });
450 let mut repo = match head_and_repo {
451 Ok((head, repo)) => {
452 if child.head_id() == head {
453 return update_submodules(&repo, gctx, &child_remote_url);
454 }
455 repo
456 }
457 Err(..) => {
458 let path = parent.workdir().unwrap().join(child.path());
459 let _ = paths::remove_dir_all(&path);
460 init(&path, false)?
461 }
462 };
463 // Fetch data from origin and reset to the head commit
464 let reference = GitReference::Rev(head.to_string());
465 gctx.shell()
466 .status("Updating", format!("git submodule `{child_remote_url}`"))?;
467 fetch(
468 &mut repo,
469 &child_remote_url,
470 &reference,
471 gctx,
472 RemoteKind::GitDependency,
473 )
474 .with_context(|| {
475 let name = child.name().unwrap_or("");
476 format!("failed to fetch submodule `{name}` from {child_remote_url}",)
477 })?;
478
479 let obj = repo.find_object(head, None)?;
480 reset(&repo, &obj, gctx)?;
481 update_submodules(&repo, gctx, &child_remote_url)
482 }
483 }
484}
485
486/// See [`GitCheckout::reset`] for rationale on this type.
487#[must_use]
488struct CheckoutGuard {
489 ok_file: PathBuf,
490}
491
492impl CheckoutGuard {
493 fn guard(path: &Path) -> Self {
494 let ok_file = path.join(CHECKOUT_READY_LOCK);
495 let _ = paths::remove_file(&ok_file);
496 Self { ok_file }
497 }
498
499 fn mark_ok(self) -> CargoResult<()> {
500 let _ = paths::create(self.ok_file)?;
501 Ok(())
502 }
503}
504
505/// Constructs an absolute URL for a child submodule URL with its parent base URL.
506///
507/// Git only assumes a submodule URL is a relative path if it starts with `./`
508/// or `../` [^1]. To fetch the correct repo, we need to construct an absolute
509/// submodule URL.
510///
511/// At this moment it comes with some limitations:
512///
513/// * GitHub doesn't accept non-normalized URLs with relative paths.
514/// (`ssh://git@github.com/rust-lang/cargo.git/relative/..` is invalid)
515/// * `url` crate cannot parse SCP-like URLs.
516/// (`git@github.com:rust-lang/cargo.git` is not a valid WHATWG URL)
517///
518/// To overcome these, this patch always tries [`Url::parse`] first to normalize
519/// the path. If it couldn't, append the relative path as the last resort and
520/// pray the remote git service supports non-normalized URLs.
521///
522/// See also rust-lang/cargo#12404 and rust-lang/cargo#12295.
523///
524/// [^1]: <https://git-scm.com/docs/git-submodule>
525fn absolute_submodule_url<'s>(base_url: &str, submodule_url: &'s str) -> CargoResult<Cow<'s, str>> {
526 let absolute_url = if ["./", "../"].iter().any(|p| submodule_url.starts_with(p)) {
527 match Url::parse(base_url) {
528 Ok(mut base_url) => {
529 let path = base_url.path();
530 if !path.ends_with('/') {
531 base_url.set_path(&format!("{path}/"));
532 }
533 let absolute_url = base_url.join(submodule_url).with_context(|| {
534 format!(
535 "failed to parse relative child submodule url `{submodule_url}` \
536 using parent base url `{base_url}`"
537 )
538 })?;
539 Cow::from(absolute_url.to_string())
540 }
541 Err(_) => {
542 let mut absolute_url = base_url.to_string();
543 if !absolute_url.ends_with('/') {
544 absolute_url.push('/');
545 }
546 absolute_url.push_str(submodule_url);
547 Cow::from(absolute_url)
548 }
549 }
550 } else {
551 Cow::from(submodule_url)
552 };
553
554 Ok(absolute_url)
555}
556
557/// Prepare the authentication callbacks for cloning a git repository.
558///
559/// The main purpose of this function is to construct the "authentication
560/// callback" which is used to clone a repository. This callback will attempt to
561/// find the right authentication on the system (without user input) and will
562/// guide libgit2 in doing so.
563///
564/// The callback is provided `allowed` types of credentials, and we try to do as
565/// much as possible based on that:
566///
567/// * Prioritize SSH keys from the local ssh agent as they're likely the most
568/// reliable. The username here is prioritized from the credential
569/// callback, then from whatever is configured in git itself, and finally
570/// we fall back to the generic user of `git`.
571///
572/// * If a username/password is allowed, then we fallback to git2-rs's
573/// implementation of the credential helper. This is what is configured
574/// with `credential.helper` in git, and is the interface for the macOS
575/// keychain, for example.
576///
577/// * After the above two have failed, we just kinda grapple attempting to
578/// return *something*.
579///
580/// If any form of authentication fails, libgit2 will repeatedly ask us for
581/// credentials until we give it a reason to not do so. To ensure we don't
582/// just sit here looping forever we keep track of authentications we've
583/// attempted and we don't try the same ones again.
584fn with_authentication<T, F>(
585 gctx: &GlobalContext,
586 url: &str,
587 cfg: &git2::Config,
588 mut f: F,
589) -> CargoResult<T>
590where
591 F: FnMut(&mut git2::Credentials<'_>) -> CargoResult<T>,
592{
593 let mut cred_helper = git2::CredentialHelper::new(url);
594 cred_helper.config(cfg);
595
596 let mut ssh_username_requested = false;
597 let mut cred_helper_bad = None;
598 let mut ssh_agent_attempts = Vec::new();
599 let mut any_attempts = false;
600 let mut tried_sshkey = false;
601 let mut url_attempt = None;
602
603 let orig_url = url;
604 let mut res = f(&mut |url, username, allowed| {
605 any_attempts = true;
606 if url != orig_url {
607 url_attempt = Some(url.to_string());
608 }
609 // libgit2's "USERNAME" authentication actually means that it's just
610 // asking us for a username to keep going. This is currently only really
611 // used for SSH authentication and isn't really an authentication type.
612 // The logic currently looks like:
613 //
614 // let user = ...;
615 // if (user.is_null())
616 // user = callback(USERNAME, null, ...);
617 //
618 // callback(SSH_KEY, user, ...)
619 //
620 // So if we're being called here then we know that (a) we're using ssh
621 // authentication and (b) no username was specified in the URL that
622 // we're trying to clone. We need to guess an appropriate username here,
623 // but that may involve a few attempts. Unfortunately we can't switch
624 // usernames during one authentication session with libgit2, so to
625 // handle this we bail out of this authentication session after setting
626 // the flag `ssh_username_requested`, and then we handle this below.
627 if allowed.contains(git2::CredentialType::USERNAME) {
628 debug_assert!(username.is_none());
629 ssh_username_requested = true;
630 return Err(git2::Error::from_str("gonna try usernames later"));
631 }
632
633 // An "SSH_KEY" authentication indicates that we need some sort of SSH
634 // authentication. This can currently either come from the ssh-agent
635 // process or from a raw in-memory SSH key. Cargo only supports using
636 // ssh-agent currently.
637 //
638 // If we get called with this then the only way that should be possible
639 // is if a username is specified in the URL itself (e.g., `username` is
640 // Some), hence the unwrap() here. We try custom usernames down below.
641 if allowed.contains(git2::CredentialType::SSH_KEY) && !tried_sshkey {
642 // If ssh-agent authentication fails, libgit2 will keep
643 // calling this callback asking for other authentication
644 // methods to try. Make sure we only try ssh-agent once,
645 // to avoid looping forever.
646 tried_sshkey = true;
647 let username = username.unwrap();
648 debug_assert!(!ssh_username_requested);
649 ssh_agent_attempts.push(username.to_string());
650 return git2::Cred::ssh_key_from_agent(username);
651 }
652
653 // Sometimes libgit2 will ask for a username/password in plaintext. This
654 // is where Cargo would have an interactive prompt if we supported it,
655 // but we currently don't! Right now the only way we support fetching a
656 // plaintext password is through the `credential.helper` support, so
657 // fetch that here.
658 //
659 // If ssh-agent authentication fails, libgit2 will keep calling this
660 // callback asking for other authentication methods to try. Check
661 // cred_helper_bad to make sure we only try the git credential helper
662 // once, to avoid looping forever.
663 if allowed.contains(git2::CredentialType::USER_PASS_PLAINTEXT) && cred_helper_bad.is_none()
664 {
665 let r = git2::Cred::credential_helper(cfg, url, username);
666 cred_helper_bad = Some(r.is_err());
667 return r;
668 }
669
670 // I'm... not sure what the DEFAULT kind of authentication is, but seems
671 // easy to support?
672 if allowed.contains(git2::CredentialType::DEFAULT) {
673 return git2::Cred::default();
674 }
675
676 // Whelp, we tried our best
677 Err(git2::Error::from_str("no authentication methods succeeded"))
678 });
679
680 // Ok, so if it looks like we're going to be doing ssh authentication, we
681 // want to try a few different usernames as one wasn't specified in the URL
682 // for us to use. In order, we'll try:
683 //
684 // * A credential helper's username for this URL, if available.
685 // * This account's username.
686 // * "git"
687 //
688 // We have to restart the authentication session each time (due to
689 // constraints in libssh2 I guess? maybe this is inherent to ssh?), so we
690 // call our callback, `f`, in a loop here.
691 if ssh_username_requested {
692 debug_assert!(res.is_err());
693 let mut attempts = vec![String::from("git")];
694 if let Ok(s) = gctx.get_env("USER").or_else(|_| gctx.get_env("USERNAME")) {
695 attempts.push(s.to_string());
696 }
697 if let Some(ref s) = cred_helper.username {
698 attempts.push(s.clone());
699 }
700
701 while let Some(s) = attempts.pop() {
702 // We should get `USERNAME` first, where we just return our attempt,
703 // and then after that we should get `SSH_KEY`. If the first attempt
704 // fails we'll get called again, but we don't have another option so
705 // we bail out.
706 let mut attempts = 0;
707 res = f(&mut |_url, username, allowed| {
708 if allowed.contains(git2::CredentialType::USERNAME) {
709 return git2::Cred::username(&s);
710 }
711 if allowed.contains(git2::CredentialType::SSH_KEY) {
712 debug_assert_eq!(Some(&s[..]), username);
713 attempts += 1;
714 if attempts == 1 {
715 ssh_agent_attempts.push(s.to_string());
716 return git2::Cred::ssh_key_from_agent(&s);
717 }
718 }
719 Err(git2::Error::from_str("no authentication methods succeeded"))
720 });
721
722 // If we made two attempts then that means:
723 //
724 // 1. A username was requested, we returned `s`.
725 // 2. An ssh key was requested, we returned to look up `s` in the
726 // ssh agent.
727 // 3. For whatever reason that lookup failed, so we were asked again
728 // for another mode of authentication.
729 //
730 // Essentially, if `attempts == 2` then in theory the only error was
731 // that this username failed to authenticate (e.g., no other network
732 // errors happened). Otherwise something else is funny so we bail
733 // out.
734 if attempts != 2 {
735 break;
736 }
737 }
738 }
739 let mut err = match res {
740 Ok(e) => return Ok(e),
741 Err(e) => e,
742 };
743
744 // In the case of an authentication failure (where we tried something) then
745 // we try to give a more helpful error message about precisely what we
746 // tried.
747 if any_attempts {
748 let mut msg = "failed to authenticate when downloading \
749 repository"
750 .to_string();
751
752 if let Some(attempt) = &url_attempt {
753 if url != attempt {
754 msg.push_str(": ");
755 msg.push_str(attempt);
756 }
757 }
758 msg.push('\n');
759 if !ssh_agent_attempts.is_empty() {
760 let names = ssh_agent_attempts
761 .iter()
762 .map(|s| format!("`{}`", s))
763 .collect::<Vec<_>>()
764 .join(", ");
765 msg.push_str(&format!(
766 "\n* attempted ssh-agent authentication, but \
767 no usernames succeeded: {}",
768 names
769 ));
770 }
771 if let Some(failed_cred_helper) = cred_helper_bad {
772 if failed_cred_helper {
773 msg.push_str(
774 "\n* attempted to find username/password via \
775 git's `credential.helper` support, but failed",
776 );
777 } else {
778 msg.push_str(
779 "\n* attempted to find username/password via \
780 `credential.helper`, but maybe the found \
781 credentials were incorrect",
782 );
783 }
784 }
785 msg.push_str("\n\n");
786 msg.push_str("if the git CLI succeeds then `net.git-fetch-with-cli` may help here\n");
787 msg.push_str("https://doc.rust-lang.org/cargo/reference/config.html#netgit-fetch-with-cli");
788 err = err.context(msg);
789
790 // Otherwise if we didn't even get to the authentication phase them we may
791 // have failed to set up a connection, in these cases hint on the
792 // `net.git-fetch-with-cli` configuration option.
793 } else if let Some(e) = err.downcast_ref::<git2::Error>() {
794 match e.class() {
795 ErrorClass::Net
796 | ErrorClass::Ssl
797 | ErrorClass::Submodule
798 | ErrorClass::FetchHead
799 | ErrorClass::Ssh
800 | ErrorClass::Http => {
801 let msg = format!(
802 concat!(
803 "network failure seems to have happened\n",
804 "if a proxy or similar is necessary `net.git-fetch-with-cli` may help here\n",
805 "https://doc.rust-lang.org/cargo/reference/config.html#netgit-fetch-with-cli",
806 "{}"
807 ),
808 note_github_pull_request(url).unwrap_or_default()
809 );
810 err = err.context(msg);
811 }
812 ErrorClass::Callback => {
813 // This unwraps the git2 error. We're using the callback error
814 // specifically to convey errors from Rust land through the C
815 // callback interface. We don't need the `; class=Callback
816 // (26)` that gets tacked on to the git2 error message.
817 err = anyhow::format_err!("{}", e.message());
818 }
819 _ => {}
820 }
821 }
822
823 Err(err)
824}
825
826/// `git reset --hard` to the given `obj` for the `repo`.
827///
828/// The `obj` is a commit-ish to which the head should be moved.
829fn reset(repo: &git2::Repository, obj: &git2::Object<'_>, gctx: &GlobalContext) -> CargoResult<()> {
830 let mut pb = Progress::new("Checkout", gctx);
831 let mut opts = git2::build::CheckoutBuilder::new();
832 opts.progress(|_, cur, max| {
833 drop(pb.tick(cur, max, ""));
834 });
835 debug!("doing reset");
836 repo.reset(obj, git2::ResetType::Hard, Some(&mut opts))?;
837 debug!("reset done");
838 Ok(())
839}
840
841/// Prepares the callbacks for fetching a git repository.
842///
843/// The main purpose of this function is to construct everything before a fetch.
844/// This will attempt to setup a progress bar, the authentication for git,
845/// ssh known hosts check, and the network retry mechanism.
846///
847/// The callback is provided a fetch options, which can be used by the actual
848/// git fetch.
849pub fn with_fetch_options(
850 git_config: &git2::Config,
851 url: &str,
852 gctx: &GlobalContext,
853 cb: &mut dyn FnMut(git2::FetchOptions<'_>) -> CargoResult<()>,
854) -> CargoResult<()> {
855 let mut progress = Progress::new("Fetch", gctx);
856 let ssh_config = gctx.net_config()?.ssh.as_ref();
857 let config_known_hosts = ssh_config.and_then(|ssh| ssh.known_hosts.as_ref());
858 let diagnostic_home_config = gctx.diagnostic_home_config();
859 network::retry::with_retry(gctx, || {
860 // Hack: libgit2 disallows overriding the error from check_cb since v1.8.0,
861 // so we store the error additionally and unwrap it later
862 let mut check_cb_result = Ok(());
863 let auth_result = with_authentication(gctx, url, git_config, |f| {
864 let port = Url::parse(url).ok().and_then(|url| url.port());
865 let mut last_update = Instant::now();
866 let mut rcb = git2::RemoteCallbacks::new();
867 // We choose `N=10` here to make a `300ms * 10slots ~= 3000ms`
868 // sliding window for tracking the data transfer rate (in bytes/s).
869 let mut counter = MetricsCounter::<10>::new(0, last_update);
870 rcb.credentials(f);
871 rcb.certificate_check(|cert, host| {
872 match super::known_hosts::certificate_check(
873 gctx,
874 cert,
875 host,
876 port,
877 config_known_hosts,
878 &diagnostic_home_config,
879 ) {
880 Ok(status) => Ok(status),
881 Err(e) => {
882 check_cb_result = Err(e);
883 // This is not really used because it'll be overridden by libgit2
884 // See https://github.com/libgit2/libgit2/commit/9a9f220119d9647a352867b24b0556195cb26548
885 Err(git2::Error::from_str(
886 "invalid or unknown remote ssh hostkey",
887 ))
888 }
889 }
890 });
891 rcb.transfer_progress(|stats| {
892 let indexed_deltas = stats.indexed_deltas();
893 let msg = if indexed_deltas > 0 {
894 // Resolving deltas.
895 format!(
896 ", ({}/{}) resolving deltas",
897 indexed_deltas,
898 stats.total_deltas()
899 )
900 } else {
901 // Receiving objects.
902 //
903 // # Caveat
904 //
905 // Progress bar relies on git2 calling `transfer_progress`
906 // to update its transfer rate, but we cannot guarantee a
907 // periodic call of that callback. Thus if we don't receive
908 // any data for, say, 10 seconds, the rate will get stuck
909 // and never go down to 0B/s.
910 // In the future, we need to find away to update the rate
911 // even when the callback is not called.
912 let now = Instant::now();
913 // Scrape a `received_bytes` to the counter every 300ms.
914 if now - last_update > Duration::from_millis(300) {
915 counter.add(stats.received_bytes(), now);
916 last_update = now;
917 }
918 let rate = HumanBytes(counter.rate() as u64);
919 format!(", {rate:.2}/s")
920 };
921 progress
922 .tick(stats.indexed_objects(), stats.total_objects(), &msg)
923 .is_ok()
924 });
925
926 // Create a local anonymous remote in the repository to fetch the
927 // url
928 let mut opts = git2::FetchOptions::new();
929 opts.remote_callbacks(rcb);
930 cb(opts)
931 });
932 if auth_result.is_err() {
933 check_cb_result?;
934 }
935 auth_result?;
936 Ok(())
937 })
938}
939
940/// Attempts to fetch the given git `reference` for a Git repository.
941///
942/// This is the main entry for git clone/fetch. It does the followings:
943///
944/// * Turns [`GitReference`] into refspecs accordingly.
945/// * Dispatches `git fetch` using libgit2, gitoxide, or git CLI.
946///
947/// The `remote_url` argument is the git remote URL where we want to fetch from.
948///
949/// The `remote_kind` argument is a thing for [`-Zgitoxide`] shallow clones
950/// at this time. It could be extended when libgit2 supports shallow clones.
951///
952/// [`-Zgitoxide`]: https://doc.rust-lang.org/nightly/cargo/reference/unstable.html#gitoxide
953pub fn fetch(
954 repo: &mut git2::Repository,
955 remote_url: &str,
956 reference: &GitReference,
957 gctx: &GlobalContext,
958 remote_kind: RemoteKind,
959) -> CargoResult<()> {
960 if let Some(offline_flag) = gctx.offline_flag() {
961 anyhow::bail!(
962 "attempting to update a git repository, but {offline_flag} \
963 was specified"
964 )
965 }
966
967 let shallow = remote_kind.to_shallow_setting(repo.is_shallow(), gctx);
968
969 // Flag to keep track if the rev is a full commit hash
970 let mut fast_path_rev: bool = false;
971
972 let oid_to_fetch = match github_fast_path(repo, remote_url, reference, gctx) {
973 Ok(FastPathRev::UpToDate) => return Ok(()),
974 Ok(FastPathRev::NeedsFetch(rev)) => Some(rev),
975 Ok(FastPathRev::Indeterminate) => None,
976 Err(e) => {
977 debug!("failed to check github {:?}", e);
978 None
979 }
980 };
981
982 maybe_gc_repo(repo, gctx)?;
983
984 clean_repo_temp_files(repo);
985
986 // Translate the reference desired here into an actual list of refspecs
987 // which need to get fetched. Additionally record if we're fetching tags.
988 let mut refspecs = Vec::new();
989 let mut tags = false;
990 // The `+` symbol on the refspec means to allow a forced (fast-forward)
991 // update which is needed if there is ever a force push that requires a
992 // fast-forward.
993 match reference {
994 // For branches and tags we can fetch simply one reference and copy it
995 // locally, no need to fetch other branches/tags.
996 GitReference::Branch(b) => {
997 refspecs.push(format!("+refs/heads/{0}:refs/remotes/origin/{0}", b));
998 }
999
1000 GitReference::Tag(t) => {
1001 refspecs.push(format!("+refs/tags/{0}:refs/remotes/origin/tags/{0}", t));
1002 }
1003
1004 GitReference::DefaultBranch => {
1005 refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
1006 }
1007
1008 GitReference::Rev(rev) => {
1009 if rev.starts_with("refs/") {
1010 refspecs.push(format!("+{0}:{0}", rev));
1011 } else if let Some(oid_to_fetch) = oid_to_fetch {
1012 fast_path_rev = true;
1013 refspecs.push(format!("+{0}:refs/commit/{0}", oid_to_fetch));
1014 } else if !matches!(shallow, gix::remote::fetch::Shallow::NoChange)
1015 && rev.parse::<Oid>().is_ok()
1016 {
1017 // There is a specific commit to fetch and we will do so in shallow-mode only
1018 // to not disturb the previous logic.
1019 // Note that with typical settings for shallowing, we will just fetch a single `rev`
1020 // as single commit.
1021 // The reason we write to `refs/remotes/origin/HEAD` is that it's of special significance
1022 // when during `GitReference::resolve()`, but otherwise it shouldn't matter.
1023 refspecs.push(format!("+{0}:refs/remotes/origin/HEAD", rev));
1024 } else {
1025 // We don't know what the rev will point to. To handle this
1026 // situation we fetch all branches and tags, and then we pray
1027 // it's somewhere in there.
1028 refspecs.push(String::from("+refs/heads/*:refs/remotes/origin/*"));
1029 refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
1030 tags = true;
1031 }
1032 }
1033 }
1034
1035 debug!("doing a fetch for {remote_url}");
1036 let result = if let Some(true) = gctx.net_config()?.git_fetch_with_cli {
1037 fetch_with_cli(repo, remote_url, &refspecs, tags, shallow, gctx)
1038 } else if gctx.cli_unstable().gitoxide.map_or(false, |git| git.fetch) {
1039 fetch_with_gitoxide(repo, remote_url, refspecs, tags, shallow, gctx)
1040 } else {
1041 fetch_with_libgit2(repo, remote_url, refspecs, tags, shallow, gctx)
1042 };
1043
1044 if fast_path_rev {
1045 if let Some(oid) = oid_to_fetch {
1046 return result.with_context(|| format!("revision {} not found", oid));
1047 }
1048 }
1049 result
1050}
1051
1052/// `gitoxide` uses shallow locks to assure consistency when fetching to and to avoid races, and to write
1053/// files atomically.
1054/// Cargo has its own lock files and doesn't need that mechanism for race protection, so a stray lock means
1055/// a signal interrupted a previous shallow fetch and doesn't mean a race is happening.
1056fn has_shallow_lock_file(err: &crate::sources::git::fetch::Error) -> bool {
1057 matches!(
1058 err,
1059 gix::env::collate::fetch::Error::Fetch(gix::remote::fetch::Error::Fetch(
1060 gix::protocol::fetch::Error::LockShallowFile(_)
1061 ))
1062 )
1063}
1064
1065/// Attempts to use `git` CLI installed on the system to fetch a repository,
1066/// when the config value [`net.git-fetch-with-cli`][1] is set.
1067///
1068/// Unfortunately `libgit2` is notably lacking in the realm of authentication
1069/// when compared to the `git` command line. As a result, allow an escape
1070/// hatch for users that would prefer to use `git`-the-CLI for fetching
1071/// repositories instead of `libgit2`-the-library. This should make more
1072/// flavors of authentication possible while also still giving us all the
1073/// speed and portability of using `libgit2`.
1074///
1075/// [1]: https://doc.rust-lang.org/nightly/cargo/reference/config.html#netgit-fetch-with-cli
1076fn fetch_with_cli(
1077 repo: &mut git2::Repository,
1078 url: &str,
1079 refspecs: &[String],
1080 tags: bool,
1081 shallow: gix::remote::fetch::Shallow,
1082 gctx: &GlobalContext,
1083) -> CargoResult<()> {
1084 debug!(target: "git-fetch", backend = "git-cli");
1085
1086 let mut cmd = ProcessBuilder::new("git");
1087 cmd.arg("fetch");
1088 if tags {
1089 cmd.arg("--tags");
1090 } else {
1091 cmd.arg("--no-tags");
1092 }
1093 if let gix::remote::fetch::Shallow::DepthAtRemote(depth) = shallow {
1094 let depth = 0i32.saturating_add_unsigned(depth.get());
1095 cmd.arg(format!("--depth={depth}"));
1096 }
1097 match gctx.shell().verbosity() {
1098 Verbosity::Normal => {}
1099 Verbosity::Verbose => {
1100 cmd.arg("--verbose");
1101 }
1102 Verbosity::Quiet => {
1103 cmd.arg("--quiet");
1104 }
1105 }
1106 cmd.arg("--force") // handle force pushes
1107 .arg("--update-head-ok") // see discussion in #2078
1108 .arg(url)
1109 .args(refspecs)
1110 // If cargo is run by git (for example, the `exec` command in `git
1111 // rebase`), the GIT_DIR is set by git and will point to the wrong
1112 // location. This makes sure GIT_DIR is always the repository path.
1113 .env("GIT_DIR", repo.path())
1114 // The reset of these may not be necessary, but I'm including them
1115 // just to be extra paranoid and avoid any issues.
1116 .env_remove("GIT_WORK_TREE")
1117 .env_remove("GIT_INDEX_FILE")
1118 .env_remove("GIT_OBJECT_DIRECTORY")
1119 .env_remove("GIT_ALTERNATE_OBJECT_DIRECTORIES")
1120 .cwd(repo.path());
1121 gctx.shell()
1122 .verbose(|s| s.status("Running", &cmd.to_string()))?;
1123 network::retry::with_retry(gctx, || {
1124 cmd.exec()
1125 .map_err(|error| GitCliError::new(error, true).into())
1126 })?;
1127
1128 Ok(())
1129}
1130
1131fn fetch_with_gitoxide(
1132 repo: &mut git2::Repository,
1133 remote_url: &str,
1134 refspecs: Vec<String>,
1135 tags: bool,
1136 shallow: gix::remote::fetch::Shallow,
1137 gctx: &GlobalContext,
1138) -> CargoResult<()> {
1139 debug!(target: "git-fetch", backend = "gitoxide");
1140
1141 let git2_repo = repo;
1142 let config_overrides = cargo_config_to_gitoxide_overrides(gctx)?;
1143 let repo_reinitialized = AtomicBool::default();
1144 let res = oxide::with_retry_and_progress(
1145 git2_repo.path(),
1146 gctx,
1147 remote_url,
1148 &|repo_path,
1149 should_interrupt,
1150 mut progress,
1151 url_for_authentication: &mut dyn FnMut(&gix::bstr::BStr)| {
1152 // The `fetch` operation here may fail spuriously due to a corrupt
1153 // repository. It could also fail, however, for a whole slew of other
1154 // reasons (aka network related reasons). We want Cargo to automatically
1155 // recover from corrupt repositories, but we don't want Cargo to stomp
1156 // over other legitimate errors.
1157 //
1158 // Consequently we save off the error of the `fetch` operation and if it
1159 // looks like a "corrupt repo" error then we blow away the repo and try
1160 // again. If it looks like any other kind of error, or if we've already
1161 // blown away the repository, then we want to return the error as-is.
1162 loop {
1163 let res = oxide::open_repo(
1164 repo_path,
1165 config_overrides.clone(),
1166 oxide::OpenMode::ForFetch,
1167 )
1168 .map_err(crate::sources::git::fetch::Error::from)
1169 .and_then(|repo| {
1170 debug!("initiating fetch of {refspecs:?} from {remote_url}");
1171 let url_for_authentication = &mut *url_for_authentication;
1172 let remote = repo
1173 .remote_at(remote_url)?
1174 .with_fetch_tags(if tags {
1175 gix::remote::fetch::Tags::All
1176 } else {
1177 gix::remote::fetch::Tags::Included
1178 })
1179 .with_refspecs(
1180 refspecs.iter().map(|s| s.as_str()),
1181 gix::remote::Direction::Fetch,
1182 )
1183 .map_err(crate::sources::git::fetch::Error::Other)?;
1184 let url = remote
1185 .url(gix::remote::Direction::Fetch)
1186 .expect("set at init")
1187 .to_owned();
1188 let connection = remote.connect(gix::remote::Direction::Fetch)?;
1189 let mut authenticate = connection.configured_credentials(url)?;
1190 let connection = connection.with_credentials(
1191 move |action: gix::protocol::credentials::helper::Action| {
1192 if let Some(url) = action
1193 .context()
1194 .and_then(|gctx| gctx.url.as_ref().filter(|url| *url != remote_url))
1195 {
1196 url_for_authentication(url.as_ref());
1197 }
1198 authenticate(action)
1199 },
1200 );
1201 let outcome = connection
1202 .prepare_fetch(&mut progress, gix::remote::ref_map::Options::default())?
1203 .with_shallow(shallow.clone())
1204 .receive(&mut progress, should_interrupt)?;
1205 Ok(outcome)
1206 });
1207 let err = match res {
1208 Ok(_) => break,
1209 Err(e) => e,
1210 };
1211 debug!("fetch failed: {}", err);
1212
1213 if !repo_reinitialized.load(Ordering::Relaxed)
1214 // We check for errors that could occur if the configuration, refs or odb files are corrupted.
1215 // We don't check for errors related to writing as `gitoxide` is expected to create missing leading
1216 // folder before writing files into it, or else not even open a directory as git repository (which is
1217 // also handled here).
1218 && err.is_corrupted()
1219 || has_shallow_lock_file(&err)
1220 {
1221 repo_reinitialized.store(true, Ordering::Relaxed);
1222 debug!(
1223 "looks like this is a corrupt repository, reinitializing \
1224 and trying again"
1225 );
1226 if oxide::reinitialize(repo_path).is_ok() {
1227 continue;
1228 }
1229 }
1230
1231 return Err(err.into());
1232 }
1233 Ok(())
1234 },
1235 );
1236 if repo_reinitialized.load(Ordering::Relaxed) {
1237 *git2_repo = git2::Repository::open(git2_repo.path())?;
1238 }
1239 res
1240}
1241
1242fn fetch_with_libgit2(
1243 repo: &mut git2::Repository,
1244 remote_url: &str,
1245 refspecs: Vec<String>,
1246 tags: bool,
1247 shallow: gix::remote::fetch::Shallow,
1248 gctx: &GlobalContext,
1249) -> CargoResult<()> {
1250 debug!(target: "git-fetch", backend = "libgit2");
1251
1252 let git_config = git2::Config::open_default()?;
1253 with_fetch_options(&git_config, remote_url, gctx, &mut |mut opts| {
1254 if tags {
1255 opts.download_tags(git2::AutotagOption::All);
1256 }
1257 if let gix::remote::fetch::Shallow::DepthAtRemote(depth) = shallow {
1258 opts.depth(0i32.saturating_add_unsigned(depth.get()));
1259 }
1260 // The `fetch` operation here may fail spuriously due to a corrupt
1261 // repository. It could also fail, however, for a whole slew of other
1262 // reasons (aka network related reasons). We want Cargo to automatically
1263 // recover from corrupt repositories, but we don't want Cargo to stomp
1264 // over other legitimate errors.
1265 //
1266 // Consequently we save off the error of the `fetch` operation and if it
1267 // looks like a "corrupt repo" error then we blow away the repo and try
1268 // again. If it looks like any other kind of error, or if we've already
1269 // blown away the repository, then we want to return the error as-is.
1270 let mut repo_reinitialized = false;
1271 loop {
1272 debug!("initiating fetch of {refspecs:?} from {remote_url}");
1273 let res = repo
1274 .remote_anonymous(remote_url)?
1275 .fetch(&refspecs, Some(&mut opts), None);
1276 let err = match res {
1277 Ok(()) => break,
1278 Err(e) => e,
1279 };
1280 debug!("fetch failed: {}", err);
1281
1282 if !repo_reinitialized && matches!(err.class(), ErrorClass::Reference | ErrorClass::Odb)
1283 {
1284 repo_reinitialized = true;
1285 debug!(
1286 "looks like this is a corrupt repository, reinitializing \
1287 and trying again"
1288 );
1289 if reinitialize(repo).is_ok() {
1290 continue;
1291 }
1292 }
1293
1294 return Err(err.into());
1295 }
1296 Ok(())
1297 })
1298}
1299
1300/// Attempts to `git gc` a repository.
1301///
1302/// Cargo has a bunch of long-lived git repositories in its global cache and
1303/// some, like the index, are updated very frequently. Right now each update
1304/// creates a new "pack file" inside the git database, and over time this can
1305/// cause bad performance and bad current behavior in libgit2.
1306///
1307/// One pathological use case today is where libgit2 opens hundreds of file
1308/// descriptors, getting us dangerously close to blowing out the OS limits of
1309/// how many fds we can have open. This is detailed in [#4403].
1310///
1311/// To try to combat this problem we attempt a `git gc` here. Note, though, that
1312/// we may not even have `git` installed on the system! As a result we
1313/// opportunistically try a `git gc` when the pack directory looks too big, and
1314/// failing that we just blow away the repository and start over.
1315///
1316/// In theory this shouldn't be too expensive compared to the network request
1317/// we're about to issue.
1318///
1319/// [#4403]: https://github.com/rust-lang/cargo/issues/4403
1320fn maybe_gc_repo(repo: &mut git2::Repository, gctx: &GlobalContext) -> CargoResult<()> {
1321 // Here we arbitrarily declare that if you have more than 100 files in your
1322 // `pack` folder that we need to do a gc.
1323 let entries = match repo.path().join("objects/pack").read_dir() {
1324 Ok(e) => e.count(),
1325 Err(_) => {
1326 debug!("skipping gc as pack dir appears gone");
1327 return Ok(());
1328 }
1329 };
1330 let max = gctx
1331 .get_env("__CARGO_PACKFILE_LIMIT")
1332 .ok()
1333 .and_then(|s| s.parse::<usize>().ok())
1334 .unwrap_or(100);
1335 if entries < max {
1336 debug!("skipping gc as there's only {} pack files", entries);
1337 return Ok(());
1338 }
1339
1340 // First up, try a literal `git gc` by shelling out to git. This is pretty
1341 // likely to fail though as we may not have `git` installed. Note that
1342 // libgit2 doesn't currently implement the gc operation, so there's no
1343 // equivalent there.
1344 match Command::new("git")
1345 .arg("gc")
1346 .current_dir(repo.path())
1347 .output()
1348 {
1349 Ok(out) => {
1350 debug!(
1351 "git-gc status: {}\n\nstdout ---\n{}\nstderr ---\n{}",
1352 out.status,
1353 String::from_utf8_lossy(&out.stdout),
1354 String::from_utf8_lossy(&out.stderr)
1355 );
1356 if out.status.success() {
1357 let new = git2::Repository::open(repo.path())?;
1358 *repo = new;
1359 return Ok(());
1360 }
1361 }
1362 Err(e) => debug!("git-gc failed to spawn: {}", e),
1363 }
1364
1365 // Alright all else failed, let's start over.
1366 reinitialize(repo)
1367}
1368
1369/// Removes temporary files left from previous activity.
1370///
1371/// If libgit2 is interrupted while indexing pack files, it will leave behind
1372/// some temporary files that it doesn't clean up. These can be quite large in
1373/// size, so this tries to clean things up.
1374///
1375/// This intentionally ignores errors. This is only an opportunistic cleaning,
1376/// and we don't really care if there are issues (there's unlikely anything
1377/// that can be done).
1378///
1379/// The git CLI has similar behavior (its temp files look like
1380/// `objects/pack/tmp_pack_9kUSA8`). Those files are normally deleted via `git
1381/// prune` which is run by `git gc`. However, it doesn't know about libgit2's
1382/// filenames, so they never get cleaned up.
1383fn clean_repo_temp_files(repo: &git2::Repository) {
1384 let path = repo.path().join("objects/pack/pack_git2_*");
1385 let Some(pattern) = path.to_str() else {
1386 tracing::warn!("cannot convert {path:?} to a string");
1387 return;
1388 };
1389 let Ok(paths) = glob::glob(pattern) else {
1390 return;
1391 };
1392 for path in paths {
1393 if let Ok(path) = path {
1394 match paths::remove_file(&path) {
1395 Ok(_) => tracing::debug!("removed stale temp git file {path:?}"),
1396 Err(e) => {
1397 tracing::warn!("failed to remove {path:?} while cleaning temp files: {e}")
1398 }
1399 }
1400 }
1401 }
1402}
1403
1404/// Reinitializes a given Git repository. This is useful when a Git repository
1405/// seems corrupted and we want to start over.
1406fn reinitialize(repo: &mut git2::Repository) -> CargoResult<()> {
1407 // Here we want to drop the current repository object pointed to by `repo`,
1408 // so we initialize temporary repository in a sub-folder, blow away the
1409 // existing git folder, and then recreate the git repo. Finally we blow away
1410 // the `tmp` folder we allocated.
1411 let path = repo.path().to_path_buf();
1412 debug!("reinitializing git repo at {:?}", path);
1413 let tmp = path.join("tmp");
1414 let bare = !repo.path().ends_with(".git");
1415 *repo = init(&tmp, false)?;
1416 for entry in path.read_dir()? {
1417 let entry = entry?;
1418 if entry.file_name().to_str() == Some("tmp") {
1419 continue;
1420 }
1421 let path = entry.path();
1422 drop(paths::remove_file(&path).or_else(|_| paths::remove_dir_all(&path)));
1423 }
1424 *repo = init(&path, bare)?;
1425 paths::remove_dir_all(&tmp)?;
1426 Ok(())
1427}
1428
1429/// Initializes a Git repository at `path`.
1430fn init(path: &Path, bare: bool) -> CargoResult<git2::Repository> {
1431 let mut opts = git2::RepositoryInitOptions::new();
1432 // Skip anything related to templates, they just call all sorts of issues as
1433 // we really don't want to use them yet they insist on being used. See #6240
1434 // for an example issue that comes up.
1435 opts.external_template(false);
1436 opts.bare(bare);
1437 Ok(git2::Repository::init_opts(&path, &opts)?)
1438}
1439
1440/// The result of GitHub fast path check. See [`github_fast_path`] for more.
1441enum FastPathRev {
1442 /// The local rev (determined by `reference.resolve(repo)`) is already up to
1443 /// date with what this rev resolves to on GitHub's server.
1444 UpToDate,
1445 /// The following SHA must be fetched in order for the local rev to become
1446 /// up to date.
1447 NeedsFetch(Oid),
1448 /// Don't know whether local rev is up to date. We'll fetch _all_ branches
1449 /// and tags from the server and see what happens.
1450 Indeterminate,
1451}
1452
1453/// Attempts GitHub's special fast path for testing if we've already got an
1454/// up-to-date copy of the repository.
1455///
1456/// Updating the index is done pretty regularly so we want it to be as fast as
1457/// possible. For registries hosted on GitHub (like the crates.io index) there's
1458/// a fast path available to use[^1] to tell us that there's no updates to be
1459/// made.
1460///
1461/// Note that this function should never cause an actual failure because it's
1462/// just a fast path. As a result, a caller should ignore `Err` returned from
1463/// this function and move forward on the normal path.
1464///
1465/// [^1]: <https://developer.github.com/v3/repos/commits/#get-the-sha-1-of-a-commit-reference>
1466fn github_fast_path(
1467 repo: &mut git2::Repository,
1468 url: &str,
1469 reference: &GitReference,
1470 gctx: &GlobalContext,
1471) -> CargoResult<FastPathRev> {
1472 let url = Url::parse(url)?;
1473 if !is_github(&url) {
1474 return Ok(FastPathRev::Indeterminate);
1475 }
1476
1477 let local_object = resolve_ref(reference, repo).ok();
1478
1479 let github_branch_name = match reference {
1480 GitReference::Branch(branch) => branch,
1481 GitReference::Tag(tag) => tag,
1482 GitReference::DefaultBranch => "HEAD",
1483 GitReference::Rev(rev) => {
1484 if rev.starts_with("refs/") {
1485 rev
1486 } else if looks_like_commit_hash(rev) {
1487 // `revparse_single` (used by `resolve`) is the only way to turn
1488 // short hash -> long hash, but it also parses other things,
1489 // like branch and tag names, which might coincidentally be
1490 // valid hex.
1491 //
1492 // We only return early if `rev` is a prefix of the object found
1493 // by `revparse_single`. Don't bother talking to GitHub in that
1494 // case, since commit hashes are permanent. If a commit with the
1495 // requested hash is already present in the local clone, its
1496 // contents must be the same as what is on the server for that
1497 // hash.
1498 //
1499 // If `rev` is not found locally by `revparse_single`, we'll
1500 // need GitHub to resolve it and get a hash. If `rev` is found
1501 // but is not a short hash of the found object, it's probably a
1502 // branch and we also need to get a hash from GitHub, in case
1503 // the branch has moved.
1504 if let Some(local_object) = local_object {
1505 if is_short_hash_of(rev, local_object) {
1506 debug!("github fast path already has {local_object}");
1507 return Ok(FastPathRev::UpToDate);
1508 }
1509 }
1510 // If `rev` is a full commit hash, the only thing it can resolve
1511 // to is itself. Don't bother talking to GitHub in that case
1512 // either. (This ensures that we always attempt to fetch the
1513 // commit directly even if we can't reach the GitHub API.)
1514 if let Some(oid) = rev_to_oid(rev) {
1515 debug!("github fast path is already a full commit hash {rev}");
1516 return Ok(FastPathRev::NeedsFetch(oid));
1517 }
1518 rev
1519 } else {
1520 debug!("can't use github fast path with `rev = \"{}\"`", rev);
1521 return Ok(FastPathRev::Indeterminate);
1522 }
1523 }
1524 };
1525
1526 // This expects GitHub urls in the form `github.com/user/repo` and nothing
1527 // else
1528 let mut pieces = url
1529 .path_segments()
1530 .ok_or_else(|| anyhow!("no path segments on url"))?;
1531 let username = pieces
1532 .next()
1533 .ok_or_else(|| anyhow!("couldn't find username"))?;
1534 let repository = pieces
1535 .next()
1536 .ok_or_else(|| anyhow!("couldn't find repository name"))?;
1537 if pieces.next().is_some() {
1538 anyhow::bail!("too many segments on URL");
1539 }
1540
1541 // Trim off the `.git` from the repository, if present, since that's
1542 // optional for GitHub and won't work when we try to use the API as well.
1543 let repository = repository.strip_suffix(".git").unwrap_or(repository);
1544
1545 let url = format!(
1546 "https://api.github.com/repos/{}/{}/commits/{}",
1547 username, repository, github_branch_name,
1548 );
1549 let mut handle = gctx.http()?.lock().unwrap();
1550 debug!("attempting GitHub fast path for {}", url);
1551 handle.get(true)?;
1552 handle.url(&url)?;
1553 handle.useragent("cargo")?;
1554 handle.follow_location(true)?; // follow redirects
1555 handle.http_headers({
1556 let mut headers = List::new();
1557 headers.append("Accept: application/vnd.github.3.sha")?;
1558 if let Some(local_object) = local_object {
1559 headers.append(&format!("If-None-Match: \"{}\"", local_object))?;
1560 }
1561 headers
1562 })?;
1563
1564 let mut response_body = Vec::new();
1565 let mut transfer = handle.transfer();
1566 transfer.write_function(|data| {
1567 response_body.extend_from_slice(data);
1568 Ok(data.len())
1569 })?;
1570 transfer.perform()?;
1571 drop(transfer); // end borrow of handle so that response_code can be called
1572
1573 let response_code = handle.response_code()?;
1574 if response_code == 304 {
1575 debug!("github fast path up-to-date");
1576 Ok(FastPathRev::UpToDate)
1577 } else if response_code == 200 {
1578 let oid_to_fetch = str::from_utf8(&response_body)?.parse::<Oid>()?;
1579 debug!("github fast path fetch {oid_to_fetch}");
1580 Ok(FastPathRev::NeedsFetch(oid_to_fetch))
1581 } else {
1582 // Usually response_code == 404 if the repository does not exist, and
1583 // response_code == 422 if exists but GitHub is unable to resolve the
1584 // requested rev.
1585 debug!("github fast path bad response code {response_code}");
1586 Ok(FastPathRev::Indeterminate)
1587 }
1588}
1589
1590/// Whether a `url` is one from GitHub.
1591fn is_github(url: &Url) -> bool {
1592 url.host_str() == Some("github.com")
1593}
1594
1595// Give some messages on GitHub PR URL given as is
1596pub(crate) fn note_github_pull_request(url: &str) -> Option<String> {
1597 if let Ok(url) = url.parse::<Url>()
1598 && is_github(&url)
1599 {
1600 let path_segments = url
1601 .path_segments()
1602 .map(|p| p.into_iter().collect::<Vec<_>>())
1603 .unwrap_or_default();
1604 if let [owner, repo, "pull", pr_number, ..] = path_segments[..] {
1605 let repo_url = format!("https://github.com/{owner}/{repo}.git");
1606 let rev = format!("refs/pull/{pr_number}/head");
1607 return Some(format!(
1608 concat!(
1609 "\n\nnote: GitHub url {} is not a repository. \n",
1610 "help: Replace the dependency with \n",
1611 " `git = \"{}\" rev = \"{}\"` \n",
1612 " to specify pull requests as dependencies' revision."
1613 ),
1614 url, repo_url, rev
1615 ));
1616 }
1617 }
1618
1619 None
1620}
1621
/// Tells whether `rev` could be a commit hash: at least 7 characters long
/// and made up of nothing but ASCII hex digits.
fn looks_like_commit_hash(rev: &str) -> bool {
    if rev.len() < 7 {
        return false;
    }
    rev.bytes().all(|byte| byte.is_ascii_hexdigit())
}
1626
1627/// Whether `rev` is a shorter hash of `oid`.
1628fn is_short_hash_of(rev: &str, oid: Oid) -> bool {
1629 let long_hash = oid.to_string();
1630 match long_hash.get(..rev.len()) {
1631 Some(truncated_long_hash) => truncated_long_hash.eq_ignore_ascii_case(rev),
1632 None => false,
1633 }
1634}
1635
#[cfg(test)]
mod tests {
    use super::absolute_submodule_url;

    /// `(base_url, submodule_url, expected)` triples for
    /// `absolute_submodule_url`.
    const CASES: &[(&str, &str, &str)] = &[
        // Base given in URL form.
        (
            "ssh://git@gitub.com/rust-lang/cargo",
            "git@github.com:rust-lang/cargo.git",
            "git@github.com:rust-lang/cargo.git",
        ),
        (
            "ssh://git@gitub.com/rust-lang/cargo",
            "./",
            "ssh://git@gitub.com/rust-lang/cargo/",
        ),
        (
            "ssh://git@gitub.com/rust-lang/cargo",
            "../",
            "ssh://git@gitub.com/rust-lang/",
        ),
        (
            "ssh://git@gitub.com/rust-lang/cargo",
            "./foo",
            "ssh://git@gitub.com/rust-lang/cargo/foo",
        ),
        (
            "ssh://git@gitub.com/rust-lang/cargo/",
            "./foo",
            "ssh://git@gitub.com/rust-lang/cargo/foo",
        ),
        (
            "ssh://git@gitub.com/rust-lang/cargo/",
            "../foo",
            "ssh://git@gitub.com/rust-lang/foo",
        ),
        (
            "ssh://git@gitub.com/rust-lang/cargo",
            "../foo",
            "ssh://git@gitub.com/rust-lang/foo",
        ),
        (
            "ssh://git@gitub.com/rust-lang/cargo",
            "../foo/bar/../baz",
            "ssh://git@gitub.com/rust-lang/foo/baz",
        ),
        // Base given in scp-like form: relative submodule paths are
        // expected to be appended as they are.
        (
            "git@github.com:rust-lang/cargo.git",
            "ssh://git@gitub.com/rust-lang/cargo",
            "ssh://git@gitub.com/rust-lang/cargo",
        ),
        (
            "git@github.com:rust-lang/cargo.git",
            "./",
            "git@github.com:rust-lang/cargo.git/./",
        ),
        (
            "git@github.com:rust-lang/cargo.git",
            "../",
            "git@github.com:rust-lang/cargo.git/../",
        ),
        (
            "git@github.com:rust-lang/cargo.git",
            "./foo",
            "git@github.com:rust-lang/cargo.git/./foo",
        ),
        (
            "git@github.com:rust-lang/cargo.git/",
            "./foo",
            "git@github.com:rust-lang/cargo.git/./foo",
        ),
        (
            "git@github.com:rust-lang/cargo.git",
            "../foo",
            "git@github.com:rust-lang/cargo.git/../foo",
        ),
        (
            "git@github.com:rust-lang/cargo.git/",
            "../foo",
            "git@github.com:rust-lang/cargo.git/../foo",
        ),
        (
            "git@github.com:rust-lang/cargo.git",
            "../foo/bar/../baz",
            "git@github.com:rust-lang/cargo.git/../foo/bar/../baz",
        ),
    ];

    #[test]
    fn test_absolute_submodule_url() {
        for &(base_url, submodule_url, expected) in CASES {
            let url = absolute_submodule_url(base_url, submodule_url).unwrap();
            assert_eq!(
                expected, url,
                "base `{base_url}`; submodule `{submodule_url}`"
            );
        }
    }
}
1734
1735/// Turns a full commit hash revision into an oid.
1736///
1737/// Git object ID is supposed to be a hex string of 20 (SHA1) or 32 (SHA256) bytes.
1738/// Its length must be double to the underlying bytes (40 or 64),
1739/// otherwise libgit2 would happily zero-pad the returned oid.
1740///
1741/// See:
1742///
1743/// * <https://github.com/rust-lang/cargo/issues/13188>
1744/// * <https://github.com/rust-lang/cargo/issues/13968>
1745pub(super) fn rev_to_oid(rev: &str) -> Option<Oid> {
1746 Oid::from_str(rev)
1747 .ok()
1748 .filter(|oid| oid.as_bytes().len() * 2 == rev.len())
1749}