cargo/sources/git/utils.rs
1//! Utilities for handling git repositories, mainly around
2//! authentication/cloning.
3
4use crate::core::{GitReference, Verbosity};
5use crate::sources::git::fetch::RemoteKind;
6use crate::sources::git::oxide;
7use crate::sources::git::oxide::cargo_config_to_gitoxide_overrides;
8use crate::util::errors::CargoResult;
9use crate::util::{
10 human_readable_bytes, network, GlobalContext, IntoUrl, MetricsCounter, Progress,
11};
12use anyhow::{anyhow, Context as _};
13use cargo_util::{paths, ProcessBuilder};
14use curl::easy::List;
15use git2::{ErrorClass, ObjectType, Oid};
16use serde::ser;
17use serde::Serialize;
18use std::borrow::Cow;
19use std::fmt;
20use std::path::{Path, PathBuf};
21use std::process::Command;
22use std::str;
23use std::sync::atomic::{AtomicBool, Ordering};
24use std::time::{Duration, Instant};
25use tracing::{debug, info};
26use url::Url;
27
/// A marker file that, if present, indicates `git reset` has completed and a
/// repo checkout is ready to go. See [`GitCheckout::reset`] for why we need this.
30const CHECKOUT_READY_LOCK: &str = ".cargo-ok";
31
32fn serialize_str<T, S>(t: &T, s: S) -> Result<S::Ok, S::Error>
33where
34 T: fmt::Display,
35 S: ser::Serializer,
36{
37 s.collect_str(t)
38}
39
40/// A short abbreviated OID.
41///
42/// Exists for avoiding extra allocations in [`GitDatabase::to_short_id`].
43pub struct GitShortID(git2::Buf);
44
45impl GitShortID {
46 /// Views the short ID as a `str`.
47 pub fn as_str(&self) -> &str {
48 self.0.as_str().unwrap()
49 }
50}
51
/// A remote repository. It gets cloned into a local [`GitDatabase`].
///
/// When serialized, the URL is written out as a string through its `Display`
/// implementation (see the `serialize_with` attribute on `url`).
#[derive(PartialEq, Clone, Debug, Serialize)]
pub struct GitRemote {
    /// URL to a remote repository.
    #[serde(serialize_with = "serialize_str")]
    url: Url,
}
59
/// A local clone of a remote repository's database. Multiple [`GitCheckout`]s
/// can be cloned from a single [`GitDatabase`].
pub struct GitDatabase {
    /// The remote repository where this database is fetched from.
    remote: GitRemote,
    /// Path to the root of the underlying Git repository on the local filesystem.
    ///
    /// Also used as the source URL for the filesystem-to-filesystem clone in
    /// [`GitCheckout::clone_into`].
    path: PathBuf,
    /// Underlying Git repository instance for this database.
    repo: git2::Repository,
}
70
/// A local checkout of a particular revision from a [`GitDatabase`].
///
/// The `'a` lifetime ties the checkout to the borrow of the database it was
/// created from.
pub struct GitCheckout<'a> {
    /// The git database where this checkout is cloned from.
    database: &'a GitDatabase,
    /// Path to the root of the underlying Git repository on the local filesystem.
    ///
    /// [`GitCheckout::new`] sets this to the repository's working directory,
    /// falling back to the repository path when there is no working directory.
    path: PathBuf,
    /// The git revision this checkout is for.
    ///
    /// [`GitCheckout::is_fresh`] compares `HEAD` against this value.
    revision: git2::Oid,
    /// Underlying Git repository instance for this checkout.
    repo: git2::Repository,
}
82
83impl GitRemote {
84 /// Creates an instance for a remote repository URL.
85 pub fn new(url: &Url) -> GitRemote {
86 GitRemote { url: url.clone() }
87 }
88
89 /// Gets the remote repository URL.
90 pub fn url(&self) -> &Url {
91 &self.url
92 }
93
94 /// Fetches and checkouts to a reference or a revision from this remote
95 /// into a local path.
96 ///
97 /// This ensures that it gets the up-to-date commit when a named reference
98 /// is given (tag, branch, refs/*). Thus, network connection is involved.
99 ///
100 /// If we have a previous instance of [`GitDatabase`] then fetch into that
101 /// if we can. If that can successfully load our revision then we've
102 /// populated the database with the latest version of `reference`, so
103 /// return that database and the rev we resolve to.
104 pub fn checkout(
105 &self,
106 into: &Path,
107 db: Option<GitDatabase>,
108 reference: &GitReference,
109 gctx: &GlobalContext,
110 ) -> CargoResult<(GitDatabase, git2::Oid)> {
111 if let Some(mut db) = db {
112 fetch(
113 &mut db.repo,
114 self.url.as_str(),
115 reference,
116 gctx,
117 RemoteKind::GitDependency,
118 )
119 .with_context(|| format!("failed to fetch into: {}", into.display()))?;
120
121 if let Some(rev) = resolve_ref(reference, &db.repo).ok() {
122 return Ok((db, rev));
123 }
124 }
125
126 // Otherwise start from scratch to handle corrupt git repositories.
127 // After our fetch (which is interpreted as a clone now) we do the same
128 // resolution to figure out what we cloned.
129 if into.exists() {
130 paths::remove_dir_all(into)?;
131 }
132 paths::create_dir_all(into)?;
133 let mut repo = init(into, true)?;
134 fetch(
135 &mut repo,
136 self.url.as_str(),
137 reference,
138 gctx,
139 RemoteKind::GitDependency,
140 )
141 .with_context(|| format!("failed to clone into: {}", into.display()))?;
142 let rev = resolve_ref(reference, &repo)?;
143
144 Ok((
145 GitDatabase {
146 remote: self.clone(),
147 path: into.to_path_buf(),
148 repo,
149 },
150 rev,
151 ))
152 }
153
154 /// Creates a [`GitDatabase`] of this remote at `db_path`.
155 pub fn db_at(&self, db_path: &Path) -> CargoResult<GitDatabase> {
156 let repo = git2::Repository::open(db_path)?;
157 Ok(GitDatabase {
158 remote: self.clone(),
159 path: db_path.to_path_buf(),
160 repo,
161 })
162 }
163}
164
165impl GitDatabase {
166 /// Checkouts to a revision at `dest`ination from this database.
167 #[tracing::instrument(skip(self, gctx))]
168 pub fn copy_to(
169 &self,
170 rev: git2::Oid,
171 dest: &Path,
172 gctx: &GlobalContext,
173 ) -> CargoResult<GitCheckout<'_>> {
174 // If the existing checkout exists, and it is fresh, use it.
175 // A non-fresh checkout can happen if the checkout operation was
176 // interrupted. In that case, the checkout gets deleted and a new
177 // clone is created.
178 let checkout = match git2::Repository::open(dest)
179 .ok()
180 .map(|repo| GitCheckout::new(self, rev, repo))
181 .filter(|co| co.is_fresh())
182 {
183 Some(co) => co,
184 None => {
185 let (checkout, guard) = GitCheckout::clone_into(dest, self, rev, gctx)?;
186 checkout.update_submodules(gctx)?;
187 guard.mark_ok()?;
188 checkout
189 }
190 };
191
192 Ok(checkout)
193 }
194
195 /// Get a short OID for a `revision`, usually 7 chars or more if ambiguous.
196 pub fn to_short_id(&self, revision: git2::Oid) -> CargoResult<GitShortID> {
197 let obj = self.repo.find_object(revision, None)?;
198 Ok(GitShortID(obj.short_id()?))
199 }
200
201 /// Checks if the database contains the object of this `oid`..
202 pub fn contains(&self, oid: git2::Oid) -> bool {
203 self.repo.revparse_single(&oid.to_string()).is_ok()
204 }
205
206 /// [`resolve_ref`]s this reference with this database.
207 pub fn resolve(&self, r: &GitReference) -> CargoResult<git2::Oid> {
208 resolve_ref(r, &self.repo)
209 }
210}
211
212/// Resolves [`GitReference`] to an object ID with objects the `repo` currently has.
213pub fn resolve_ref(gitref: &GitReference, repo: &git2::Repository) -> CargoResult<git2::Oid> {
214 let id = match gitref {
215 // Note that we resolve the named tag here in sync with where it's
216 // fetched into via `fetch` below.
217 GitReference::Tag(s) => (|| -> CargoResult<git2::Oid> {
218 let refname = format!("refs/remotes/origin/tags/{}", s);
219 let id = repo.refname_to_id(&refname)?;
220 let obj = repo.find_object(id, None)?;
221 let obj = obj.peel(ObjectType::Commit)?;
222 Ok(obj.id())
223 })()
224 .with_context(|| format!("failed to find tag `{}`", s))?,
225
226 // Resolve the remote name since that's all we're configuring in
227 // `fetch` below.
228 GitReference::Branch(s) => {
229 let name = format!("origin/{}", s);
230 let b = repo
231 .find_branch(&name, git2::BranchType::Remote)
232 .with_context(|| format!("failed to find branch `{}`", s))?;
233 b.get()
234 .target()
235 .ok_or_else(|| anyhow::format_err!("branch `{}` did not have a target", s))?
236 }
237
238 // We'll be using the HEAD commit
239 GitReference::DefaultBranch => {
240 let head_id = repo.refname_to_id("refs/remotes/origin/HEAD")?;
241 let head = repo.find_object(head_id, None)?;
242 head.peel(ObjectType::Commit)?.id()
243 }
244
245 GitReference::Rev(s) => {
246 let obj = repo.revparse_single(s)?;
247 match obj.as_tag() {
248 Some(tag) => tag.target_id(),
249 None => obj.id(),
250 }
251 }
252 };
253 Ok(id)
254}
255
256impl<'a> GitCheckout<'a> {
257 /// Creates an instance of [`GitCheckout`]. This doesn't imply the checkout
258 /// is done. Use [`GitCheckout::is_fresh`] to check.
259 ///
260 /// * The `database` is where this checkout is from.
261 /// * The `repo` will be the checked out Git repository.
262 fn new(
263 database: &'a GitDatabase,
264 revision: git2::Oid,
265 repo: git2::Repository,
266 ) -> GitCheckout<'a> {
267 let path = repo.workdir().unwrap_or_else(|| repo.path());
268 GitCheckout {
269 path: path.to_path_buf(),
270 database,
271 revision,
272 repo,
273 }
274 }
275
276 /// Gets the remote repository URL.
277 fn remote_url(&self) -> &Url {
278 &self.database.remote.url()
279 }
280
281 /// Clone a repo for a `revision` into a local path from a `datatabase`.
282 /// This is a filesystem-to-filesystem clone.
283 fn clone_into(
284 into: &Path,
285 database: &'a GitDatabase,
286 revision: git2::Oid,
287 gctx: &GlobalContext,
288 ) -> CargoResult<(GitCheckout<'a>, CheckoutGuard)> {
289 let dirname = into.parent().unwrap();
290 paths::create_dir_all(&dirname)?;
291 if into.exists() {
292 paths::remove_dir_all(into)?;
293 }
294
295 // we're doing a local filesystem-to-filesystem clone so there should
296 // be no need to respect global configuration options, so pass in
297 // an empty instance of `git2::Config` below.
298 let git_config = git2::Config::new()?;
299
300 // Clone the repository, but make sure we use the "local" option in
301 // libgit2 which will attempt to use hardlinks to set up the database.
302 // This should speed up the clone operation quite a bit if it works.
303 //
304 // Note that we still use the same fetch options because while we don't
305 // need authentication information we may want progress bars and such.
306 let url = database.path.into_url()?;
307 let mut repo = None;
308 with_fetch_options(&git_config, url.as_str(), gctx, &mut |fopts| {
309 let mut checkout = git2::build::CheckoutBuilder::new();
310 checkout.dry_run(); // we'll do this below during a `reset`
311
312 let r = git2::build::RepoBuilder::new()
313 // use hard links and/or copy the database, we're doing a
314 // filesystem clone so this'll speed things up quite a bit.
315 .clone_local(git2::build::CloneLocal::Local)
316 .with_checkout(checkout)
317 .fetch_options(fopts)
318 .clone(url.as_str(), into)?;
319 // `git2` doesn't seem to handle shallow repos correctly when doing
320 // a local clone. Fortunately all that's needed is the copy of the
321 // one file that defines the shallow boundary, the commits which
322 // have their parents omitted as part of the shallow clone.
323 //
324 // TODO(git2): remove this when git2 supports shallow clone correctly
325 if database.repo.is_shallow() {
326 std::fs::copy(
327 database.repo.path().join("shallow"),
328 r.path().join("shallow"),
329 )?;
330 }
331 repo = Some(r);
332 Ok(())
333 })?;
334 let repo = repo.unwrap();
335
336 let checkout = GitCheckout::new(database, revision, repo);
337 let guard = checkout.reset(gctx)?;
338 Ok((checkout, guard))
339 }
340
341 /// Checks if the `HEAD` of this checkout points to the expected revision.
342 fn is_fresh(&self) -> bool {
343 match self.repo.revparse_single("HEAD") {
344 Ok(ref head) if head.id() == self.revision => {
345 // See comments in reset() for why we check this
346 self.path.join(CHECKOUT_READY_LOCK).exists()
347 }
348 _ => false,
349 }
350 }
351
352 /// Similar to [`reset()`]. This roughly performs `git reset --hard` to the
353 /// revision of this checkout, with additional interrupt protection by a
354 /// dummy file [`CHECKOUT_READY_LOCK`].
355 ///
356 /// If we're interrupted while performing a `git reset` (e.g., we die
357 /// because of a signal) Cargo needs to be sure to try to check out this
358 /// repo again on the next go-round.
359 ///
360 /// To enable this we have a dummy file in our checkout, [`.cargo-ok`],
361 /// which if present means that the repo has been successfully reset and is
362 /// ready to go. Hence if we start to do a reset, we make sure this file
363 /// *doesn't* exist. The caller of [`reset`] has an option to perform additional operations
364 /// (e.g. submodule update) before marking the check-out as ready.
365 ///
366 /// [`.cargo-ok`]: CHECKOUT_READY_LOCK
367 fn reset(&self, gctx: &GlobalContext) -> CargoResult<CheckoutGuard> {
368 let guard = CheckoutGuard::guard(&self.path);
369 info!("reset {} to {}", self.repo.path().display(), self.revision);
370
371 // Ensure libgit2 won't mess with newlines when we vendor.
372 if let Ok(mut git_config) = self.repo.config() {
373 git_config.set_bool("core.autocrlf", false)?;
374 }
375
376 let object = self.repo.find_object(self.revision, None)?;
377 reset(&self.repo, &object, gctx)?;
378
379 Ok(guard)
380 }
381
382 /// Like `git submodule update --recursive` but for this git checkout.
383 ///
384 /// This function respects `submodule.<name>.update = none`[^1] git config.
385 /// Submodules set to `none` won't be fetched.
386 ///
387 /// [^1]: <https://git-scm.com/docs/git-submodule#Documentation/git-submodule.txt-none>
388 fn update_submodules(&self, gctx: &GlobalContext) -> CargoResult<()> {
389 return update_submodules(&self.repo, gctx, self.remote_url().as_str());
390
391 /// Recursive helper for [`GitCheckout::update_submodules`].
392 fn update_submodules(
393 repo: &git2::Repository,
394 gctx: &GlobalContext,
395 parent_remote_url: &str,
396 ) -> CargoResult<()> {
397 debug!("update submodules for: {:?}", repo.workdir().unwrap());
398
399 for mut child in repo.submodules()? {
400 update_submodule(repo, &mut child, gctx, parent_remote_url).with_context(|| {
401 format!(
402 "failed to update submodule `{}`",
403 child.name().unwrap_or("")
404 )
405 })?;
406 }
407 Ok(())
408 }
409
410 /// Update a single Git submodule, and recurse into its submodules.
411 fn update_submodule(
412 parent: &git2::Repository,
413 child: &mut git2::Submodule<'_>,
414 gctx: &GlobalContext,
415 parent_remote_url: &str,
416 ) -> CargoResult<()> {
417 child.init(false)?;
418
419 let child_url_str = child.url().ok_or_else(|| {
420 anyhow::format_err!("non-utf8 url for submodule {:?}?", child.path())
421 })?;
422
423 // Skip the submodule if the config says not to update it.
424 if child.update_strategy() == git2::SubmoduleUpdate::None {
425 gctx.shell().status(
426 "Skipping",
427 format!(
428 "git submodule `{}` due to update strategy in .gitmodules",
429 child_url_str
430 ),
431 )?;
432 return Ok(());
433 }
434
435 let child_remote_url = absolute_submodule_url(parent_remote_url, child_url_str)?;
436
437 // A submodule which is listed in .gitmodules but not actually
438 // checked out will not have a head id, so we should ignore it.
439 let Some(head) = child.head_id() else {
440 return Ok(());
441 };
442
443 // If the submodule hasn't been checked out yet, we need to
444 // clone it. If it has been checked out and the head is the same
445 // as the submodule's head, then we can skip an update and keep
446 // recursing.
447 let head_and_repo = child.open().and_then(|repo| {
448 let target = repo.head()?.target();
449 Ok((target, repo))
450 });
451 let mut repo = match head_and_repo {
452 Ok((head, repo)) => {
453 if child.head_id() == head {
454 return update_submodules(&repo, gctx, &child_remote_url);
455 }
456 repo
457 }
458 Err(..) => {
459 let path = parent.workdir().unwrap().join(child.path());
460 let _ = paths::remove_dir_all(&path);
461 init(&path, false)?
462 }
463 };
464 // Fetch data from origin and reset to the head commit
465 let reference = GitReference::Rev(head.to_string());
466 gctx.shell()
467 .status("Updating", format!("git submodule `{child_remote_url}`"))?;
468 fetch(
469 &mut repo,
470 &child_remote_url,
471 &reference,
472 gctx,
473 RemoteKind::GitDependency,
474 )
475 .with_context(|| {
476 let name = child.name().unwrap_or("");
477 format!("failed to fetch submodule `{name}` from {child_remote_url}",)
478 })?;
479
480 let obj = repo.find_object(head, None)?;
481 reset(&repo, &obj, gctx)?;
482 update_submodules(&repo, gctx, &child_remote_url)
483 }
484 }
485}
486
487/// See [`GitCheckout::reset`] for rationale on this type.
488#[must_use]
489struct CheckoutGuard {
490 ok_file: PathBuf,
491}
492
493impl CheckoutGuard {
494 fn guard(path: &Path) -> Self {
495 let ok_file = path.join(CHECKOUT_READY_LOCK);
496 let _ = paths::remove_file(&ok_file);
497 Self { ok_file }
498 }
499
500 fn mark_ok(self) -> CargoResult<()> {
501 let _ = paths::create(self.ok_file)?;
502 Ok(())
503 }
504}
505
506/// Constructs an absolute URL for a child submodule URL with its parent base URL.
507///
508/// Git only assumes a submodule URL is a relative path if it starts with `./`
509/// or `../` [^1]. To fetch the correct repo, we need to construct an absolute
510/// submodule URL.
511///
512/// At this moment it comes with some limitations:
513///
514/// * GitHub doesn't accept non-normalized URLs with relative paths.
515/// (`ssh://git@github.com/rust-lang/cargo.git/relative/..` is invalid)
516/// * `url` crate cannot parse SCP-like URLs.
517/// (`git@github.com:rust-lang/cargo.git` is not a valid WHATWG URL)
518///
519/// To overcome these, this patch always tries [`Url::parse`] first to normalize
520/// the path. If it couldn't, append the relative path as the last resort and
521/// pray the remote git service supports non-normalized URLs.
522///
523/// See also rust-lang/cargo#12404 and rust-lang/cargo#12295.
524///
525/// [^1]: <https://git-scm.com/docs/git-submodule>
526fn absolute_submodule_url<'s>(base_url: &str, submodule_url: &'s str) -> CargoResult<Cow<'s, str>> {
527 let absolute_url = if ["./", "../"].iter().any(|p| submodule_url.starts_with(p)) {
528 match Url::parse(base_url) {
529 Ok(mut base_url) => {
530 let path = base_url.path();
531 if !path.ends_with('/') {
532 base_url.set_path(&format!("{path}/"));
533 }
534 let absolute_url = base_url.join(submodule_url).with_context(|| {
535 format!(
536 "failed to parse relative child submodule url `{submodule_url}` \
537 using parent base url `{base_url}`"
538 )
539 })?;
540 Cow::from(absolute_url.to_string())
541 }
542 Err(_) => {
543 let mut absolute_url = base_url.to_string();
544 if !absolute_url.ends_with('/') {
545 absolute_url.push('/');
546 }
547 absolute_url.push_str(submodule_url);
548 Cow::from(absolute_url)
549 }
550 }
551 } else {
552 Cow::from(submodule_url)
553 };
554
555 Ok(absolute_url)
556}
557
/// Prepare the authentication callbacks for cloning a git repository.
///
/// The main purpose of this function is to construct the "authentication
/// callback" which is used to clone a repository. This callback will attempt to
/// find the right authentication on the system (without user input) and will
/// guide libgit2 in doing so.
///
/// The callback is provided `allowed` types of credentials, and we try to do as
/// much as possible based on that:
///
/// * Prioritize SSH keys from the local ssh agent as they're likely the most
///   reliable. The username here is prioritized from the credential
///   callback, then from whatever is configured in git itself, and finally
///   we fall back to the generic user of `git`.
///
/// * If a username/password is allowed, then we fallback to git2-rs's
///   implementation of the credential helper. This is what is configured
///   with `credential.helper` in git, and is the interface for the macOS
///   keychain, for example.
///
/// * After the above two have failed, we just kinda grapple attempting to
///   return *something*.
///
/// If any form of authentication fails, libgit2 will repeatedly ask us for
/// credentials until we give it a reason to not do so. To ensure we don't
/// just sit here looping forever we keep track of authentications we've
/// attempted and we don't try the same ones again.
///
/// # Parameters
///
/// * `url` - the URL being authenticated against; also handed to the
///   credential helper.
/// * `cfg` - git configuration consulted by the credential helper.
/// * `f` - the operation to run. It receives the credentials callback and may
///   be invoked several times: once up front, then once per candidate SSH
///   username when libgit2 asked for a username.
///
/// # Errors
///
/// Returns the error of the last invocation of `f`. When the credentials
/// callback was invoked at least once, the error is wrapped with a summary of
/// what was attempted; otherwise certain classes of `git2` errors get a hint
/// about `net.git-fetch-with-cli`, and callback-class errors are reduced to
/// their bare message.
fn with_authentication<T, F>(
    gctx: &GlobalContext,
    url: &str,
    cfg: &git2::Config,
    mut f: F,
) -> CargoResult<T>
where
    F: FnMut(&mut git2::Credentials<'_>) -> CargoResult<T>,
{
    let mut cred_helper = git2::CredentialHelper::new(url);
    cred_helper.config(cfg);

    // Bookkeeping shared with the credentials callback below. It is used both
    // to avoid retrying the same method and to build the final error message.
    let mut ssh_username_requested = false;
    // `None`: helper not tried yet; `Some(true)`: helper failed;
    // `Some(false)`: helper returned credentials.
    let mut cred_helper_bad = None;
    let mut ssh_agent_attempts = Vec::new();
    let mut any_attempts = false;
    let mut tried_sshkey = false;
    // Set when libgit2 asks for credentials for a URL other than `url`.
    let mut url_attempt = None;

    let orig_url = url;
    let mut res = f(&mut |url, username, allowed| {
        any_attempts = true;
        if url != orig_url {
            url_attempt = Some(url.to_string());
        }
        // libgit2's "USERNAME" authentication actually means that it's just
        // asking us for a username to keep going. This is currently only really
        // used for SSH authentication and isn't really an authentication type.
        // The logic currently looks like:
        //
        //      let user = ...;
        //      if (user.is_null())
        //          user = callback(USERNAME, null, ...);
        //
        //      callback(SSH_KEY, user, ...)
        //
        // So if we're being called here then we know that (a) we're using ssh
        // authentication and (b) no username was specified in the URL that
        // we're trying to clone. We need to guess an appropriate username here,
        // but that may involve a few attempts. Unfortunately we can't switch
        // usernames during one authentication session with libgit2, so to
        // handle this we bail out of this authentication session after setting
        // the flag `ssh_username_requested`, and then we handle this below.
        if allowed.contains(git2::CredentialType::USERNAME) {
            debug_assert!(username.is_none());
            ssh_username_requested = true;
            return Err(git2::Error::from_str("gonna try usernames later"));
        }

        // An "SSH_KEY" authentication indicates that we need some sort of SSH
        // authentication. This can currently either come from the ssh-agent
        // process or from a raw in-memory SSH key. Cargo only supports using
        // ssh-agent currently.
        //
        // If we get called with this then the only way that should be possible
        // is if a username is specified in the URL itself (e.g., `username` is
        // Some), hence the unwrap() here. We try custom usernames down below.
        if allowed.contains(git2::CredentialType::SSH_KEY) && !tried_sshkey {
            // If ssh-agent authentication fails, libgit2 will keep
            // calling this callback asking for other authentication
            // methods to try. Make sure we only try ssh-agent once,
            // to avoid looping forever.
            tried_sshkey = true;
            let username = username.unwrap();
            debug_assert!(!ssh_username_requested);
            ssh_agent_attempts.push(username.to_string());
            return git2::Cred::ssh_key_from_agent(username);
        }

        // Sometimes libgit2 will ask for a username/password in plaintext. This
        // is where Cargo would have an interactive prompt if we supported it,
        // but we currently don't! Right now the only way we support fetching a
        // plaintext password is through the `credential.helper` support, so
        // fetch that here.
        //
        // If ssh-agent authentication fails, libgit2 will keep calling this
        // callback asking for other authentication methods to try. Check
        // cred_helper_bad to make sure we only try the git credential helper
        // once, to avoid looping forever.
        if allowed.contains(git2::CredentialType::USER_PASS_PLAINTEXT) && cred_helper_bad.is_none()
        {
            let r = git2::Cred::credential_helper(cfg, url, username);
            cred_helper_bad = Some(r.is_err());
            return r;
        }

        // I'm... not sure what the DEFAULT kind of authentication is, but seems
        // easy to support?
        if allowed.contains(git2::CredentialType::DEFAULT) {
            return git2::Cred::default();
        }

        // Whelp, we tried our best
        Err(git2::Error::from_str("no authentication methods succeeded"))
    });

    // Ok, so if it looks like we're going to be doing ssh authentication, we
    // want to try a few different usernames as one wasn't specified in the URL
    // for us to use. In order, we'll try:
    //
    // * A credential helper's username for this URL, if available.
    // * This account's username.
    // * "git"
    //
    // We have to restart the authentication session each time (due to
    // constraints in libssh2 I guess? maybe this is inherent to ssh?), so we
    // call our callback, `f`, in a loop here.
    if ssh_username_requested {
        debug_assert!(res.is_err());
        // Candidates are pushed in reverse priority order because the loop
        // below takes them from the back with `pop()`.
        let mut attempts = vec![String::from("git")];
        if let Ok(s) = gctx.get_env("USER").or_else(|_| gctx.get_env("USERNAME")) {
            attempts.push(s.to_string());
        }
        if let Some(ref s) = cred_helper.username {
            attempts.push(s.clone());
        }

        while let Some(s) = attempts.pop() {
            // We should get `USERNAME` first, where we just return our attempt,
            // and then after that we should get `SSH_KEY`. If the first attempt
            // fails we'll get called again, but we don't have another option so
            // we bail out.
            //
            // Note: this counter shadows the `attempts` vector inside the loop
            // body; it counts `SSH_KEY` requests for the current username.
            let mut attempts = 0;
            res = f(&mut |_url, username, allowed| {
                if allowed.contains(git2::CredentialType::USERNAME) {
                    return git2::Cred::username(&s);
                }
                if allowed.contains(git2::CredentialType::SSH_KEY) {
                    debug_assert_eq!(Some(&s[..]), username);
                    attempts += 1;
                    if attempts == 1 {
                        ssh_agent_attempts.push(s.to_string());
                        return git2::Cred::ssh_key_from_agent(&s);
                    }
                }
                Err(git2::Error::from_str("no authentication methods succeeded"))
            });

            // If we made two attempts then that means:
            //
            // 1. A username was requested, we returned `s`.
            // 2. An ssh key was requested, we returned to look up `s` in the
            //    ssh agent.
            // 3. For whatever reason that lookup failed, so we were asked again
            //    for another mode of authentication.
            //
            // Essentially, if `attempts == 2` then in theory the only error was
            // that this username failed to authenticate (e.g., no other network
            // errors happened). Otherwise something else is funny so we bail
            // out.
            if attempts != 2 {
                break;
            }
        }
    }
    // Success needs no further decoration; from here on we only improve the
    // error message.
    let mut err = match res {
        Ok(e) => return Ok(e),
        Err(e) => e,
    };

    // In the case of an authentication failure (where we tried something) then
    // we try to give a more helpful error message about precisely what we
    // tried.
    if any_attempts {
        let mut msg = "failed to authenticate when downloading \
                       repository"
            .to_string();

        if let Some(attempt) = &url_attempt {
            if url != attempt {
                msg.push_str(": ");
                msg.push_str(attempt);
            }
        }
        msg.push('\n');
        if !ssh_agent_attempts.is_empty() {
            let names = ssh_agent_attempts
                .iter()
                .map(|s| format!("`{}`", s))
                .collect::<Vec<_>>()
                .join(", ");
            msg.push_str(&format!(
                "\n* attempted ssh-agent authentication, but \
                 no usernames succeeded: {}",
                names
            ));
        }
        if let Some(failed_cred_helper) = cred_helper_bad {
            if failed_cred_helper {
                msg.push_str(
                    "\n* attempted to find username/password via \
                     git's `credential.helper` support, but failed",
                );
            } else {
                msg.push_str(
                    "\n* attempted to find username/password via \
                     `credential.helper`, but maybe the found \
                     credentials were incorrect",
                );
            }
        }
        msg.push_str("\n\n");
        msg.push_str("if the git CLI succeeds then `net.git-fetch-with-cli` may help here\n");
        msg.push_str("https://doc.rust-lang.org/cargo/reference/config.html#netgit-fetch-with-cli");
        err = err.context(msg);

        // Otherwise if we didn't even get to the authentication phase then we may
        // have failed to set up a connection, in these cases hint on the
        // `net.git-fetch-with-cli` configuration option.
    } else if let Some(e) = err.downcast_ref::<git2::Error>() {
        match e.class() {
            ErrorClass::Net
            | ErrorClass::Ssl
            | ErrorClass::Submodule
            | ErrorClass::FetchHead
            | ErrorClass::Ssh
            | ErrorClass::Http => {
                let mut msg = "network failure seems to have happened\n".to_string();
                msg.push_str(
                    "if a proxy or similar is necessary `net.git-fetch-with-cli` may help here\n",
                );
                msg.push_str(
                    "https://doc.rust-lang.org/cargo/reference/config.html#netgit-fetch-with-cli",
                );
                err = err.context(msg);
            }
            ErrorClass::Callback => {
                // This unwraps the git2 error. We're using the callback error
                // specifically to convey errors from Rust land through the C
                // callback interface. We don't need the `; class=Callback
                // (26)` that gets tacked on to the git2 error message.
                err = anyhow::format_err!("{}", e.message());
            }
            _ => {}
        }
    }

    Err(err)
}
824
825/// `git reset --hard` to the given `obj` for the `repo`.
826///
827/// The `obj` is a commit-ish to which the head should be moved.
828fn reset(repo: &git2::Repository, obj: &git2::Object<'_>, gctx: &GlobalContext) -> CargoResult<()> {
829 let mut pb = Progress::new("Checkout", gctx);
830 let mut opts = git2::build::CheckoutBuilder::new();
831 opts.progress(|_, cur, max| {
832 drop(pb.tick(cur, max, ""));
833 });
834 debug!("doing reset");
835 repo.reset(obj, git2::ResetType::Hard, Some(&mut opts))?;
836 debug!("reset done");
837 Ok(())
838}
839
/// Prepares the callbacks for fetching a git repository.
///
/// The main purpose of this function is to construct everything before a fetch.
/// This will attempt to setup a progress bar, the authentication for git,
/// ssh known hosts check, and the network retry mechanism.
///
/// The callback is provided a fetch options, which can be used by the actual
/// git fetch.
///
/// # Parameters
///
/// * `git_config` - git configuration passed on to [`with_authentication`].
/// * `url` - the URL being fetched; used for authentication and to extract
///   the port handed to the known-hosts check.
/// * `cb` - performs the actual fetch with the prepared options.
///
/// # Errors
///
/// Returns an error if the network configuration cannot be loaded or if the
/// fetch fails. When the fetch failed and the certificate check recorded an
/// error, that recorded error is returned in preference.
pub fn with_fetch_options(
    git_config: &git2::Config,
    url: &str,
    gctx: &GlobalContext,
    cb: &mut dyn FnMut(git2::FetchOptions<'_>) -> CargoResult<()>,
) -> CargoResult<()> {
    let mut progress = Progress::new("Fetch", gctx);
    let ssh_config = gctx.net_config()?.ssh.as_ref();
    let config_known_hosts = ssh_config.and_then(|ssh| ssh.known_hosts.as_ref());
    let diagnostic_home_config = gctx.diagnostic_home_config();
    // NOTE(review): presumably `with_retry` may run this closure more than
    // once, which would be why all callback state is rebuilt inside it —
    // confirm against `network::retry`.
    network::retry::with_retry(gctx, || {
        // Hack: libgit2 disallows overriding the error from check_cb since v1.8.0,
        // so we store the error additionally and unwrap it later
        let mut check_cb_result = Ok(());
        let auth_result = with_authentication(gctx, url, git_config, |f| {
            let port = Url::parse(url).ok().and_then(|url| url.port());
            let mut last_update = Instant::now();
            let mut rcb = git2::RemoteCallbacks::new();
            // We choose `N=10` here to make a `300ms * 10slots ~= 3000ms`
            // sliding window for tracking the data transfer rate (in bytes/s).
            let mut counter = MetricsCounter::<10>::new(0, last_update);
            rcb.credentials(f);
            rcb.certificate_check(|cert, host| {
                match super::known_hosts::certificate_check(
                    gctx,
                    cert,
                    host,
                    port,
                    config_known_hosts,
                    &diagnostic_home_config,
                ) {
                    Ok(status) => Ok(status),
                    Err(e) => {
                        check_cb_result = Err(e);
                        // This is not really used because it'll be overridden by libgit2
                        // See https://github.com/libgit2/libgit2/commit/9a9f220119d9647a352867b24b0556195cb26548
                        Err(git2::Error::from_str(
                            "invalid or unknown remote ssh hostkey",
                        ))
                    }
                }
            });
            rcb.transfer_progress(|stats| {
                let indexed_deltas = stats.indexed_deltas();
                let msg = if indexed_deltas > 0 {
                    // Resolving deltas.
                    format!(
                        ", ({}/{}) resolving deltas",
                        indexed_deltas,
                        stats.total_deltas()
                    )
                } else {
                    // Receiving objects.
                    //
                    // # Caveat
                    //
                    // Progress bar relies on git2 calling `transfer_progress`
                    // to update its transfer rate, but we cannot guarantee a
                    // periodic call of that callback. Thus if we don't receive
                    // any data for, say, 10 seconds, the rate will get stuck
                    // and never go down to 0B/s.
                    // In the future, we need to find a way to update the rate
                    // even when the callback is not called.
                    let now = Instant::now();
                    // Scrape a `received_bytes` to the counter every 300ms.
                    if now - last_update > Duration::from_millis(300) {
                        counter.add(stats.received_bytes(), now);
                        last_update = now;
                    }
                    let (rate, unit) = human_readable_bytes(counter.rate() as u64);
                    format!(", {:.2}{}/s", rate, unit)
                };
                // The callback's return value is whether the progress tick
                // succeeded.
                progress
                    .tick(stats.indexed_objects(), stats.total_objects(), &msg)
                    .is_ok()
            });

            // Create a local anonymous remote in the repository to fetch the
            // url
            let mut opts = git2::FetchOptions::new();
            opts.remote_callbacks(rcb);
            cb(opts)
        });
        // On failure, surface the error stored by the certificate check (if
        // any) before falling back to the error from the fetch itself.
        if auth_result.is_err() {
            check_cb_result?;
        }
        auth_result?;
        Ok(())
    })
}
938
939/// Attempts to fetch the given git `reference` for a Git repository.
940///
941/// This is the main entry for git clone/fetch. It does the followings:
942///
943/// * Turns [`GitReference`] into refspecs accordingly.
944/// * Dispatches `git fetch` using libgit2, gitoxide, or git CLI.
945///
946/// The `remote_url` argument is the git remote URL where we want to fetch from.
947///
948/// The `remote_kind` argument is a thing for [`-Zgitoxide`] shallow clones
949/// at this time. It could be extended when libgit2 supports shallow clones.
950///
951/// [`-Zgitoxide`]: https://doc.rust-lang.org/nightly/cargo/reference/unstable.html#gitoxide
952pub fn fetch(
953 repo: &mut git2::Repository,
954 remote_url: &str,
955 reference: &GitReference,
956 gctx: &GlobalContext,
957 remote_kind: RemoteKind,
958) -> CargoResult<()> {
959 if gctx.frozen() {
960 anyhow::bail!(
961 "attempting to update a git repository, but --frozen \
962 was specified"
963 )
964 }
965 if !gctx.network_allowed() {
966 anyhow::bail!("can't update a git repository in the offline mode")
967 }
968
969 let shallow = remote_kind.to_shallow_setting(repo.is_shallow(), gctx);
970
971 // Flag to keep track if the rev is a full commit hash
972 let mut fast_path_rev: bool = false;
973
974 let oid_to_fetch = match github_fast_path(repo, remote_url, reference, gctx) {
975 Ok(FastPathRev::UpToDate) => return Ok(()),
976 Ok(FastPathRev::NeedsFetch(rev)) => Some(rev),
977 Ok(FastPathRev::Indeterminate) => None,
978 Err(e) => {
979 debug!("failed to check github {:?}", e);
980 None
981 }
982 };
983
984 maybe_gc_repo(repo, gctx)?;
985
986 clean_repo_temp_files(repo);
987
988 // Translate the reference desired here into an actual list of refspecs
989 // which need to get fetched. Additionally record if we're fetching tags.
990 let mut refspecs = Vec::new();
991 let mut tags = false;
992 // The `+` symbol on the refspec means to allow a forced (fast-forward)
993 // update which is needed if there is ever a force push that requires a
994 // fast-forward.
995 match reference {
996 // For branches and tags we can fetch simply one reference and copy it
997 // locally, no need to fetch other branches/tags.
998 GitReference::Branch(b) => {
999 refspecs.push(format!("+refs/heads/{0}:refs/remotes/origin/{0}", b));
1000 }
1001
1002 GitReference::Tag(t) => {
1003 refspecs.push(format!("+refs/tags/{0}:refs/remotes/origin/tags/{0}", t));
1004 }
1005
1006 GitReference::DefaultBranch => {
1007 refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
1008 }
1009
1010 GitReference::Rev(rev) => {
1011 if rev.starts_with("refs/") {
1012 refspecs.push(format!("+{0}:{0}", rev));
1013 } else if let Some(oid_to_fetch) = oid_to_fetch {
1014 fast_path_rev = true;
1015 refspecs.push(format!("+{0}:refs/commit/{0}", oid_to_fetch));
1016 } else if !matches!(shallow, gix::remote::fetch::Shallow::NoChange)
1017 && rev.parse::<Oid>().is_ok()
1018 {
1019 // There is a specific commit to fetch and we will do so in shallow-mode only
1020 // to not disturb the previous logic.
1021 // Note that with typical settings for shallowing, we will just fetch a single `rev`
1022 // as single commit.
1023 // The reason we write to `refs/remotes/origin/HEAD` is that it's of special significance
1024 // when during `GitReference::resolve()`, but otherwise it shouldn't matter.
1025 refspecs.push(format!("+{0}:refs/remotes/origin/HEAD", rev));
1026 } else {
1027 // We don't know what the rev will point to. To handle this
1028 // situation we fetch all branches and tags, and then we pray
1029 // it's somewhere in there.
1030 refspecs.push(String::from("+refs/heads/*:refs/remotes/origin/*"));
1031 refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
1032 tags = true;
1033 }
1034 }
1035 }
1036
1037 let result = if let Some(true) = gctx.net_config()?.git_fetch_with_cli {
1038 fetch_with_cli(repo, remote_url, &refspecs, tags, gctx)
1039 } else if gctx.cli_unstable().gitoxide.map_or(false, |git| git.fetch) {
1040 fetch_with_gitoxide(repo, remote_url, refspecs, tags, shallow, gctx)
1041 } else {
1042 fetch_with_libgit2(repo, remote_url, refspecs, tags, shallow, gctx)
1043 };
1044
1045 if fast_path_rev {
1046 if let Some(oid) = oid_to_fetch {
1047 return result.with_context(|| format!("revision {} not found", oid));
1048 }
1049 }
1050 result
1051}
1052
1053/// `gitoxide` uses shallow locks to assure consistency when fetching to and to avoid races, and to write
1054/// files atomically.
1055/// Cargo has its own lock files and doesn't need that mechanism for race protection, so a stray lock means
1056/// a signal interrupted a previous shallow fetch and doesn't mean a race is happening.
1057fn has_shallow_lock_file(err: &crate::sources::git::fetch::Error) -> bool {
1058 matches!(
1059 err,
1060 gix::env::collate::fetch::Error::Fetch(gix::remote::fetch::Error::Fetch(
1061 gix::protocol::fetch::Error::LockShallowFile(_)
1062 ))
1063 )
1064}
1065
1066/// Attempts to use `git` CLI installed on the system to fetch a repository,
1067/// when the config value [`net.git-fetch-with-cli`][1] is set.
1068///
1069/// Unfortunately `libgit2` is notably lacking in the realm of authentication
1070/// when compared to the `git` command line. As a result, allow an escape
1071/// hatch for users that would prefer to use `git`-the-CLI for fetching
1072/// repositories instead of `libgit2`-the-library. This should make more
1073/// flavors of authentication possible while also still giving us all the
1074/// speed and portability of using `libgit2`.
1075///
1076/// [1]: https://doc.rust-lang.org/nightly/cargo/reference/config.html#netgit-fetch-with-cli
1077fn fetch_with_cli(
1078 repo: &mut git2::Repository,
1079 url: &str,
1080 refspecs: &[String],
1081 tags: bool,
1082 gctx: &GlobalContext,
1083) -> CargoResult<()> {
1084 let mut cmd = ProcessBuilder::new("git");
1085 cmd.arg("fetch");
1086 if tags {
1087 cmd.arg("--tags");
1088 } else {
1089 cmd.arg("--no-tags");
1090 }
1091 match gctx.shell().verbosity() {
1092 Verbosity::Normal => {}
1093 Verbosity::Verbose => {
1094 cmd.arg("--verbose");
1095 }
1096 Verbosity::Quiet => {
1097 cmd.arg("--quiet");
1098 }
1099 }
1100 cmd.arg("--force") // handle force pushes
1101 .arg("--update-head-ok") // see discussion in #2078
1102 .arg(url)
1103 .args(refspecs)
1104 // If cargo is run by git (for example, the `exec` command in `git
1105 // rebase`), the GIT_DIR is set by git and will point to the wrong
1106 // location. This makes sure GIT_DIR is always the repository path.
1107 .env("GIT_DIR", repo.path())
1108 // The reset of these may not be necessary, but I'm including them
1109 // just to be extra paranoid and avoid any issues.
1110 .env_remove("GIT_WORK_TREE")
1111 .env_remove("GIT_INDEX_FILE")
1112 .env_remove("GIT_OBJECT_DIRECTORY")
1113 .env_remove("GIT_ALTERNATE_OBJECT_DIRECTORIES")
1114 .cwd(repo.path());
1115 gctx.shell()
1116 .verbose(|s| s.status("Running", &cmd.to_string()))?;
1117 cmd.exec()?;
1118 Ok(())
1119}
1120
1121fn fetch_with_gitoxide(
1122 repo: &mut git2::Repository,
1123 remote_url: &str,
1124 refspecs: Vec<String>,
1125 tags: bool,
1126 shallow: gix::remote::fetch::Shallow,
1127 gctx: &GlobalContext,
1128) -> CargoResult<()> {
1129 let git2_repo = repo;
1130 let config_overrides = cargo_config_to_gitoxide_overrides(gctx)?;
1131 let repo_reinitialized = AtomicBool::default();
1132 let res = oxide::with_retry_and_progress(
1133 &git2_repo.path().to_owned(),
1134 gctx,
1135 &|repo_path,
1136 should_interrupt,
1137 mut progress,
1138 url_for_authentication: &mut dyn FnMut(&gix::bstr::BStr)| {
1139 // The `fetch` operation here may fail spuriously due to a corrupt
1140 // repository. It could also fail, however, for a whole slew of other
1141 // reasons (aka network related reasons). We want Cargo to automatically
1142 // recover from corrupt repositories, but we don't want Cargo to stomp
1143 // over other legitimate errors.
1144 //
1145 // Consequently we save off the error of the `fetch` operation and if it
1146 // looks like a "corrupt repo" error then we blow away the repo and try
1147 // again. If it looks like any other kind of error, or if we've already
1148 // blown away the repository, then we want to return the error as-is.
1149 loop {
1150 let res = oxide::open_repo(
1151 repo_path,
1152 config_overrides.clone(),
1153 oxide::OpenMode::ForFetch,
1154 )
1155 .map_err(crate::sources::git::fetch::Error::from)
1156 .and_then(|repo| {
1157 debug!("initiating fetch of {refspecs:?} from {remote_url}");
1158 let url_for_authentication = &mut *url_for_authentication;
1159 let remote = repo
1160 .remote_at(remote_url)?
1161 .with_fetch_tags(if tags {
1162 gix::remote::fetch::Tags::All
1163 } else {
1164 gix::remote::fetch::Tags::Included
1165 })
1166 .with_refspecs(
1167 refspecs.iter().map(|s| s.as_str()),
1168 gix::remote::Direction::Fetch,
1169 )
1170 .map_err(crate::sources::git::fetch::Error::Other)?;
1171 let url = remote
1172 .url(gix::remote::Direction::Fetch)
1173 .expect("set at init")
1174 .to_owned();
1175 let connection = remote.connect(gix::remote::Direction::Fetch)?;
1176 let mut authenticate = connection.configured_credentials(url)?;
1177 let connection = connection.with_credentials(
1178 move |action: gix::protocol::credentials::helper::Action| {
1179 if let Some(url) = action
1180 .context()
1181 .and_then(|gctx| gctx.url.as_ref().filter(|url| *url != remote_url))
1182 {
1183 url_for_authentication(url.as_ref());
1184 }
1185 authenticate(action)
1186 },
1187 );
1188 let outcome = connection
1189 .prepare_fetch(&mut progress, gix::remote::ref_map::Options::default())?
1190 .with_shallow(shallow.clone())
1191 .receive(&mut progress, should_interrupt)?;
1192 Ok(outcome)
1193 });
1194 let err = match res {
1195 Ok(_) => break,
1196 Err(e) => e,
1197 };
1198 debug!("fetch failed: {}", err);
1199
1200 if !repo_reinitialized.load(Ordering::Relaxed)
1201 // We check for errors that could occur if the configuration, refs or odb files are corrupted.
1202 // We don't check for errors related to writing as `gitoxide` is expected to create missing leading
1203 // folder before writing files into it, or else not even open a directory as git repository (which is
1204 // also handled here).
1205 && err.is_corrupted()
1206 || has_shallow_lock_file(&err)
1207 {
1208 repo_reinitialized.store(true, Ordering::Relaxed);
1209 debug!(
1210 "looks like this is a corrupt repository, reinitializing \
1211 and trying again"
1212 );
1213 if oxide::reinitialize(repo_path).is_ok() {
1214 continue;
1215 }
1216 }
1217
1218 return Err(err.into());
1219 }
1220 Ok(())
1221 },
1222 );
1223 if repo_reinitialized.load(Ordering::Relaxed) {
1224 *git2_repo = git2::Repository::open(git2_repo.path())?;
1225 }
1226 res
1227}
1228
1229fn fetch_with_libgit2(
1230 repo: &mut git2::Repository,
1231 remote_url: &str,
1232 refspecs: Vec<String>,
1233 tags: bool,
1234 shallow: gix::remote::fetch::Shallow,
1235 gctx: &GlobalContext,
1236) -> CargoResult<()> {
1237 debug!("doing a fetch for {remote_url}");
1238 let git_config = git2::Config::open_default()?;
1239 with_fetch_options(&git_config, remote_url, gctx, &mut |mut opts| {
1240 if tags {
1241 opts.download_tags(git2::AutotagOption::All);
1242 }
1243 if let gix::remote::fetch::Shallow::DepthAtRemote(depth) = shallow {
1244 opts.depth(0i32.saturating_add_unsigned(depth.get()));
1245 }
1246 // The `fetch` operation here may fail spuriously due to a corrupt
1247 // repository. It could also fail, however, for a whole slew of other
1248 // reasons (aka network related reasons). We want Cargo to automatically
1249 // recover from corrupt repositories, but we don't want Cargo to stomp
1250 // over other legitimate errors.
1251 //
1252 // Consequently we save off the error of the `fetch` operation and if it
1253 // looks like a "corrupt repo" error then we blow away the repo and try
1254 // again. If it looks like any other kind of error, or if we've already
1255 // blown away the repository, then we want to return the error as-is.
1256 let mut repo_reinitialized = false;
1257 loop {
1258 debug!("initiating fetch of {refspecs:?} from {remote_url}");
1259 let res = repo
1260 .remote_anonymous(remote_url)?
1261 .fetch(&refspecs, Some(&mut opts), None);
1262 let err = match res {
1263 Ok(()) => break,
1264 Err(e) => e,
1265 };
1266 debug!("fetch failed: {}", err);
1267
1268 if !repo_reinitialized && matches!(err.class(), ErrorClass::Reference | ErrorClass::Odb)
1269 {
1270 repo_reinitialized = true;
1271 debug!(
1272 "looks like this is a corrupt repository, reinitializing \
1273 and trying again"
1274 );
1275 if reinitialize(repo).is_ok() {
1276 continue;
1277 }
1278 }
1279
1280 return Err(err.into());
1281 }
1282 Ok(())
1283 })
1284}
1285
1286/// Attempts to `git gc` a repository.
1287///
1288/// Cargo has a bunch of long-lived git repositories in its global cache and
1289/// some, like the index, are updated very frequently. Right now each update
1290/// creates a new "pack file" inside the git database, and over time this can
1291/// cause bad performance and bad current behavior in libgit2.
1292///
1293/// One pathological use case today is where libgit2 opens hundreds of file
1294/// descriptors, getting us dangerously close to blowing out the OS limits of
1295/// how many fds we can have open. This is detailed in [#4403].
1296///
1297/// To try to combat this problem we attempt a `git gc` here. Note, though, that
1298/// we may not even have `git` installed on the system! As a result we
1299/// opportunistically try a `git gc` when the pack directory looks too big, and
1300/// failing that we just blow away the repository and start over.
1301///
1302/// In theory this shouldn't be too expensive compared to the network request
1303/// we're about to issue.
1304///
1305/// [#4403]: https://github.com/rust-lang/cargo/issues/4403
1306fn maybe_gc_repo(repo: &mut git2::Repository, gctx: &GlobalContext) -> CargoResult<()> {
1307 // Here we arbitrarily declare that if you have more than 100 files in your
1308 // `pack` folder that we need to do a gc.
1309 let entries = match repo.path().join("objects/pack").read_dir() {
1310 Ok(e) => e.count(),
1311 Err(_) => {
1312 debug!("skipping gc as pack dir appears gone");
1313 return Ok(());
1314 }
1315 };
1316 let max = gctx
1317 .get_env("__CARGO_PACKFILE_LIMIT")
1318 .ok()
1319 .and_then(|s| s.parse::<usize>().ok())
1320 .unwrap_or(100);
1321 if entries < max {
1322 debug!("skipping gc as there's only {} pack files", entries);
1323 return Ok(());
1324 }
1325
1326 // First up, try a literal `git gc` by shelling out to git. This is pretty
1327 // likely to fail though as we may not have `git` installed. Note that
1328 // libgit2 doesn't currently implement the gc operation, so there's no
1329 // equivalent there.
1330 match Command::new("git")
1331 .arg("gc")
1332 .current_dir(repo.path())
1333 .output()
1334 {
1335 Ok(out) => {
1336 debug!(
1337 "git-gc status: {}\n\nstdout ---\n{}\nstderr ---\n{}",
1338 out.status,
1339 String::from_utf8_lossy(&out.stdout),
1340 String::from_utf8_lossy(&out.stderr)
1341 );
1342 if out.status.success() {
1343 let new = git2::Repository::open(repo.path())?;
1344 *repo = new;
1345 return Ok(());
1346 }
1347 }
1348 Err(e) => debug!("git-gc failed to spawn: {}", e),
1349 }
1350
1351 // Alright all else failed, let's start over.
1352 reinitialize(repo)
1353}
1354
1355/// Removes temporary files left from previous activity.
1356///
1357/// If libgit2 is interrupted while indexing pack files, it will leave behind
1358/// some temporary files that it doesn't clean up. These can be quite large in
1359/// size, so this tries to clean things up.
1360///
1361/// This intentionally ignores errors. This is only an opportunistic cleaning,
1362/// and we don't really care if there are issues (there's unlikely anything
1363/// that can be done).
1364///
1365/// The git CLI has similar behavior (its temp files look like
1366/// `objects/pack/tmp_pack_9kUSA8`). Those files are normally deleted via `git
1367/// prune` which is run by `git gc`. However, it doesn't know about libgit2's
1368/// filenames, so they never get cleaned up.
1369fn clean_repo_temp_files(repo: &git2::Repository) {
1370 let path = repo.path().join("objects/pack/pack_git2_*");
1371 let Some(pattern) = path.to_str() else {
1372 tracing::warn!("cannot convert {path:?} to a string");
1373 return;
1374 };
1375 let Ok(paths) = glob::glob(pattern) else {
1376 return;
1377 };
1378 for path in paths {
1379 if let Ok(path) = path {
1380 match paths::remove_file(&path) {
1381 Ok(_) => tracing::debug!("removed stale temp git file {path:?}"),
1382 Err(e) => {
1383 tracing::warn!("failed to remove {path:?} while cleaning temp files: {e}")
1384 }
1385 }
1386 }
1387 }
1388}
1389
1390/// Reinitializes a given Git repository. This is useful when a Git repository
1391/// seems corrupted and we want to start over.
1392fn reinitialize(repo: &mut git2::Repository) -> CargoResult<()> {
1393 // Here we want to drop the current repository object pointed to by `repo`,
1394 // so we initialize temporary repository in a sub-folder, blow away the
1395 // existing git folder, and then recreate the git repo. Finally we blow away
1396 // the `tmp` folder we allocated.
1397 let path = repo.path().to_path_buf();
1398 debug!("reinitializing git repo at {:?}", path);
1399 let tmp = path.join("tmp");
1400 let bare = !repo.path().ends_with(".git");
1401 *repo = init(&tmp, false)?;
1402 for entry in path.read_dir()? {
1403 let entry = entry?;
1404 if entry.file_name().to_str() == Some("tmp") {
1405 continue;
1406 }
1407 let path = entry.path();
1408 drop(paths::remove_file(&path).or_else(|_| paths::remove_dir_all(&path)));
1409 }
1410 *repo = init(&path, bare)?;
1411 paths::remove_dir_all(&tmp)?;
1412 Ok(())
1413}
1414
1415/// Initializes a Git repository at `path`.
1416fn init(path: &Path, bare: bool) -> CargoResult<git2::Repository> {
1417 let mut opts = git2::RepositoryInitOptions::new();
1418 // Skip anything related to templates, they just call all sorts of issues as
1419 // we really don't want to use them yet they insist on being used. See #6240
1420 // for an example issue that comes up.
1421 opts.external_template(false);
1422 opts.bare(bare);
1423 Ok(git2::Repository::init_opts(&path, &opts)?)
1424}
1425
/// The result of the GitHub fast path check. See [`github_fast_path`] for
/// more.
enum FastPathRev {
    /// The local rev (determined by `resolve_ref(reference, repo)`) is
    /// already up to date with what this rev resolves to on GitHub's server,
    /// so no fetch is needed.
    UpToDate,
    /// The following SHA must be fetched in order for the local rev to become
    /// up to date.
    NeedsFetch(Oid),
    /// Don't know whether the local rev is up to date, so the caller has to
    /// fall back to the normal fetch path. For a plain `rev` that means
    /// fetching _all_ branches and tags from the server and seeing what
    /// happens.
    Indeterminate,
}
1438
1439/// Attempts GitHub's special fast path for testing if we've already got an
1440/// up-to-date copy of the repository.
1441///
1442/// Updating the index is done pretty regularly so we want it to be as fast as
1443/// possible. For registries hosted on GitHub (like the crates.io index) there's
1444/// a fast path available to use[^1] to tell us that there's no updates to be
1445/// made.
1446///
1447/// Note that this function should never cause an actual failure because it's
1448/// just a fast path. As a result, a caller should ignore `Err` returned from
1449/// this function and move forward on the normal path.
1450///
1451/// [^1]: <https://developer.github.com/v3/repos/commits/#get-the-sha-1-of-a-commit-reference>
1452fn github_fast_path(
1453 repo: &mut git2::Repository,
1454 url: &str,
1455 reference: &GitReference,
1456 gctx: &GlobalContext,
1457) -> CargoResult<FastPathRev> {
1458 let url = Url::parse(url)?;
1459 if !is_github(&url) {
1460 return Ok(FastPathRev::Indeterminate);
1461 }
1462
1463 let local_object = resolve_ref(reference, repo).ok();
1464
1465 let github_branch_name = match reference {
1466 GitReference::Branch(branch) => branch,
1467 GitReference::Tag(tag) => tag,
1468 GitReference::DefaultBranch => "HEAD",
1469 GitReference::Rev(rev) => {
1470 if rev.starts_with("refs/") {
1471 rev
1472 } else if looks_like_commit_hash(rev) {
1473 // `revparse_single` (used by `resolve`) is the only way to turn
1474 // short hash -> long hash, but it also parses other things,
1475 // like branch and tag names, which might coincidentally be
1476 // valid hex.
1477 //
1478 // We only return early if `rev` is a prefix of the object found
1479 // by `revparse_single`. Don't bother talking to GitHub in that
1480 // case, since commit hashes are permanent. If a commit with the
1481 // requested hash is already present in the local clone, its
1482 // contents must be the same as what is on the server for that
1483 // hash.
1484 //
1485 // If `rev` is not found locally by `revparse_single`, we'll
1486 // need GitHub to resolve it and get a hash. If `rev` is found
1487 // but is not a short hash of the found object, it's probably a
1488 // branch and we also need to get a hash from GitHub, in case
1489 // the branch has moved.
1490 if let Some(local_object) = local_object {
1491 if is_short_hash_of(rev, local_object) {
1492 debug!("github fast path already has {local_object}");
1493 return Ok(FastPathRev::UpToDate);
1494 }
1495 }
1496 // If `rev` is a full commit hash, the only thing it can resolve
1497 // to is itself. Don't bother talking to GitHub in that case
1498 // either. (This ensures that we always attempt to fetch the
1499 // commit directly even if we can't reach the GitHub API.)
1500 if let Some(oid) = rev_to_oid(rev) {
1501 debug!("github fast path is already a full commit hash {rev}");
1502 return Ok(FastPathRev::NeedsFetch(oid));
1503 }
1504 rev
1505 } else {
1506 debug!("can't use github fast path with `rev = \"{}\"`", rev);
1507 return Ok(FastPathRev::Indeterminate);
1508 }
1509 }
1510 };
1511
1512 // This expects GitHub urls in the form `github.com/user/repo` and nothing
1513 // else
1514 let mut pieces = url
1515 .path_segments()
1516 .ok_or_else(|| anyhow!("no path segments on url"))?;
1517 let username = pieces
1518 .next()
1519 .ok_or_else(|| anyhow!("couldn't find username"))?;
1520 let repository = pieces
1521 .next()
1522 .ok_or_else(|| anyhow!("couldn't find repository name"))?;
1523 if pieces.next().is_some() {
1524 anyhow::bail!("too many segments on URL");
1525 }
1526
1527 // Trim off the `.git` from the repository, if present, since that's
1528 // optional for GitHub and won't work when we try to use the API as well.
1529 let repository = repository.strip_suffix(".git").unwrap_or(repository);
1530
1531 let url = format!(
1532 "https://api.github.com/repos/{}/{}/commits/{}",
1533 username, repository, github_branch_name,
1534 );
1535 let mut handle = gctx.http()?.borrow_mut();
1536 debug!("attempting GitHub fast path for {}", url);
1537 handle.get(true)?;
1538 handle.url(&url)?;
1539 handle.useragent("cargo")?;
1540 handle.follow_location(true)?; // follow redirects
1541 handle.http_headers({
1542 let mut headers = List::new();
1543 headers.append("Accept: application/vnd.github.3.sha")?;
1544 if let Some(local_object) = local_object {
1545 headers.append(&format!("If-None-Match: \"{}\"", local_object))?;
1546 }
1547 headers
1548 })?;
1549
1550 let mut response_body = Vec::new();
1551 let mut transfer = handle.transfer();
1552 transfer.write_function(|data| {
1553 response_body.extend_from_slice(data);
1554 Ok(data.len())
1555 })?;
1556 transfer.perform()?;
1557 drop(transfer); // end borrow of handle so that response_code can be called
1558
1559 let response_code = handle.response_code()?;
1560 if response_code == 304 {
1561 debug!("github fast path up-to-date");
1562 Ok(FastPathRev::UpToDate)
1563 } else if response_code == 200 {
1564 let oid_to_fetch = str::from_utf8(&response_body)?.parse::<Oid>()?;
1565 debug!("github fast path fetch {oid_to_fetch}");
1566 Ok(FastPathRev::NeedsFetch(oid_to_fetch))
1567 } else {
1568 // Usually response_code == 404 if the repository does not exist, and
1569 // response_code == 422 if exists but GitHub is unable to resolve the
1570 // requested rev.
1571 debug!("github fast path bad response code {response_code}");
1572 Ok(FastPathRev::Indeterminate)
1573 }
1574}
1575
1576/// Whether a `url` is one from GitHub.
1577fn is_github(url: &Url) -> bool {
1578 url.host_str() == Some("github.com")
1579}
1580
/// Returns `true` if `rev` looks like a commit hash, i.e. it is at least 7
/// bytes long and consists solely of ASCII hex digits.
fn looks_like_commit_hash(rev: &str) -> bool {
    if rev.len() < 7 {
        return false;
    }
    // Any non-ASCII character encodes to bytes >= 0x80, none of which is a
    // hex digit, so checking bytes is equivalent to checking chars.
    rev.bytes().all(|byte| byte.is_ascii_hexdigit())
}
1585
1586/// Whether `rev` is a shorter hash of `oid`.
1587fn is_short_hash_of(rev: &str, oid: Oid) -> bool {
1588 let long_hash = oid.to_string();
1589 match long_hash.get(..rev.len()) {
1590 Some(truncated_long_hash) => truncated_long_hash.eq_ignore_ascii_case(rev),
1591 None => false,
1592 }
1593}
1594
#[cfg(test)]
mod tests {
    use super::absolute_submodule_url;

    /// Table-driven test for `absolute_submodule_url`.
    ///
    /// Each case is `(base_url, submodule_url, expected)`.
    #[test]
    fn test_absolute_submodule_url() {
        let cases = [
            // Base is an `ssh://` URL: an absolute submodule URL is returned
            // unchanged, while relative ones are resolved against the base
            // and `.` / `..` segments are normalized away.
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "git@github.com:rust-lang/cargo.git",
                "git@github.com:rust-lang/cargo.git",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "./",
                "ssh://git@gitub.com/rust-lang/cargo/",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "../",
                "ssh://git@gitub.com/rust-lang/",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "./foo",
                "ssh://git@gitub.com/rust-lang/cargo/foo",
            ),
            // A trailing slash on the base does not change the result.
            (
                "ssh://git@gitub.com/rust-lang/cargo/",
                "./foo",
                "ssh://git@gitub.com/rust-lang/cargo/foo",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo/",
                "../foo",
                "ssh://git@gitub.com/rust-lang/foo",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "../foo",
                "ssh://git@gitub.com/rust-lang/foo",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "../foo/bar/../baz",
                "ssh://git@gitub.com/rust-lang/foo/baz",
            ),
            // Base is an scp-like `user@host:path` address: an absolute
            // submodule URL is still returned unchanged, but relative ones
            // are appended to the base verbatim, without normalization.
            (
                "git@github.com:rust-lang/cargo.git",
                "ssh://git@gitub.com/rust-lang/cargo",
                "ssh://git@gitub.com/rust-lang/cargo",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "./",
                "git@github.com:rust-lang/cargo.git/./",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "../",
                "git@github.com:rust-lang/cargo.git/../",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "./foo",
                "git@github.com:rust-lang/cargo.git/./foo",
            ),
            // A trailing slash on the scp-like base is not doubled.
            (
                "git@github.com:rust-lang/cargo.git/",
                "./foo",
                "git@github.com:rust-lang/cargo.git/./foo",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "../foo",
                "git@github.com:rust-lang/cargo.git/../foo",
            ),
            (
                "git@github.com:rust-lang/cargo.git/",
                "../foo",
                "git@github.com:rust-lang/cargo.git/../foo",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "../foo/bar/../baz",
                "git@github.com:rust-lang/cargo.git/../foo/bar/../baz",
            ),
        ];

        for (base_url, submodule_url, expected) in cases {
            let url = absolute_submodule_url(base_url, submodule_url).unwrap();
            assert_eq!(
                expected, url,
                "base `{base_url}`; submodule `{submodule_url}`"
            );
        }
    }
}
1693
1694/// Turns a full commit hash revision into an oid.
1695///
1696/// Git object ID is supposed to be a hex string of 20 (SHA1) or 32 (SHA256) bytes.
1697/// Its length must be double to the underlying bytes (40 or 64),
1698/// otherwise libgit2 would happily zero-pad the returned oid.
1699///
1700/// See:
1701///
1702/// * <https://github.com/rust-lang/cargo/issues/13188>
1703/// * <https://github.com/rust-lang/cargo/issues/13968>
1704pub(super) fn rev_to_oid(rev: &str) -> Option<Oid> {
1705 Oid::from_str(rev)
1706 .ok()
1707 .filter(|oid| oid.as_bytes().len() * 2 == rev.len())
1708}