1//! Helpers to gather the VCS information for `cargo package`.
23use std::collections::HashSet;
4use std::path::Path;
5use std::path::PathBuf;
67use anyhow::Contextas _;
8use cargo_util::paths;
9use serde::Serialize;
10use tracing::debug;
1112use crate::core::Package;
13use crate::core::Workspace;
14use crate::ops::lockfile::LOCKFILE_NAME;
15use crate::sources::PathEntry;
16use crate::CargoResult;
17use crate::GlobalContext;
1819use super::PackageOpts;
2021/// Represents the VCS information when packaging.
22#[derive(Serialize)]
23pub struct VcsInfo {
24 git: GitVcsInfo,
25/// Path to the package within repo (empty string if root).
26path_in_vcs: String,
27}
2829/// Represents the Git VCS information when packaging.
30#[derive(Serialize)]
31pub struct GitVcsInfo {
32 sha1: String,
33/// Indicate whether or not the Git worktree is dirty.
34#[serde(skip_serializing_if = "std::ops::Not::not")]
35dirty: bool,
36}
3738/// Checks if the package source is in a *git* DVCS repository.
39///
40/// If *git*, and the source is *dirty* (e.g., has uncommitted changes),
41/// and `--allow-dirty` has not been passed,
42/// then `bail!` with an informative message.
43/// Otherwise return the sha1 hash of the current *HEAD* commit,
44/// or `None` if no repo is found.
45#[tracing::instrument(skip_all)]
46pub fn check_repo_state(
47 p: &Package,
48 src_files: &[PathEntry],
49 ws: &Workspace<'_>,
50 opts: &PackageOpts<'_>,
51) -> CargoResult<Option<VcsInfo>> {
52let gctx = ws.gctx();
53let Ok(repo) = git2::Repository::discover(p.root()) else {
54 gctx.shell().verbose(|shell| {
55 shell.warn(format_args!(
56"no (git) VCS found for `{}`",
57 p.root().display()
58 ))
59 })?;
60// No Git repo found. Have to assume it is clean.
61return Ok(None);
62 };
6364let Some(workdir) = repo.workdir() else {
65debug!(
66"no (git) workdir found for repo at `{}`",
67 repo.path().display()
68 );
69// No git workdir. Have to assume it is clean.
70return Ok(None);
71 };
7273debug!("found a git repo at `{}`", workdir.display());
74let path = p.manifest_path();
75let path = paths::strip_prefix_canonical(path, workdir).unwrap_or_else(|_| path.to_path_buf());
76let Ok(status) = repo.status_file(&path) else {
77 gctx.shell().verbose(|shell| {
78 shell.warn(format_args!(
79"no (git) Cargo.toml found at `{}` in workdir `{}`",
80 path.display(),
81 workdir.display()
82 ))
83 })?;
84// No checked-in `Cargo.toml` found. This package may be irrelevant.
85 // Have to assume it is clean.
86return Ok(None);
87 };
8889if !(status & git2::Status::IGNORED).is_empty() {
90 gctx.shell().verbose(|shell| {
91 shell.warn(format_args!(
92"found (git) Cargo.toml ignored at `{}` in workdir `{}`",
93 path.display(),
94 workdir.display()
95 ))
96 })?;
97// An ignored `Cargo.toml` found. This package may be irrelevant.
98 // Have to assume it is clean.
99return Ok(None);
100 }
101102 warn_symlink_checked_out_as_plain_text_file(gctx, src_files, &repo)?;
103104debug!(
105"found (git) Cargo.toml at `{}` in workdir `{}`",
106 path.display(),
107 workdir.display(),
108 );
109let Some(git) = git(ws, p, src_files, &repo, &opts)? else {
110// If the git repo lacks essensial field like `sha1`, and since this field exists from the beginning,
111 // then don't generate the corresponding file in order to maintain consistency with past behavior.
112return Ok(None);
113 };
114115let path_in_vcs = path
116 .parent()
117 .and_then(|p| p.to_str())
118 .unwrap_or("")
119 .replace("\\", "/");
120121return Ok(Some(VcsInfo { git, path_in_vcs }));
122}
123124/// Warns if any symlinks were checked out as plain text files.
125///
126/// Git config [`core.symlinks`] defaults to true when unset.
127/// In git-for-windows (and git as well),
128/// the config should be set to false explicitly when the repo was created,
129/// if symlink support wasn't detected [^1].
130///
131/// We assume the config was always set at creation time and never changed.
132/// So, if it is true, we don't bother users with any warning.
133///
134/// [^1]: <https://github.com/git-for-windows/git/blob/f1241afcc7956918d5da33ef74abd9cbba369247/setup.c#L2394-L2403>
135///
136/// [`core.symlinks`]: https://git-scm.com/docs/git-config#Documentation/git-config.txt-coresymlinks
137fn warn_symlink_checked_out_as_plain_text_file(
138 gctx: &GlobalContext,
139 src_files: &[PathEntry],
140 repo: &git2::Repository,
141) -> CargoResult<()> {
142if repo
143 .config()
144 .and_then(|c| c.get_bool("core.symlinks"))
145 .unwrap_or(true)
146 {
147return Ok(());
148 }
149150if src_files.iter().any(|f| f.maybe_plain_text_symlink()) {
151let mut shell = gctx.shell();
152shell.warn(format_args!(
153"found symbolic links that may be checked out as regular files for git repo at `{}`\n\
154 This might cause the `.crate` file to include incorrect or incomplete files",
155 repo.workdir().unwrap().display(),
156 ))?;
157let extra_note = if cfg!(windows) {
158"\nAnd on Windows, enable the Developer Mode to support symlinks"
159} else {
160""
161};
162shell.note(format_args!(
163"to avoid this, set the Git config `core.symlinks` to `true`{extra_note}",
164 ))?;
165 }
166167Ok(())
168}
169170/// The real git status check starts from here.
171fn git(
172 ws: &Workspace<'_>,
173 pkg: &Package,
174 src_files: &[PathEntry],
175 repo: &git2::Repository,
176 opts: &PackageOpts<'_>,
177) -> CargoResult<Option<GitVcsInfo>> {
178// This is a collection of any dirty or untracked files. This covers:
179 // - new/modified/deleted/renamed/type change (index or worktree)
180 // - untracked files (which are "new" worktree files)
181 // - ignored (in case the user has an `include` directive that
182 // conflicts with .gitignore).
183let mut dirty_files = Vec::new();
184let pathspec = relative_pathspec(repo, pkg.root());
185collect_statuses(repo, &[pathspec.as_str()], &mut dirty_files)?;
186187// Include each submodule so that the error message can provide
188 // specifically *which* files in a submodule are modified.
189status_submodules(repo, &mut dirty_files)?;
190191// Find the intersection of dirty in git, and the src_files that would
192 // be packaged. This is a lazy n^2 check, but seems fine with
193 // thousands of files.
194let cwd = ws.gctx().cwd();
195let mut dirty_src_files: Vec<_> = src_files196 .iter()
197 .filter(|src_file| dirty_files.iter().any(|path| src_file.starts_with(path)))
198 .map(|p| p.as_ref())
199 .chain(dirty_files_outside_pkg_root(ws, pkg, repo, src_files)?.iter())
200 .map(|path| {
201 pathdiff::diff_paths(path, cwd)
202 .as_ref()
203 .unwrap_or(path)
204 .display()
205 .to_string()
206 })
207 .collect();
208let dirty = !dirty_src_files.is_empty();
209if !dirty || opts.allow_dirty {
210// Must check whetherthe repo has no commit firstly, otherwise `revparse_single` would fail on bare commit repo.
211 // Due to lacking the `sha1` field, it's better not record the `GitVcsInfo` for consistency.
212if repo.is_empty()? {
213return Ok(None);
214 }
215let rev_obj = repo.revparse_single("HEAD")?;
216Ok(Some(GitVcsInfo {
217 sha1: rev_obj.id().to_string(),
218dirty,
219 }))
220 } else {
221dirty_src_files.sort_unstable();
222anyhow::bail!(
223"{} files in the working directory contain changes that were \
224 not yet committed into git:\n\n{}\n\n\
225 to proceed despite this and include the uncommitted changes, pass the `--allow-dirty` flag",
226 dirty_src_files.len(),
227 dirty_src_files.join("\n")
228 )
229 }
230}
231232/// Checks whether "included" source files outside package root have been modified.
233///
234/// This currently looks at
235///
236/// * `package.readme` and `package.license-file` pointing to paths outside package root
237/// * symlinks targets reside outside package root
238/// * Any change in the root workspace manifest, regardless of what has changed.
239/// * Changes in the lockfile [^1].
240///
241/// This is required because those paths may link to a file outside the
242/// current package root, but still under the git workdir, affecting the
243/// final packaged `.crate` file.
244///
245/// [^1]: Lockfile might be re-generated if it is too out of sync with the manifest.
246/// Therefore, even you have a modified lockfile,
247/// you might still get a new fresh one that matches what is in git index.
248fn dirty_files_outside_pkg_root(
249 ws: &Workspace<'_>,
250 pkg: &Package,
251 repo: &git2::Repository,
252 src_files: &[PathEntry],
253) -> CargoResult<HashSet<PathBuf>> {
254let pkg_root = pkg.root();
255let workdir = repo.workdir().unwrap();
256257let mut dirty_files = HashSet::new();
258259let meta = pkg.manifest().metadata();
260let metadata_paths: Vec<_> = [&meta.license_file, &meta.readme]
261 .into_iter()
262 .filter_map(|p| p.as_deref())
263 .map(|path| paths::normalize_path(&pkg_root.join(path)))
264 .collect();
265266// Unlike other files, lockfile is allowed to be missing,
267 // and can be generated during packaging.
268 // We skip checking when it is missing in both workdir and git index,
269 // otherwise cargo will fail with git2 not found error.
270let lockfile_path = ws.lock_root().as_path_unlocked().join(LOCKFILE_NAME);
271let lockfile_path = if lockfile_path.exists() {
272Some(lockfile_path)
273 } else if let Ok(rel_path) = paths::normalize_path(&lockfile_path).strip_prefix(workdir) {
274// We don't canonicalize here because non-existing path can't be canonicalized.
275match repo.status_file(&rel_path) {
276Ok(s) if s != git2::Status::CURRENT => {
277dirty_files.insert(lockfile_path);
278 }
279// Unmodified
280Ok(_) => {}
281Err(e) => {
282debug!(
283"check git status failed for `{}` in workdir `{}`: {e}",
284 rel_path.display(),
285 workdir.display(),
286 );
287 }
288 }
289None290 } else {
291None292 };
293294for rel_path in src_files
295 .iter()
296 .filter(|p| p.is_symlink_or_under_symlink())
297 .map(|p| p.as_ref().as_path())
298 .chain(metadata_paths.iter().map(AsRef::as_ref))
299 .chain([ws.root_manifest()])
300 .chain(lockfile_path.as_deref().into_iter())
301// If inside package root. Don't bother checking git status.
302.filter(|p| paths::strip_prefix_canonical(p, pkg_root).is_err())
303// Handle files outside package root but under git workdir,
304.filter_map(|p| paths::strip_prefix_canonical(p, workdir).ok())
305 {
306if repo.status_file(&rel_path)? != git2::Status::CURRENT {
307 dirty_files.insert(workdir.join(rel_path));
308 }
309 }
310Ok(dirty_files)
311}
312313/// Helper to collect dirty statuses for a single repo.
314fn collect_statuses(
315 repo: &git2::Repository,
316 pathspecs: &[&str],
317 dirty_files: &mut Vec<PathBuf>,
318) -> CargoResult<()> {
319let mut status_opts = git2::StatusOptions::new();
320// Exclude submodules, as they are being handled manually by recursing
321 // into each one so that details about specific files can be
322 // retrieved.
323pathspecs324 .iter()
325 .fold(&mut status_opts, git2::StatusOptions::pathspec)
326 .exclude_submodules(true)
327 .include_ignored(true)
328 .include_untracked(true);
329let repo_statuses = repo.statuses(Some(&mut status_opts)).with_context(|| {
330format!(
331"failed to retrieve git status from repo {}",
332 repo.path().display()
333 )
334 })?;
335let workdir = repo.workdir().unwrap();
336let this_dirty = repo_statuses.iter().filter_map(|entry| {
337let path = entry.path().expect("valid utf-8 path");
338if path.ends_with("Cargo.lock") && entry.status() == git2::Status::IGNORED {
339// It is OK to include Cargo.lock even if it is ignored.
340return None;
341 }
342// Use an absolute path, so that comparing paths is easier
343 // (particularly with submodules).
344Some(workdir.join(path))
345 });
346dirty_files.extend(this_dirty);
347Ok(())
348}
349350/// Helper to collect dirty statuses while recursing into submodules.
351fn status_submodules(repo: &git2::Repository, dirty_files: &mut Vec<PathBuf>) -> CargoResult<()> {
352for submodule in repo.submodules()? {
353// Ignore submodules that don't open, they are probably not initialized.
354 // If its files are required, then the verification step should fail.
355if let Ok(sub_repo) = submodule.open() {
356 status_submodules(&sub_repo, dirty_files)?;
357 collect_statuses(&sub_repo, &[], dirty_files)?;
358 }
359 }
360Ok(())
361}
362363/// Use pathspec so git only matches a certain path prefix
364fn relative_pathspec(repo: &git2::Repository, pkg_root: &Path) -> String {
365let workdir = repo.workdir().unwrap();
366let relpath = pkg_root.strip_prefix(workdir).unwrap_or(Path::new(""));
367// to unix separators
368relpath.to_str().unwrap().replace('\\', "/")
369}