//! Helpers to gather the VCS information for `cargo package`.
23use std::collections::HashSet;
4use std::path::Path;
5use std::path::PathBuf;
67use anyhow::Context as _;
8use cargo_util::paths;
9use serde::Serialize;
10use tracing::debug;
1112use crate::core::Package;
13use crate::sources::PathEntry;
14use crate::CargoResult;
15use crate::GlobalContext;
1617use super::PackageOpts;
1819/// Represents the VCS information when packaging.
20#[derive(Serialize)]
21pub struct VcsInfo {
22 git: GitVcsInfo,
23/// Path to the package within repo (empty string if root).
24path_in_vcs: String,
25}
2627/// Represents the Git VCS information when packaging.
28#[derive(Serialize)]
29pub struct GitVcsInfo {
30 sha1: String,
31/// Indicate whether or not the Git worktree is dirty.
32#[serde(skip_serializing_if = "std::ops::Not::not")]
33dirty: bool,
34}
3536/// Checks if the package source is in a *git* DVCS repository.
37///
38/// If *git*, and the source is *dirty* (e.g., has uncommitted changes),
39/// and `--allow-dirty` has not been passed,
40/// then `bail!` with an informative message.
41/// Otherwise return the sha1 hash of the current *HEAD* commit,
42/// or `None` if no repo is found.
43#[tracing::instrument(skip_all)]
44pub fn check_repo_state(
45 p: &Package,
46 src_files: &[PathEntry],
47 gctx: &GlobalContext,
48 opts: &PackageOpts<'_>,
49) -> CargoResult<Option<VcsInfo>> {
50let Ok(repo) = git2::Repository::discover(p.root()) else {
51 gctx.shell().verbose(|shell| {
52 shell.warn(format!("no (git) VCS found for `{}`", p.root().display()))
53 })?;
54// No Git repo found. Have to assume it is clean.
55return Ok(None);
56 };
5758let Some(workdir) = repo.workdir() else {
59debug!(
60"no (git) workdir found for repo at `{}`",
61 repo.path().display()
62 );
63// No git workdir. Have to assume it is clean.
64return Ok(None);
65 };
6667debug!("found a git repo at `{}`", workdir.display());
68let path = p.manifest_path();
69let path = paths::strip_prefix_canonical(path, workdir).unwrap_or_else(|_| path.to_path_buf());
70let Ok(status) = repo.status_file(&path) else {
71 gctx.shell().verbose(|shell| {
72 shell.warn(format!(
73"no (git) Cargo.toml found at `{}` in workdir `{}`",
74 path.display(),
75 workdir.display()
76 ))
77 })?;
78// No checked-in `Cargo.toml` found. This package may be irrelevant.
79 // Have to assume it is clean.
80return Ok(None);
81 };
8283if !(status & git2::Status::IGNORED).is_empty() {
84 gctx.shell().verbose(|shell| {
85 shell.warn(format!(
86"found (git) Cargo.toml ignored at `{}` in workdir `{}`",
87 path.display(),
88 workdir.display()
89 ))
90 })?;
91// An ignored `Cargo.toml` found. This package may be irrelevant.
92 // Have to assume it is clean.
93return Ok(None);
94 }
9596 warn_symlink_checked_out_as_plain_text_file(gctx, src_files, &repo)?;
9798debug!(
99"found (git) Cargo.toml at `{}` in workdir `{}`",
100 path.display(),
101 workdir.display(),
102 );
103let path_in_vcs = path
104 .parent()
105 .and_then(|p| p.to_str())
106 .unwrap_or("")
107 .replace("\\", "/");
108let Some(git) = git(p, gctx, src_files, &repo, &opts)? else {
109// If the git repo lacks essensial field like `sha1`, and since this field exists from the beginning,
110 // then don't generate the corresponding file in order to maintain consistency with past behavior.
111return Ok(None);
112 };
113114return Ok(Some(VcsInfo { git, path_in_vcs }));
115}
116117/// Warns if any symlinks were checked out as plain text files.
118///
119/// Git config [`core.symlinks`] defaults to true when unset.
120/// In git-for-windows (and git as well),
121/// the config should be set to false explicitly when the repo was created,
122/// if symlink support wasn't detected [^1].
123///
124/// We assume the config was always set at creation time and never changed.
125/// So, if it is true, we don't bother users with any warning.
126///
127/// [^1]: <https://github.com/git-for-windows/git/blob/f1241afcc7956918d5da33ef74abd9cbba369247/setup.c#L2394-L2403>
128///
129/// [`core.symlinks`]: https://git-scm.com/docs/git-config#Documentation/git-config.txt-coresymlinks
130fn warn_symlink_checked_out_as_plain_text_file(
131 gctx: &GlobalContext,
132 src_files: &[PathEntry],
133 repo: &git2::Repository,
134) -> CargoResult<()> {
135if repo
136 .config()
137 .and_then(|c| c.get_bool("core.symlinks"))
138 .unwrap_or(true)
139 {
140return Ok(());
141 }
142143if src_files.iter().any(|f| f.maybe_plain_text_symlink()) {
144let mut shell = gctx.shell();
145 shell.warn(format_args!(
146"found symbolic links that may be checked out as regular files for git repo at `{}`\n\
147 This might cause the `.crate` file to include incorrect or incomplete files",
148 repo.workdir().unwrap().display(),
149 ))?;
150let extra_note = if cfg!(windows) {
151"\nAnd on Windows, enable the Developer Mode to support symlinks"
152} else {
153""
154};
155 shell.note(format_args!(
156"to avoid this, set the Git config `core.symlinks` to `true`{extra_note}",
157 ))?;
158 }
159160Ok(())
161}
162163/// The real git status check starts from here.
164fn git(
165 pkg: &Package,
166 gctx: &GlobalContext,
167 src_files: &[PathEntry],
168 repo: &git2::Repository,
169 opts: &PackageOpts<'_>,
170) -> CargoResult<Option<GitVcsInfo>> {
171// This is a collection of any dirty or untracked files. This covers:
172 // - new/modified/deleted/renamed/type change (index or worktree)
173 // - untracked files (which are "new" worktree files)
174 // - ignored (in case the user has an `include` directive that
175 // conflicts with .gitignore).
176let mut dirty_files = Vec::new();
177let pathspec = relative_pathspec(repo, pkg.root());
178 collect_statuses(repo, &[pathspec.as_str()], &mut dirty_files)?;
179180// Include each submodule so that the error message can provide
181 // specifically *which* files in a submodule are modified.
182status_submodules(repo, &mut dirty_files)?;
183184// Find the intersection of dirty in git, and the src_files that would
185 // be packaged. This is a lazy n^2 check, but seems fine with
186 // thousands of files.
187let cwd = gctx.cwd();
188let mut dirty_src_files: Vec<_> = src_files
189 .iter()
190 .filter(|src_file| dirty_files.iter().any(|path| src_file.starts_with(path)))
191 .map(|p| p.as_ref())
192 .chain(dirty_files_outside_pkg_root(pkg, repo, src_files)?.iter())
193 .map(|path| {
194 pathdiff::diff_paths(path, cwd)
195 .as_ref()
196 .unwrap_or(path)
197 .display()
198 .to_string()
199 })
200 .collect();
201let dirty = !dirty_src_files.is_empty();
202if !dirty || opts.allow_dirty {
203// Must check whetherthe repo has no commit firstly, otherwise `revparse_single` would fail on bare commit repo.
204 // Due to lacking the `sha1` field, it's better not record the `GitVcsInfo` for consistency.
205if repo.is_empty()? {
206return Ok(None);
207 }
208let rev_obj = repo.revparse_single("HEAD")?;
209Ok(Some(GitVcsInfo {
210 sha1: rev_obj.id().to_string(),
211 dirty,
212 }))
213 } else {
214 dirty_src_files.sort_unstable();
215anyhow::bail!(
216"{} files in the working directory contain changes that were \
217 not yet committed into git:\n\n{}\n\n\
218 to proceed despite this and include the uncommitted changes, pass the `--allow-dirty` flag",
219 dirty_src_files.len(),
220 dirty_src_files.join("\n")
221 )
222 }
223}
224225/// Checks whether "included" source files outside package root have been modified.
226///
227/// This currently looks at
228///
229/// * `package.readme` and `package.license-file` pointing to paths outside package root
230/// * symlinks targets reside outside package root
231///
232/// This is required because those paths may link to a file outside the
233/// current package root, but still under the git workdir, affecting the
234/// final packaged `.crate` file.
235fn dirty_files_outside_pkg_root(
236 pkg: &Package,
237 repo: &git2::Repository,
238 src_files: &[PathEntry],
239) -> CargoResult<HashSet<PathBuf>> {
240let pkg_root = pkg.root();
241let workdir = repo.workdir().unwrap();
242243let meta = pkg.manifest().metadata();
244let metadata_paths: Vec<_> = [&meta.license_file, &meta.readme]
245 .into_iter()
246 .filter_map(|p| p.as_deref())
247 .map(|path| paths::normalize_path(&pkg_root.join(path)))
248 .collect();
249250let mut dirty_symlinks = HashSet::new();
251for rel_path in src_files
252 .iter()
253 .filter(|p| p.is_symlink_or_under_symlink())
254 .map(|p| p.as_ref())
255 .chain(metadata_paths.iter())
256// If inside package root. Don't bother checking git status.
257.filter(|p| paths::strip_prefix_canonical(p, pkg_root).is_err())
258// Handle files outside package root but under git workdir,
259.filter_map(|p| paths::strip_prefix_canonical(p, workdir).ok())
260 {
261if repo.status_file(&rel_path)? != git2::Status::CURRENT {
262 dirty_symlinks.insert(workdir.join(rel_path));
263 }
264 }
265Ok(dirty_symlinks)
266}
267268/// Helper to collect dirty statuses for a single repo.
269fn collect_statuses(
270 repo: &git2::Repository,
271 pathspecs: &[&str],
272 dirty_files: &mut Vec<PathBuf>,
273) -> CargoResult<()> {
274let mut status_opts = git2::StatusOptions::new();
275// Exclude submodules, as they are being handled manually by recursing
276 // into each one so that details about specific files can be
277 // retrieved.
278pathspecs
279 .iter()
280 .fold(&mut status_opts, git2::StatusOptions::pathspec)
281 .exclude_submodules(true)
282 .include_ignored(true)
283 .include_untracked(true);
284let repo_statuses = repo.statuses(Some(&mut status_opts)).with_context(|| {
285format!(
286"failed to retrieve git status from repo {}",
287 repo.path().display()
288 )
289 })?;
290let workdir = repo.workdir().unwrap();
291let this_dirty = repo_statuses.iter().filter_map(|entry| {
292let path = entry.path().expect("valid utf-8 path");
293if path.ends_with("Cargo.lock") && entry.status() == git2::Status::IGNORED {
294// It is OK to include Cargo.lock even if it is ignored.
295return None;
296 }
297// Use an absolute path, so that comparing paths is easier
298 // (particularly with submodules).
299Some(workdir.join(path))
300 });
301 dirty_files.extend(this_dirty);
302Ok(())
303}
304305/// Helper to collect dirty statuses while recursing into submodules.
306fn status_submodules(repo: &git2::Repository, dirty_files: &mut Vec<PathBuf>) -> CargoResult<()> {
307for submodule in repo.submodules()? {
308// Ignore submodules that don't open, they are probably not initialized.
309 // If its files are required, then the verification step should fail.
310if let Ok(sub_repo) = submodule.open() {
311 status_submodules(&sub_repo, dirty_files)?;
312 collect_statuses(&sub_repo, &[], dirty_files)?;
313 }
314 }
315Ok(())
316}
317318/// Use pathspec so git only matches a certain path prefix
319fn relative_pathspec(repo: &git2::Repository, pkg_root: &Path) -> String {
320let workdir = repo.workdir().unwrap();
321let relpath = pkg_root.strip_prefix(workdir).unwrap_or(Path::new(""));
322// to unix separators
323relpath.to_str().unwrap().replace('\\', "/")
324}