cargo/ops/cargo_package/
vcs.rs

1//! Helpers to gather the VCS information for `cargo package`.
2
3use std::collections::HashSet;
4use std::path::Path;
5use std::path::PathBuf;
6
7use anyhow::Context as _;
8use cargo_util::paths;
9use serde::Serialize;
10use tracing::debug;
11
12use crate::core::Package;
13use crate::core::Workspace;
14use crate::ops::lockfile::LOCKFILE_NAME;
15use crate::sources::PathEntry;
16use crate::CargoResult;
17use crate::GlobalContext;
18
19use super::PackageOpts;
20
/// Represents the VCS information when packaging.
///
/// This is the value produced by [`check_repo_state`] when the package lives
/// in a git work tree and its HEAD commit can be resolved. It derives
/// [`Serialize`] so the caller can serialize it.
#[derive(Serialize)]
pub struct VcsInfo {
    /// Git-specific details (commit id and dirtiness) of the repository.
    git: GitVcsInfo,
    /// Path to the package within repo (empty string if root).
    ///
    /// Computed from the parent directory of the package manifest relative to
    /// the git working directory, with `\` separators replaced by `/`.
    path_in_vcs: String,
}
28
/// Represents the Git VCS information when packaging.
#[derive(Serialize)]
pub struct GitVcsInfo {
    /// String form of the object id that `HEAD` resolves to.
    sha1: String,
    /// Indicate whether or not the Git worktree is dirty.
    ///
    /// The field is skipped during serialization when it is `false`
    /// (`std::ops::Not::not` evaluates to `true` for a `false` value),
    /// so it only shows up in the output for dirty worktrees.
    #[serde(skip_serializing_if = "std::ops::Not::not")]
    dirty: bool,
}
37
38/// Checks if the package source is in a *git* DVCS repository.
39///
40/// If *git*, and the source is *dirty* (e.g., has uncommitted changes),
41/// and `--allow-dirty` has not been passed,
42/// then `bail!` with an informative message.
43/// Otherwise return the sha1 hash of the current *HEAD* commit,
44/// or `None` if no repo is found.
45#[tracing::instrument(skip_all)]
46pub fn check_repo_state(
47    p: &Package,
48    src_files: &[PathEntry],
49    ws: &Workspace<'_>,
50    opts: &PackageOpts<'_>,
51) -> CargoResult<Option<VcsInfo>> {
52    let gctx = ws.gctx();
53    let Ok(repo) = git2::Repository::discover(p.root()) else {
54        gctx.shell().verbose(|shell| {
55            shell.warn(format_args!(
56                "no (git) VCS found for `{}`",
57                p.root().display()
58            ))
59        })?;
60        // No Git repo found. Have to assume it is clean.
61        return Ok(None);
62    };
63
64    let Some(workdir) = repo.workdir() else {
65        debug!(
66            "no (git) workdir found for repo at `{}`",
67            repo.path().display()
68        );
69        // No git workdir. Have to assume it is clean.
70        return Ok(None);
71    };
72
73    debug!("found a git repo at `{}`", workdir.display());
74    let path = p.manifest_path();
75    let path = paths::strip_prefix_canonical(path, workdir).unwrap_or_else(|_| path.to_path_buf());
76    let Ok(status) = repo.status_file(&path) else {
77        gctx.shell().verbose(|shell| {
78            shell.warn(format_args!(
79                "no (git) Cargo.toml found at `{}` in workdir `{}`",
80                path.display(),
81                workdir.display()
82            ))
83        })?;
84        // No checked-in `Cargo.toml` found. This package may be irrelevant.
85        // Have to assume it is clean.
86        return Ok(None);
87    };
88
89    if !(status & git2::Status::IGNORED).is_empty() {
90        gctx.shell().verbose(|shell| {
91            shell.warn(format_args!(
92                "found (git) Cargo.toml ignored at `{}` in workdir `{}`",
93                path.display(),
94                workdir.display()
95            ))
96        })?;
97        // An ignored `Cargo.toml` found. This package may be irrelevant.
98        // Have to assume it is clean.
99        return Ok(None);
100    }
101
102    warn_symlink_checked_out_as_plain_text_file(gctx, src_files, &repo)?;
103
104    debug!(
105        "found (git) Cargo.toml at `{}` in workdir `{}`",
106        path.display(),
107        workdir.display(),
108    );
109    let Some(git) = git(ws, p, src_files, &repo, &opts)? else {
110        // If the git repo lacks essensial field like `sha1`, and since this field exists from the beginning,
111        // then don't generate the corresponding file in order to maintain consistency with past behavior.
112        return Ok(None);
113    };
114
115    let path_in_vcs = path
116        .parent()
117        .and_then(|p| p.to_str())
118        .unwrap_or("")
119        .replace("\\", "/");
120
121    return Ok(Some(VcsInfo { git, path_in_vcs }));
122}
123
124/// Warns if any symlinks were checked out as plain text files.
125///
126/// Git config [`core.symlinks`] defaults to true when unset.
127/// In git-for-windows (and git as well),
128/// the config should be set to false explicitly when the repo was created,
129/// if symlink support wasn't detected [^1].
130///
131/// We assume the config was always set at creation time and never changed.
132/// So, if it is true, we don't bother users with any warning.
133///
134/// [^1]: <https://github.com/git-for-windows/git/blob/f1241afcc7956918d5da33ef74abd9cbba369247/setup.c#L2394-L2403>
135///
136/// [`core.symlinks`]: https://git-scm.com/docs/git-config#Documentation/git-config.txt-coresymlinks
137fn warn_symlink_checked_out_as_plain_text_file(
138    gctx: &GlobalContext,
139    src_files: &[PathEntry],
140    repo: &git2::Repository,
141) -> CargoResult<()> {
142    if repo
143        .config()
144        .and_then(|c| c.get_bool("core.symlinks"))
145        .unwrap_or(true)
146    {
147        return Ok(());
148    }
149
150    if src_files.iter().any(|f| f.maybe_plain_text_symlink()) {
151        let mut shell = gctx.shell();
152        shell.warn(format_args!(
153            "found symbolic links that may be checked out as regular files for git repo at `{}`\n\
154            This might cause the `.crate` file to include incorrect or incomplete files",
155            repo.workdir().unwrap().display(),
156        ))?;
157        let extra_note = if cfg!(windows) {
158            "\nAnd on Windows, enable the Developer Mode to support symlinks"
159        } else {
160            ""
161        };
162        shell.note(format_args!(
163            "to avoid this, set the Git config `core.symlinks` to `true`{extra_note}",
164        ))?;
165    }
166
167    Ok(())
168}
169
170/// The real git status check starts from here.
171fn git(
172    ws: &Workspace<'_>,
173    pkg: &Package,
174    src_files: &[PathEntry],
175    repo: &git2::Repository,
176    opts: &PackageOpts<'_>,
177) -> CargoResult<Option<GitVcsInfo>> {
178    // This is a collection of any dirty or untracked files. This covers:
179    // - new/modified/deleted/renamed/type change (index or worktree)
180    // - untracked files (which are "new" worktree files)
181    // - ignored (in case the user has an `include` directive that
182    //   conflicts with .gitignore).
183    let mut dirty_files = Vec::new();
184    let pathspec = relative_pathspec(repo, pkg.root());
185    collect_statuses(repo, &[pathspec.as_str()], &mut dirty_files)?;
186
187    // Include each submodule so that the error message can provide
188    // specifically *which* files in a submodule are modified.
189    status_submodules(repo, &mut dirty_files)?;
190
191    // Find the intersection of dirty in git, and the src_files that would
192    // be packaged. This is a lazy n^2 check, but seems fine with
193    // thousands of files.
194    let cwd = ws.gctx().cwd();
195    let mut dirty_src_files: Vec<_> = src_files
196        .iter()
197        .filter(|src_file| dirty_files.iter().any(|path| src_file.starts_with(path)))
198        .map(|p| p.as_ref())
199        .chain(dirty_files_outside_pkg_root(ws, pkg, repo, src_files)?.iter())
200        .map(|path| {
201            pathdiff::diff_paths(path, cwd)
202                .as_ref()
203                .unwrap_or(path)
204                .display()
205                .to_string()
206        })
207        .collect();
208    let dirty = !dirty_src_files.is_empty();
209    if !dirty || opts.allow_dirty {
210        // Must check whetherthe repo has no commit firstly, otherwise `revparse_single` would fail on bare commit repo.
211        // Due to lacking the `sha1` field, it's better not record the `GitVcsInfo` for consistency.
212        if repo.is_empty()? {
213            return Ok(None);
214        }
215        let rev_obj = repo.revparse_single("HEAD")?;
216        Ok(Some(GitVcsInfo {
217            sha1: rev_obj.id().to_string(),
218            dirty,
219        }))
220    } else {
221        dirty_src_files.sort_unstable();
222        anyhow::bail!(
223            "{} files in the working directory contain changes that were \
224             not yet committed into git:\n\n{}\n\n\
225             to proceed despite this and include the uncommitted changes, pass the `--allow-dirty` flag",
226            dirty_src_files.len(),
227            dirty_src_files.join("\n")
228        )
229    }
230}
231
232/// Checks whether "included" source files outside package root have been modified.
233///
234/// This currently looks at
235///
236/// * `package.readme` and `package.license-file` pointing to paths outside package root
237/// * symlinks targets reside outside package root
238/// * Any change in the root workspace manifest, regardless of what has changed.
239/// * Changes in the lockfile [^1].
240///
241/// This is required because those paths may link to a file outside the
242/// current package root, but still under the git workdir, affecting the
243/// final packaged `.crate` file.
244///
245/// [^1]: Lockfile might be re-generated if it is too out of sync with the manifest.
246///       Therefore, even you have a modified lockfile,
247///       you might still get a new fresh one that matches what is in git index.
248fn dirty_files_outside_pkg_root(
249    ws: &Workspace<'_>,
250    pkg: &Package,
251    repo: &git2::Repository,
252    src_files: &[PathEntry],
253) -> CargoResult<HashSet<PathBuf>> {
254    let pkg_root = pkg.root();
255    let workdir = repo.workdir().unwrap();
256
257    let mut dirty_files = HashSet::new();
258
259    let meta = pkg.manifest().metadata();
260    let metadata_paths: Vec<_> = [&meta.license_file, &meta.readme]
261        .into_iter()
262        .filter_map(|p| p.as_deref())
263        .map(|path| paths::normalize_path(&pkg_root.join(path)))
264        .collect();
265
266    // Unlike other files, lockfile is allowed to be missing,
267    // and can be generated during packaging.
268    // We skip checking when it is missing in both workdir and git index,
269    // otherwise cargo will fail with git2 not found error.
270    let lockfile_path = ws.lock_root().as_path_unlocked().join(LOCKFILE_NAME);
271    let lockfile_path = if lockfile_path.exists() {
272        Some(lockfile_path)
273    } else if let Ok(rel_path) = paths::normalize_path(&lockfile_path).strip_prefix(workdir) {
274        // We don't canonicalize here because non-existing path can't be canonicalized.
275        match repo.status_file(&rel_path) {
276            Ok(s) if s != git2::Status::CURRENT => {
277                dirty_files.insert(lockfile_path);
278            }
279            // Unmodified
280            Ok(_) => {}
281            Err(e) => {
282                debug!(
283                    "check git status failed for `{}` in workdir `{}`: {e}",
284                    rel_path.display(),
285                    workdir.display(),
286                );
287            }
288        }
289        None
290    } else {
291        None
292    };
293
294    for rel_path in src_files
295        .iter()
296        .filter(|p| p.is_symlink_or_under_symlink())
297        .map(|p| p.as_ref().as_path())
298        .chain(metadata_paths.iter().map(AsRef::as_ref))
299        .chain([ws.root_manifest()])
300        .chain(lockfile_path.as_deref().into_iter())
301        // If inside package root. Don't bother checking git status.
302        .filter(|p| paths::strip_prefix_canonical(p, pkg_root).is_err())
303        // Handle files outside package root but under git workdir,
304        .filter_map(|p| paths::strip_prefix_canonical(p, workdir).ok())
305    {
306        if repo.status_file(&rel_path)? != git2::Status::CURRENT {
307            dirty_files.insert(workdir.join(rel_path));
308        }
309    }
310    Ok(dirty_files)
311}
312
313/// Helper to collect dirty statuses for a single repo.
314fn collect_statuses(
315    repo: &git2::Repository,
316    pathspecs: &[&str],
317    dirty_files: &mut Vec<PathBuf>,
318) -> CargoResult<()> {
319    let mut status_opts = git2::StatusOptions::new();
320    // Exclude submodules, as they are being handled manually by recursing
321    // into each one so that details about specific files can be
322    // retrieved.
323    pathspecs
324        .iter()
325        .fold(&mut status_opts, git2::StatusOptions::pathspec)
326        .exclude_submodules(true)
327        .include_ignored(true)
328        .include_untracked(true);
329    let repo_statuses = repo.statuses(Some(&mut status_opts)).with_context(|| {
330        format!(
331            "failed to retrieve git status from repo {}",
332            repo.path().display()
333        )
334    })?;
335    let workdir = repo.workdir().unwrap();
336    let this_dirty = repo_statuses.iter().filter_map(|entry| {
337        let path = entry.path().expect("valid utf-8 path");
338        if path.ends_with("Cargo.lock") && entry.status() == git2::Status::IGNORED {
339            // It is OK to include Cargo.lock even if it is ignored.
340            return None;
341        }
342        // Use an absolute path, so that comparing paths is easier
343        // (particularly with submodules).
344        Some(workdir.join(path))
345    });
346    dirty_files.extend(this_dirty);
347    Ok(())
348}
349
350/// Helper to collect dirty statuses while recursing into submodules.
351fn status_submodules(repo: &git2::Repository, dirty_files: &mut Vec<PathBuf>) -> CargoResult<()> {
352    for submodule in repo.submodules()? {
353        // Ignore submodules that don't open, they are probably not initialized.
354        // If its files are required, then the verification step should fail.
355        if let Ok(sub_repo) = submodule.open() {
356            status_submodules(&sub_repo, dirty_files)?;
357            collect_statuses(&sub_repo, &[], dirty_files)?;
358        }
359    }
360    Ok(())
361}
362
363/// Use pathspec so git only matches a certain path prefix
364fn relative_pathspec(repo: &git2::Repository, pkg_root: &Path) -> String {
365    let workdir = repo.workdir().unwrap();
366    let relpath = pkg_root.strip_prefix(workdir).unwrap_or(Path::new(""));
367    // to unix separators
368    relpath.to_str().unwrap().replace('\\', "/")
369}