cargo/ops/cargo_package/
vcs.rs

1//! Helpers to gather the VCS information for `cargo package`.
2
3use std::collections::HashSet;
4use std::path::Path;
5use std::path::PathBuf;
6
7use anyhow::Context as _;
8use cargo_util::paths;
9use serde::Serialize;
10use tracing::debug;
11
12use crate::core::Package;
13use crate::sources::PathEntry;
14use crate::CargoResult;
15use crate::GlobalContext;
16
17use super::PackageOpts;
18
19/// Represents the VCS information when packaging.
20#[derive(Serialize)]
21pub struct VcsInfo {
22    git: GitVcsInfo,
23    /// Path to the package within repo (empty string if root).
24    path_in_vcs: String,
25}
26
27/// Represents the Git VCS information when packaging.
28#[derive(Serialize)]
29pub struct GitVcsInfo {
30    sha1: String,
31    /// Indicate whether or not the Git worktree is dirty.
32    #[serde(skip_serializing_if = "std::ops::Not::not")]
33    dirty: bool,
34}
35
36/// Checks if the package source is in a *git* DVCS repository.
37///
38/// If *git*, and the source is *dirty* (e.g., has uncommitted changes),
39/// and `--allow-dirty` has not been passed,
40/// then `bail!` with an informative message.
41/// Otherwise return the sha1 hash of the current *HEAD* commit,
42/// or `None` if no repo is found.
43#[tracing::instrument(skip_all)]
44pub fn check_repo_state(
45    p: &Package,
46    src_files: &[PathEntry],
47    gctx: &GlobalContext,
48    opts: &PackageOpts<'_>,
49) -> CargoResult<Option<VcsInfo>> {
50    let Ok(repo) = git2::Repository::discover(p.root()) else {
51        gctx.shell().verbose(|shell| {
52            shell.warn(format!("no (git) VCS found for `{}`", p.root().display()))
53        })?;
54        // No Git repo found. Have to assume it is clean.
55        return Ok(None);
56    };
57
58    let Some(workdir) = repo.workdir() else {
59        debug!(
60            "no (git) workdir found for repo at `{}`",
61            repo.path().display()
62        );
63        // No git workdir. Have to assume it is clean.
64        return Ok(None);
65    };
66
67    debug!("found a git repo at `{}`", workdir.display());
68    let path = p.manifest_path();
69    let path = paths::strip_prefix_canonical(path, workdir).unwrap_or_else(|_| path.to_path_buf());
70    let Ok(status) = repo.status_file(&path) else {
71        gctx.shell().verbose(|shell| {
72            shell.warn(format!(
73                "no (git) Cargo.toml found at `{}` in workdir `{}`",
74                path.display(),
75                workdir.display()
76            ))
77        })?;
78        // No checked-in `Cargo.toml` found. This package may be irrelevant.
79        // Have to assume it is clean.
80        return Ok(None);
81    };
82
83    if !(status & git2::Status::IGNORED).is_empty() {
84        gctx.shell().verbose(|shell| {
85            shell.warn(format!(
86                "found (git) Cargo.toml ignored at `{}` in workdir `{}`",
87                path.display(),
88                workdir.display()
89            ))
90        })?;
91        // An ignored `Cargo.toml` found. This package may be irrelevant.
92        // Have to assume it is clean.
93        return Ok(None);
94    }
95
96    warn_symlink_checked_out_as_plain_text_file(gctx, src_files, &repo)?;
97
98    debug!(
99        "found (git) Cargo.toml at `{}` in workdir `{}`",
100        path.display(),
101        workdir.display(),
102    );
103    let path_in_vcs = path
104        .parent()
105        .and_then(|p| p.to_str())
106        .unwrap_or("")
107        .replace("\\", "/");
108    let Some(git) = git(p, gctx, src_files, &repo, &opts)? else {
109        // If the git repo lacks essensial field like `sha1`, and since this field exists from the beginning,
110        // then don't generate the corresponding file in order to maintain consistency with past behavior.
111        return Ok(None);
112    };
113
114    return Ok(Some(VcsInfo { git, path_in_vcs }));
115}
116
117/// Warns if any symlinks were checked out as plain text files.
118///
119/// Git config [`core.symlinks`] defaults to true when unset.
120/// In git-for-windows (and git as well),
121/// the config should be set to false explicitly when the repo was created,
122/// if symlink support wasn't detected [^1].
123///
124/// We assume the config was always set at creation time and never changed.
125/// So, if it is true, we don't bother users with any warning.
126///
127/// [^1]: <https://github.com/git-for-windows/git/blob/f1241afcc7956918d5da33ef74abd9cbba369247/setup.c#L2394-L2403>
128///
129/// [`core.symlinks`]: https://git-scm.com/docs/git-config#Documentation/git-config.txt-coresymlinks
130fn warn_symlink_checked_out_as_plain_text_file(
131    gctx: &GlobalContext,
132    src_files: &[PathEntry],
133    repo: &git2::Repository,
134) -> CargoResult<()> {
135    if repo
136        .config()
137        .and_then(|c| c.get_bool("core.symlinks"))
138        .unwrap_or(true)
139    {
140        return Ok(());
141    }
142
143    if src_files.iter().any(|f| f.maybe_plain_text_symlink()) {
144        let mut shell = gctx.shell();
145        shell.warn(format_args!(
146            "found symbolic links that may be checked out as regular files for git repo at `{}`\n\
147            This might cause the `.crate` file to include incorrect or incomplete files",
148            repo.workdir().unwrap().display(),
149        ))?;
150        let extra_note = if cfg!(windows) {
151            "\nAnd on Windows, enable the Developer Mode to support symlinks"
152        } else {
153            ""
154        };
155        shell.note(format_args!(
156            "to avoid this, set the Git config `core.symlinks` to `true`{extra_note}",
157        ))?;
158    }
159
160    Ok(())
161}
162
163/// The real git status check starts from here.
164fn git(
165    pkg: &Package,
166    gctx: &GlobalContext,
167    src_files: &[PathEntry],
168    repo: &git2::Repository,
169    opts: &PackageOpts<'_>,
170) -> CargoResult<Option<GitVcsInfo>> {
171    // This is a collection of any dirty or untracked files. This covers:
172    // - new/modified/deleted/renamed/type change (index or worktree)
173    // - untracked files (which are "new" worktree files)
174    // - ignored (in case the user has an `include` directive that
175    //   conflicts with .gitignore).
176    let mut dirty_files = Vec::new();
177    let pathspec = relative_pathspec(repo, pkg.root());
178    collect_statuses(repo, &[pathspec.as_str()], &mut dirty_files)?;
179
180    // Include each submodule so that the error message can provide
181    // specifically *which* files in a submodule are modified.
182    status_submodules(repo, &mut dirty_files)?;
183
184    // Find the intersection of dirty in git, and the src_files that would
185    // be packaged. This is a lazy n^2 check, but seems fine with
186    // thousands of files.
187    let cwd = gctx.cwd();
188    let mut dirty_src_files: Vec<_> = src_files
189        .iter()
190        .filter(|src_file| dirty_files.iter().any(|path| src_file.starts_with(path)))
191        .map(|p| p.as_ref())
192        .chain(dirty_files_outside_pkg_root(pkg, repo, src_files)?.iter())
193        .map(|path| {
194            pathdiff::diff_paths(path, cwd)
195                .as_ref()
196                .unwrap_or(path)
197                .display()
198                .to_string()
199        })
200        .collect();
201    let dirty = !dirty_src_files.is_empty();
202    if !dirty || opts.allow_dirty {
203        // Must check whetherthe repo has no commit firstly, otherwise `revparse_single` would fail on bare commit repo.
204        // Due to lacking the `sha1` field, it's better not record the `GitVcsInfo` for consistency.
205        if repo.is_empty()? {
206            return Ok(None);
207        }
208        let rev_obj = repo.revparse_single("HEAD")?;
209        Ok(Some(GitVcsInfo {
210            sha1: rev_obj.id().to_string(),
211            dirty,
212        }))
213    } else {
214        dirty_src_files.sort_unstable();
215        anyhow::bail!(
216            "{} files in the working directory contain changes that were \
217             not yet committed into git:\n\n{}\n\n\
218             to proceed despite this and include the uncommitted changes, pass the `--allow-dirty` flag",
219            dirty_src_files.len(),
220            dirty_src_files.join("\n")
221        )
222    }
223}
224
225/// Checks whether "included" source files outside package root have been modified.
226///
227/// This currently looks at
228///
229/// * `package.readme` and `package.license-file` pointing to paths outside package root
230/// * symlinks targets reside outside package root
231///
232/// This is required because those paths may link to a file outside the
233/// current package root, but still under the git workdir, affecting the
234/// final packaged `.crate` file.
235fn dirty_files_outside_pkg_root(
236    pkg: &Package,
237    repo: &git2::Repository,
238    src_files: &[PathEntry],
239) -> CargoResult<HashSet<PathBuf>> {
240    let pkg_root = pkg.root();
241    let workdir = repo.workdir().unwrap();
242
243    let meta = pkg.manifest().metadata();
244    let metadata_paths: Vec<_> = [&meta.license_file, &meta.readme]
245        .into_iter()
246        .filter_map(|p| p.as_deref())
247        .map(|path| paths::normalize_path(&pkg_root.join(path)))
248        .collect();
249
250    let mut dirty_symlinks = HashSet::new();
251    for rel_path in src_files
252        .iter()
253        .filter(|p| p.is_symlink_or_under_symlink())
254        .map(|p| p.as_ref())
255        .chain(metadata_paths.iter())
256        // If inside package root. Don't bother checking git status.
257        .filter(|p| paths::strip_prefix_canonical(p, pkg_root).is_err())
258        // Handle files outside package root but under git workdir,
259        .filter_map(|p| paths::strip_prefix_canonical(p, workdir).ok())
260    {
261        if repo.status_file(&rel_path)? != git2::Status::CURRENT {
262            dirty_symlinks.insert(workdir.join(rel_path));
263        }
264    }
265    Ok(dirty_symlinks)
266}
267
268/// Helper to collect dirty statuses for a single repo.
269fn collect_statuses(
270    repo: &git2::Repository,
271    pathspecs: &[&str],
272    dirty_files: &mut Vec<PathBuf>,
273) -> CargoResult<()> {
274    let mut status_opts = git2::StatusOptions::new();
275    // Exclude submodules, as they are being handled manually by recursing
276    // into each one so that details about specific files can be
277    // retrieved.
278    pathspecs
279        .iter()
280        .fold(&mut status_opts, git2::StatusOptions::pathspec)
281        .exclude_submodules(true)
282        .include_ignored(true)
283        .include_untracked(true);
284    let repo_statuses = repo.statuses(Some(&mut status_opts)).with_context(|| {
285        format!(
286            "failed to retrieve git status from repo {}",
287            repo.path().display()
288        )
289    })?;
290    let workdir = repo.workdir().unwrap();
291    let this_dirty = repo_statuses.iter().filter_map(|entry| {
292        let path = entry.path().expect("valid utf-8 path");
293        if path.ends_with("Cargo.lock") && entry.status() == git2::Status::IGNORED {
294            // It is OK to include Cargo.lock even if it is ignored.
295            return None;
296        }
297        // Use an absolute path, so that comparing paths is easier
298        // (particularly with submodules).
299        Some(workdir.join(path))
300    });
301    dirty_files.extend(this_dirty);
302    Ok(())
303}
304
305/// Helper to collect dirty statuses while recursing into submodules.
306fn status_submodules(repo: &git2::Repository, dirty_files: &mut Vec<PathBuf>) -> CargoResult<()> {
307    for submodule in repo.submodules()? {
308        // Ignore submodules that don't open, they are probably not initialized.
309        // If its files are required, then the verification step should fail.
310        if let Ok(sub_repo) = submodule.open() {
311            status_submodules(&sub_repo, dirty_files)?;
312            collect_statuses(&sub_repo, &[], dirty_files)?;
313        }
314    }
315    Ok(())
316}
317
318/// Use pathspec so git only matches a certain path prefix
319fn relative_pathspec(repo: &git2::Repository, pkg_root: &Path) -> String {
320    let workdir = repo.workdir().unwrap();
321    let relpath = pkg_root.strip_prefix(workdir).unwrap_or(Path::new(""));
322    // to unix separators
323    relpath.to_str().unwrap().replace('\\', "/")
324}