cargo/ops/cargo_package/
vcs.rs

1//! Helpers to gather the VCS information for `cargo package`.
2
3use crate::core::{Package, Workspace};
4use crate::ops::PackageOpts;
5use crate::sources::PathEntry;
6use crate::{CargoResult, GlobalContext};
7
8use annotate_snippets::Level;
9use anyhow::Context;
10use cargo_util::paths;
11use gix::bstr::BString;
12use gix::bstr::ByteSlice;
13use gix::bstr::ByteVec;
14use gix::dir::walk::EmissionMode;
15use gix::dirwalk::Options;
16use gix::index::entry::Mode;
17use gix::status::tree_index::TrackRenames;
18use gix::worktree::stack::state::ignore::Source;
19use serde::Serialize;
20use std::path::{Path, PathBuf};
21use tracing::debug;
22
/// Represents the VCS information when packaging.
///
/// Produced by [`check_repo_state`] when the package lives in a git worktree
/// and the git status check yields a [`GitVcsInfo`].
#[derive(Serialize)]
pub struct VcsInfo {
    /// Git-specific details of the repository the package was found in.
    git: GitVcsInfo,
    /// Path to the package within repo (empty string if root).
    ///
    /// Derived from the parent directory of the package manifest;
    /// backslashes are replaced with `/` before it is stored.
    path_in_vcs: String,
}
30
/// Represents the Git VCS information when packaging.
#[derive(Serialize)]
pub struct GitVcsInfo {
    /// String form of the id that `HEAD` peels to.
    sha1: String,
    /// Indicate whether the Git worktree is dirty.
    ///
    /// Skipped during serialization when `false`,
    /// so the field only shows up in the output when it is `true`.
    #[serde(skip_serializing_if = "std::ops::Not::not")]
    dirty: bool,
}
39
40/// Checks if the package source is in a *git* DVCS repository.
41///
42/// If *git*, and the source is *dirty* (e.g., has uncommitted changes),
43/// and `--allow-dirty` has not been passed,
44/// then `bail!` with an informative message.
45/// Otherwise, return the sha1 hash of the current *HEAD* commit,
46/// or `None` if no repo is found.
47#[tracing::instrument(skip_all)]
48pub fn check_repo_state(
49    p: &Package,
50    src_files: &[PathEntry],
51    ws: &Workspace<'_>,
52    opts: &PackageOpts<'_>,
53) -> CargoResult<Option<VcsInfo>> {
54    let gctx = ws.gctx();
55    let Ok(mut repo) = gix::discover(p.root()) else {
56        gctx.shell().verbose(|shell| {
57            shell.warn(format_args!(
58                "no (git) VCS found for `{}`",
59                p.root().display()
60            ))
61        })?;
62        // No Git repo found. Have to assume it is clean.
63        return Ok(None);
64    };
65
66    let Some(workdir) = repo.workdir() else {
67        debug!(
68            "no (git) workdir found for repo at `{}`",
69            repo.path().display()
70        );
71        // No git workdir. Have to assume it is clean.
72        return Ok(None);
73    };
74
75    debug!("found a git repo at `{}`", workdir.display());
76    let path = p.manifest_path();
77
78    let manifest_exists = path.exists();
79    let path = paths::strip_prefix_canonical(path, workdir).unwrap_or_else(|_| path.to_path_buf());
80    let rela_path =
81        gix::path::to_unix_separators_on_windows(gix::path::os_str_into_bstr(path.as_os_str())?);
82    if !manifest_exists {
83        gctx.shell().verbose(|shell| {
84            shell.warn(format_args!(
85                "Cargo.toml not found at `{}` in workdir `{}`",
86                path.display(),
87                workdir.display()
88            ))
89        })?;
90        // No `Cargo.toml` found. This package may be irrelevant.
91        // Have to assume it is clean.
92        return Ok(None);
93    };
94
95    let manifest_is_ignored = {
96        let index = repo.index_or_empty()?;
97        let mut excludes =
98            repo.excludes(&index, None, Source::WorktreeThenIdMappingIfNotSkipped)?;
99        excludes
100            .at_entry(rela_path.as_bstr(), Some(Mode::FILE))?
101            .is_excluded()
102    };
103    if manifest_is_ignored {
104        gctx.shell().verbose(|shell| {
105            shell.warn(format_args!(
106                "found (git) Cargo.toml ignored at `{}` in workdir `{}`",
107                path.display(),
108                workdir.display()
109            ))
110        })?;
111        // An ignored `Cargo.toml` found. This package may be irrelevant.
112        // Have to assume it is clean.
113        return Ok(None);
114    }
115
116    warn_symlink_checked_out_as_plain_text_file(gctx, src_files, &repo)?;
117
118    debug!(
119        "found (git) Cargo.toml at `{}` in workdir `{}`",
120        path.display(),
121        workdir.display(),
122    );
123    let Some(git) = git(ws, p, src_files, &mut repo, &opts)? else {
124        // If the git repo lacks essential field like `sha1`, and since this field exists from the beginning,
125        // then don't generate the corresponding file in order to maintain consistency with past behavior.
126        return Ok(None);
127    };
128
129    let path_in_vcs = path
130        .parent()
131        .and_then(|p| p.to_str())
132        .unwrap_or("")
133        .replace("\\", "/");
134
135    Ok(Some(VcsInfo { git, path_in_vcs }))
136}
137
138/// Warns if any symlinks were checked out as plain text files.
139///
140/// Git config [`core.symlinks`] defaults to true when unset.
141/// In git-for-windows (and git as well),
142/// the config should be set to false explicitly when the repo was created,
143/// if symlink support wasn't detected [^1].
144///
145/// We assume the config was always set at creation time and never changed.
146/// So, if it is true, we don't bother users with any warning.
147///
148/// [^1]: <https://github.com/git-for-windows/git/blob/f1241afcc7956918d5da33ef74abd9cbba369247/setup.c#L2394-L2403>
149///
150/// [`core.symlinks`]: https://git-scm.com/docs/git-config#Documentation/git-config.txt-coresymlinks
151fn warn_symlink_checked_out_as_plain_text_file(
152    gctx: &GlobalContext,
153    src_files: &[PathEntry],
154    repo: &gix::Repository,
155) -> CargoResult<()> {
156    if repo
157        .config_snapshot()
158        .boolean(&gix::config::tree::Core::SYMLINKS)
159        .unwrap_or(true)
160    {
161        return Ok(());
162    }
163
164    if src_files.iter().any(|f| f.maybe_plain_text_symlink()) {
165        let msg = format!(
166            "found symbolic links that may be checked out as regular files for git repo at `{}/`",
167            repo.workdir().unwrap().display()
168        );
169        let mut notes = vec![
170            Level::NOTE.message(
171                "this might cause the `.crate` file to include incorrect or incomplete files",
172            ),
173            Level::HELP.message("to avoid this, set the Git config `core.symlinks` to `true`"),
174        ];
175        if cfg!(windows) {
176            notes.push(
177                Level::HELP.message("on Windows, enable the Developer Mode to support symlinks"),
178            );
179        };
180        gctx.shell().print_report(
181            &[Level::WARNING
182                .secondary_title(msg)
183                .elements(notes.into_iter())],
184            false,
185        )?;
186    }
187
188    Ok(())
189}
190
191/// The real git status check starts from here.
192fn git(
193    ws: &Workspace<'_>,
194    pkg: &Package,
195    src_files: &[PathEntry],
196    repo: &mut gix::Repository,
197    opts: &PackageOpts<'_>,
198) -> CargoResult<Option<GitVcsInfo>> {
199    {
200        let mut config = repo.config_snapshot_mut();
201        // This currently is only a very minor speedup for the biggest repositories,
202        // but might trigger creating many threads.
203        config.set_value(&gix::config::tree::Index::THREADS, "false")?;
204    }
205    // This is a collection of any dirty or untracked files. This covers:
206    // - new/modified/deleted/renamed/type change (index or worktree)
207    // - untracked files (which are "new" worktree files)
208    // - ignored (in case the user has an `include` directive that
209    //   conflicts with .gitignore).
210    let mut dirty_files = Vec::new();
211    let workdir = repo.workdir().unwrap();
212    collect_statuses(
213        repo,
214        workdir,
215        relative_package_root(repo, pkg.root()).as_deref(),
216        &mut dirty_files,
217    )?;
218
219    // Include each submodule so that the error message can provide
220    // specifically *which* files in a submodule are modified.
221    status_submodules(repo, &mut dirty_files)?;
222
223    // Find the intersection of dirty in git, and the src_files that would
224    // be packaged. This is a lazy n^2 check, but seems fine with
225    // thousands of files.
226    let cwd = ws.gctx().cwd();
227    let mut dirty_src_files: Vec<_> = src_files
228        .iter()
229        .filter(|src_file| {
230            if let Some(canon_src_file) = src_file.is_symlink_or_under_symlink().then(|| {
231                gix::path::realpath_opts(
232                    &src_file,
233                    ws.gctx().cwd(),
234                    gix::path::realpath::MAX_SYMLINKS,
235                )
236                .unwrap_or_else(|_| src_file.to_path_buf())
237            }) {
238                dirty_files
239                    .iter()
240                    .any(|path| canon_src_file.starts_with(path))
241            } else {
242                dirty_files.iter().any(|path| src_file.starts_with(path))
243            }
244        })
245        .map(|p| p.as_ref())
246        .chain(dirty_files_outside_pkg_root(ws, pkg, repo, src_files)?.iter())
247        .map(|path| {
248            pathdiff::diff_paths(path, cwd)
249                .as_ref()
250                .unwrap_or(path)
251                .display()
252                .to_string()
253        })
254        .collect();
255    let dirty = !dirty_src_files.is_empty();
256    if !dirty || opts.allow_dirty {
257        let maybe_head_id = repo.head()?.try_peel_to_id()?;
258        Ok(maybe_head_id.map(|id| GitVcsInfo {
259            sha1: id.to_string(),
260            dirty,
261        }))
262    } else {
263        dirty_src_files.sort_unstable();
264        anyhow::bail!(
265            "{} files in the working directory contain changes that were \
266             not yet committed into git:\n\n{}\n\n\
267             to proceed despite this and include the uncommitted changes, pass the `--allow-dirty` flag",
268            dirty_src_files.len(),
269            dirty_src_files.join("\n")
270        )
271    }
272}
273
274/// Helper to collect dirty statuses for a single repo.
275/// `relative_package_root` is `Some` if the root is a sub-directory of the workdir.
276/// Writes dirty files outside `relative_package_root` into `dirty_files_outside_package_root`,
277/// and all *everything else* into `dirty_files`.
278#[must_use]
279fn collect_statuses(
280    repo: &gix::Repository,
281    workdir: &Path,
282    relative_package_root: Option<&Path>,
283    dirty_files: &mut Vec<PathBuf>,
284) -> CargoResult<()> {
285    let statuses = repo
286        .status(gix::progress::Discard)?
287        .dirwalk_options(configure_dirwalk)
288        .tree_index_track_renames(TrackRenames::Disabled)
289        .index_worktree_submodules(None)
290        .into_iter(
291            relative_package_root.map(|rela_pkg_root| {
292                // Use `:(top)` magic signature to make the pathspec relative to
293                // the repo root, not the current working directory.
294                let mut pathspec = BString::from(":(top)");
295                let prefix =
296                    gix::path::to_unix_separators_on_windows(gix::path::into_bstr(rela_pkg_root));
297                pathspec.push_str(prefix.as_ref());
298                pathspec
299            }), /* pathspec patterns */
300        )
301        .with_context(|| {
302            format!(
303                "failed to begin git status for repo {}",
304                repo.path().display()
305            )
306        })?;
307
308    for status in statuses {
309        let status = status.with_context(|| {
310            format!(
311                "failed to retrieve git status from repo {}",
312                repo.path().display()
313            )
314        })?;
315
316        let rel_path = gix::path::from_bstr(status.location());
317        let path = workdir.join(&rel_path);
318        // It is OK to include Cargo.lock even if it is ignored.
319        if path.ends_with("Cargo.lock")
320            && matches!(
321                &status,
322                gix::status::Item::IndexWorktree(
323                    gix::status::index_worktree::Item::DirectoryContents { entry, .. }
324                ) if matches!(entry.status, gix::dir::entry::Status::Ignored(_))
325            )
326        {
327            continue;
328        }
329
330        dirty_files.push(path);
331    }
332    Ok(())
333}
334
335/// Helper to collect dirty statuses while recursing into submodules.
336fn status_submodules(repo: &gix::Repository, dirty_files: &mut Vec<PathBuf>) -> CargoResult<()> {
337    let Some(submodules) = repo.submodules()? else {
338        return Ok(());
339    };
340    for submodule in submodules {
341        // Ignore submodules that don't open, they are probably not initialized.
342        // If its files are required, then the verification step should fail.
343        if let Some(sub_repo) = submodule.open()? {
344            let Some(workdir) = sub_repo.workdir() else {
345                continue;
346            };
347            status_submodules(&sub_repo, dirty_files)?;
348            collect_statuses(&sub_repo, workdir, None, dirty_files)?;
349        }
350    }
351    Ok(())
352}
353
354/// Make `pkg_root` relative to the `repo` workdir.
355fn relative_package_root(repo: &gix::Repository, pkg_root: &Path) -> Option<PathBuf> {
356    let workdir = repo.workdir().unwrap();
357    let rela_root = pkg_root.strip_prefix(workdir).unwrap_or(Path::new(""));
358    if rela_root.as_os_str().is_empty() {
359        None
360    } else {
361        rela_root.to_owned().into()
362    }
363}
364
365/// Checks whether "included" source files outside package root have been modified.
366///
367/// This currently looks at
368///
369/// * `package.readme` and `package.license-file` pointing to paths outside package root
370/// * symlinks targets residing outside package root
371/// * Any change in the root workspace manifest, regardless of what has changed.
372///
373/// This is required because those paths may link to a file outside the
374/// current package root, but still under the git workdir, affecting the
375/// final packaged `.crate` file.
376fn dirty_files_outside_pkg_root(
377    ws: &Workspace<'_>,
378    pkg: &Package,
379    repo: &gix::Repository,
380    src_files: &[PathEntry],
381) -> CargoResult<Vec<PathBuf>> {
382    let pkg_root = pkg.root();
383    let workdir = repo.workdir().unwrap();
384
385    let meta = pkg.manifest().metadata();
386    let metadata_paths: Vec<_> = [&meta.license_file, &meta.readme]
387        .into_iter()
388        .filter_map(|p| p.as_deref())
389        .map(|path| paths::normalize_path(&pkg_root.join(path)))
390        .collect();
391
392    let linked_files_outside_package_root: Vec<_> = src_files
393        .iter()
394        .filter(|p| p.is_symlink_or_under_symlink())
395        .map(|p| p.as_ref().as_path())
396        .chain(metadata_paths.iter().map(AsRef::as_ref))
397        .chain([ws.root_manifest()])
398        // If inside package root. Don't bother checking git status.
399        .filter(|p| paths::strip_prefix_canonical(p, pkg_root).is_err())
400        // Handle files outside package root but under git workdir,
401        .filter_map(|p| paths::strip_prefix_canonical(p, workdir).ok())
402        .collect();
403
404    if linked_files_outside_package_root.is_empty() {
405        return Ok(Vec::new());
406    }
407
408    let statuses = repo
409        .status(gix::progress::Discard)?
410        .dirwalk_options(configure_dirwalk)
411        // Limit the amount of threads for used for the worktree status, as the pathspec will
412        // prevent most paths from being visited anyway there is not much work.
413        .index_worktree_options_mut(|opts| opts.thread_limit = Some(1))
414        .tree_index_track_renames(TrackRenames::Disabled)
415        .index_worktree_submodules(None)
416        .into_iter(
417            linked_files_outside_package_root
418                .into_iter()
419                .map(|p| gix::path::into_bstr(p).into_owned()),
420        )
421        .with_context(|| {
422            format!(
423                "failed to begin git status for outfor repo {}",
424                repo.path().display()
425            )
426        })?;
427
428    let mut dirty_files = Vec::new();
429    for status in statuses {
430        let status = status.with_context(|| {
431            format!(
432                "failed to retrieve git status from repo {}",
433                repo.path().display()
434            )
435        })?;
436
437        let rel_path = gix::path::from_bstr(status.location());
438        let path = workdir.join(&rel_path);
439        dirty_files.push(path);
440    }
441    Ok(dirty_files)
442}
443
444fn configure_dirwalk(opts: Options) -> Options {
445    opts.emit_untracked(gix::dir::walk::EmissionMode::Matching)
446        // Also pick up ignored files or whole directories
447        // to specifically catch overzealously ignored source files.
448        // Later we will match these dirs by prefix, which is why collapsing
449        // them is desirable here.
450        .emit_ignored(Some(EmissionMode::CollapseDirectory))
451        .emit_tracked(false)
452        .recurse_repositories(false)
453        .symlinks_to_directories_are_ignored_like_directories(true)
454        .emit_empty_directories(false)
455}