cargo/ops/cargo_package/
vcs.rs

1//! Helpers to gather the VCS information for `cargo package`.
2
3use crate::core::{Package, Workspace};
4use crate::ops::PackageOpts;
5use crate::sources::PathEntry;
6use crate::{CargoResult, GlobalContext};
7use annotate_snippets::Level;
8use anyhow::Context;
9use cargo_util::paths;
10use gix::bstr::ByteSlice;
11use gix::dir::walk::EmissionMode;
12use gix::dirwalk::Options;
13use gix::index::entry::Mode;
14use gix::status::tree_index::TrackRenames;
15use gix::worktree::stack::state::ignore::Source;
16use serde::Serialize;
17use std::path::{Path, PathBuf};
18use tracing::debug;
19
/// Represents the VCS information when packaging.
///
/// Produced by [`check_repo_state`] once a git repository containing the
/// package has been found and inspected.
#[derive(Serialize)]
pub struct VcsInfo {
    /// Git-specific details of the repository the package lives in.
    git: GitVcsInfo,
    /// Path to the package within repo (empty string if root).
    ///
    /// [`check_repo_state`] rewrites any `\` in this path to `/`.
    path_in_vcs: String,
}
27
/// Represents the Git VCS information when packaging.
#[derive(Serialize)]
pub struct GitVcsInfo {
    /// Id that the repository's `HEAD` peels to, rendered as a string.
    sha1: String,
    /// Indicate whether the Git worktree is dirty.
    ///
    /// Set when at least one file relevant to the package was reported as
    /// dirty by the status check. The field is left out of the serialized
    /// output when it is `false`.
    #[serde(skip_serializing_if = "std::ops::Not::not")]
    dirty: bool,
}
36
37/// Checks if the package source is in a *git* DVCS repository.
38///
39/// If *git*, and the source is *dirty* (e.g., has uncommitted changes),
40/// and `--allow-dirty` has not been passed,
41/// then `bail!` with an informative message.
42/// Otherwise, return the sha1 hash of the current *HEAD* commit,
43/// or `None` if no repo is found.
44#[tracing::instrument(skip_all)]
45pub fn check_repo_state(
46    p: &Package,
47    src_files: &[PathEntry],
48    ws: &Workspace<'_>,
49    opts: &PackageOpts<'_>,
50) -> CargoResult<Option<VcsInfo>> {
51    let gctx = ws.gctx();
52    let Ok(mut repo) = gix::discover(p.root()) else {
53        gctx.shell().verbose(|shell| {
54            shell.warn(format_args!(
55                "no (git) VCS found for `{}`",
56                p.root().display()
57            ))
58        })?;
59        // No Git repo found. Have to assume it is clean.
60        return Ok(None);
61    };
62
63    let Some(workdir) = repo.workdir() else {
64        debug!(
65            "no (git) workdir found for repo at `{}`",
66            repo.path().display()
67        );
68        // No git workdir. Have to assume it is clean.
69        return Ok(None);
70    };
71
72    debug!("found a git repo at `{}`", workdir.display());
73    let path = p.manifest_path();
74
75    let manifest_exists = path.exists();
76    let path = paths::strip_prefix_canonical(path, workdir).unwrap_or_else(|_| path.to_path_buf());
77    let rela_path =
78        gix::path::to_unix_separators_on_windows(gix::path::os_str_into_bstr(path.as_os_str())?);
79    if !manifest_exists {
80        gctx.shell().verbose(|shell| {
81            shell.warn(format_args!(
82                "Cargo.toml not found at `{}` in workdir `{}`",
83                path.display(),
84                workdir.display()
85            ))
86        })?;
87        // No `Cargo.toml` found. This package may be irrelevant.
88        // Have to assume it is clean.
89        return Ok(None);
90    };
91
92    let manifest_is_ignored = {
93        let index = repo.index_or_empty()?;
94        let mut excludes =
95            repo.excludes(&index, None, Source::WorktreeThenIdMappingIfNotSkipped)?;
96        excludes
97            .at_entry(rela_path.as_bstr(), Some(Mode::FILE))?
98            .is_excluded()
99    };
100    if manifest_is_ignored {
101        gctx.shell().verbose(|shell| {
102            shell.warn(format_args!(
103                "found (git) Cargo.toml ignored at `{}` in workdir `{}`",
104                path.display(),
105                workdir.display()
106            ))
107        })?;
108        // An ignored `Cargo.toml` found. This package may be irrelevant.
109        // Have to assume it is clean.
110        return Ok(None);
111    }
112
113    warn_symlink_checked_out_as_plain_text_file(gctx, src_files, &repo)?;
114
115    debug!(
116        "found (git) Cargo.toml at `{}` in workdir `{}`",
117        path.display(),
118        workdir.display(),
119    );
120    let Some(git) = git(ws, p, src_files, &mut repo, &opts)? else {
121        // If the git repo lacks essential field like `sha1`, and since this field exists from the beginning,
122        // then don't generate the corresponding file in order to maintain consistency with past behavior.
123        return Ok(None);
124    };
125
126    let path_in_vcs = path
127        .parent()
128        .and_then(|p| p.to_str())
129        .unwrap_or("")
130        .replace("\\", "/");
131
132    Ok(Some(VcsInfo { git, path_in_vcs }))
133}
134
135/// Warns if any symlinks were checked out as plain text files.
136///
137/// Git config [`core.symlinks`] defaults to true when unset.
138/// In git-for-windows (and git as well),
139/// the config should be set to false explicitly when the repo was created,
140/// if symlink support wasn't detected [^1].
141///
142/// We assume the config was always set at creation time and never changed.
143/// So, if it is true, we don't bother users with any warning.
144///
145/// [^1]: <https://github.com/git-for-windows/git/blob/f1241afcc7956918d5da33ef74abd9cbba369247/setup.c#L2394-L2403>
146///
147/// [`core.symlinks`]: https://git-scm.com/docs/git-config#Documentation/git-config.txt-coresymlinks
148fn warn_symlink_checked_out_as_plain_text_file(
149    gctx: &GlobalContext,
150    src_files: &[PathEntry],
151    repo: &gix::Repository,
152) -> CargoResult<()> {
153    if repo
154        .config_snapshot()
155        .boolean(&gix::config::tree::Core::SYMLINKS)
156        .unwrap_or(true)
157    {
158        return Ok(());
159    }
160
161    if src_files.iter().any(|f| f.maybe_plain_text_symlink()) {
162        let msg = format!(
163            "found symbolic links that may be checked out as regular files for git repo at `{}/`",
164            repo.workdir().unwrap().display()
165        );
166        let mut notes = vec![
167            Level::NOTE.message(
168                "this might cause the `.crate` file to include incorrect or incomplete files",
169            ),
170            Level::HELP.message("to avoid this, set the Git config `core.symlinks` to `true`"),
171        ];
172        if cfg!(windows) {
173            notes.push(
174                Level::HELP.message("on Windows, enable the Developer Mode to support symlinks"),
175            );
176        };
177        gctx.shell().print_report(
178            &[Level::WARNING
179                .secondary_title(msg)
180                .elements(notes.into_iter())],
181            false,
182        )?;
183    }
184
185    Ok(())
186}
187
188/// The real git status check starts from here.
189fn git(
190    ws: &Workspace<'_>,
191    pkg: &Package,
192    src_files: &[PathEntry],
193    repo: &mut gix::Repository,
194    opts: &PackageOpts<'_>,
195) -> CargoResult<Option<GitVcsInfo>> {
196    {
197        let mut config = repo.config_snapshot_mut();
198        // This currently is only a very minor speedup for the biggest repositories,
199        // but might trigger creating many threads.
200        config.set_value(&gix::config::tree::Index::THREADS, "false")?;
201    }
202    // This is a collection of any dirty or untracked files. This covers:
203    // - new/modified/deleted/renamed/type change (index or worktree)
204    // - untracked files (which are "new" worktree files)
205    // - ignored (in case the user has an `include` directive that
206    //   conflicts with .gitignore).
207    let mut dirty_files = Vec::new();
208    let workdir = repo.workdir().unwrap();
209    collect_statuses(
210        repo,
211        workdir,
212        relative_package_root(repo, pkg.root()).as_deref(),
213        &mut dirty_files,
214    )?;
215
216    // Include each submodule so that the error message can provide
217    // specifically *which* files in a submodule are modified.
218    status_submodules(repo, &mut dirty_files)?;
219
220    // Find the intersection of dirty in git, and the src_files that would
221    // be packaged. This is a lazy n^2 check, but seems fine with
222    // thousands of files.
223    let cwd = ws.gctx().cwd();
224    let mut dirty_src_files: Vec<_> = src_files
225        .iter()
226        .filter(|src_file| {
227            if let Some(canon_src_file) = src_file.is_symlink_or_under_symlink().then(|| {
228                gix::path::realpath_opts(
229                    &src_file,
230                    ws.gctx().cwd(),
231                    gix::path::realpath::MAX_SYMLINKS,
232                )
233                .unwrap_or_else(|_| src_file.to_path_buf())
234            }) {
235                dirty_files
236                    .iter()
237                    .any(|path| canon_src_file.starts_with(path))
238            } else {
239                dirty_files.iter().any(|path| src_file.starts_with(path))
240            }
241        })
242        .map(|p| p.as_ref())
243        .chain(dirty_files_outside_pkg_root(ws, pkg, repo, src_files)?.iter())
244        .map(|path| {
245            pathdiff::diff_paths(path, cwd)
246                .as_ref()
247                .unwrap_or(path)
248                .display()
249                .to_string()
250        })
251        .collect();
252    let dirty = !dirty_src_files.is_empty();
253    if !dirty || opts.allow_dirty {
254        let maybe_head_id = repo.head()?.try_peel_to_id_in_place()?;
255        Ok(maybe_head_id.map(|id| GitVcsInfo {
256            sha1: id.to_string(),
257            dirty,
258        }))
259    } else {
260        dirty_src_files.sort_unstable();
261        anyhow::bail!(
262            "{} files in the working directory contain changes that were \
263             not yet committed into git:\n\n{}\n\n\
264             to proceed despite this and include the uncommitted changes, pass the `--allow-dirty` flag",
265            dirty_src_files.len(),
266            dirty_src_files.join("\n")
267        )
268    }
269}
270
271/// Helper to collect dirty statuses for a single repo.
272/// `relative_package_root` is `Some` if the root is a sub-directory of the workdir.
273/// Writes dirty files outside `relative_package_root` into `dirty_files_outside_package_root`,
274/// and all *everything else* into `dirty_files`.
275#[must_use]
276fn collect_statuses(
277    repo: &gix::Repository,
278    workdir: &Path,
279    relative_package_root: Option<&Path>,
280    dirty_files: &mut Vec<PathBuf>,
281) -> CargoResult<()> {
282    let statuses = repo
283        .status(gix::progress::Discard)?
284        .dirwalk_options(configure_dirwalk)
285        .tree_index_track_renames(TrackRenames::Disabled)
286        .index_worktree_submodules(None)
287        .into_iter(
288            relative_package_root.map(|rela_pkg_root| {
289                gix::path::into_bstr(rela_pkg_root).into_owned()
290            }), /* pathspec patterns */
291        )
292        .with_context(|| {
293            format!(
294                "failed to begin git status for repo {}",
295                repo.path().display()
296            )
297        })?;
298
299    for status in statuses {
300        let status = status.with_context(|| {
301            format!(
302                "failed to retrieve git status from repo {}",
303                repo.path().display()
304            )
305        })?;
306
307        let rel_path = gix::path::from_bstr(status.location());
308        let path = workdir.join(&rel_path);
309        // It is OK to include Cargo.lock even if it is ignored.
310        if path.ends_with("Cargo.lock")
311            && matches!(
312                &status,
313                gix::status::Item::IndexWorktree(
314                    gix::status::index_worktree::Item::DirectoryContents { entry, .. }
315                ) if matches!(entry.status, gix::dir::entry::Status::Ignored(_))
316            )
317        {
318            continue;
319        }
320
321        dirty_files.push(path);
322    }
323    Ok(())
324}
325
326/// Helper to collect dirty statuses while recursing into submodules.
327fn status_submodules(repo: &gix::Repository, dirty_files: &mut Vec<PathBuf>) -> CargoResult<()> {
328    let Some(submodules) = repo.submodules()? else {
329        return Ok(());
330    };
331    for submodule in submodules {
332        // Ignore submodules that don't open, they are probably not initialized.
333        // If its files are required, then the verification step should fail.
334        if let Some(sub_repo) = submodule.open()? {
335            let Some(workdir) = sub_repo.workdir() else {
336                continue;
337            };
338            status_submodules(&sub_repo, dirty_files)?;
339            collect_statuses(&sub_repo, workdir, None, dirty_files)?;
340        }
341    }
342    Ok(())
343}
344
345/// Make `pkg_root` relative to the `repo` workdir.
346fn relative_package_root(repo: &gix::Repository, pkg_root: &Path) -> Option<PathBuf> {
347    let workdir = repo.workdir().unwrap();
348    let rela_root = pkg_root.strip_prefix(workdir).unwrap_or(Path::new(""));
349    if rela_root.as_os_str().is_empty() {
350        None
351    } else {
352        rela_root.to_owned().into()
353    }
354}
355
356/// Checks whether "included" source files outside package root have been modified.
357///
358/// This currently looks at
359///
360/// * `package.readme` and `package.license-file` pointing to paths outside package root
361/// * symlinks targets residing outside package root
362/// * Any change in the root workspace manifest, regardless of what has changed.
363///
364/// This is required because those paths may link to a file outside the
365/// current package root, but still under the git workdir, affecting the
366/// final packaged `.crate` file.
367fn dirty_files_outside_pkg_root(
368    ws: &Workspace<'_>,
369    pkg: &Package,
370    repo: &gix::Repository,
371    src_files: &[PathEntry],
372) -> CargoResult<Vec<PathBuf>> {
373    let pkg_root = pkg.root();
374    let workdir = repo.workdir().unwrap();
375
376    let meta = pkg.manifest().metadata();
377    let metadata_paths: Vec<_> = [&meta.license_file, &meta.readme]
378        .into_iter()
379        .filter_map(|p| p.as_deref())
380        .map(|path| paths::normalize_path(&pkg_root.join(path)))
381        .collect();
382
383    let linked_files_outside_package_root: Vec<_> = src_files
384        .iter()
385        .filter(|p| p.is_symlink_or_under_symlink())
386        .map(|p| p.as_ref().as_path())
387        .chain(metadata_paths.iter().map(AsRef::as_ref))
388        .chain([ws.root_manifest()])
389        // If inside package root. Don't bother checking git status.
390        .filter(|p| paths::strip_prefix_canonical(p, pkg_root).is_err())
391        // Handle files outside package root but under git workdir,
392        .filter_map(|p| paths::strip_prefix_canonical(p, workdir).ok())
393        .collect();
394
395    if linked_files_outside_package_root.is_empty() {
396        return Ok(Vec::new());
397    }
398
399    let statuses = repo
400        .status(gix::progress::Discard)?
401        .dirwalk_options(configure_dirwalk)
402        // Limit the amount of threads for used for the worktree status, as the pathspec will
403        // prevent most paths from being visited anyway there is not much work.
404        .index_worktree_options_mut(|opts| opts.thread_limit = Some(1))
405        .tree_index_track_renames(TrackRenames::Disabled)
406        .index_worktree_submodules(None)
407        .into_iter(
408            linked_files_outside_package_root
409                .into_iter()
410                .map(|p| gix::path::into_bstr(p).into_owned()),
411        )
412        .with_context(|| {
413            format!(
414                "failed to begin git status for outfor repo {}",
415                repo.path().display()
416            )
417        })?;
418
419    let mut dirty_files = Vec::new();
420    for status in statuses {
421        let status = status.with_context(|| {
422            format!(
423                "failed to retrieve git status from repo {}",
424                repo.path().display()
425            )
426        })?;
427
428        let rel_path = gix::path::from_bstr(status.location());
429        let path = workdir.join(&rel_path);
430        dirty_files.push(path);
431    }
432    Ok(dirty_files)
433}
434
435fn configure_dirwalk(opts: Options) -> Options {
436    opts.emit_untracked(gix::dir::walk::EmissionMode::Matching)
437        // Also pick up ignored files or whole directories
438        // to specifically catch overzealously ignored source files.
439        // Later we will match these dirs by prefix, which is why collapsing
440        // them is desirable here.
441        .emit_ignored(Some(EmissionMode::CollapseDirectory))
442        .emit_tracked(false)
443        .recurse_repositories(false)
444        .symlinks_to_directories_are_ignored_like_directories(true)
445        .emit_empty_directories(false)
446}