cargo/ops/cargo_package/
vcs.rs

//! Helpers to gather the VCS information for `cargo package`.
2
3use crate::core::{Package, Workspace};
4use crate::ops::PackageOpts;
5use crate::sources::PathEntry;
6use crate::{CargoResult, GlobalContext};
7use anyhow::Context;
8use cargo_util::paths;
9use gix::bstr::ByteSlice;
10use gix::dir::walk::EmissionMode;
11use gix::dirwalk::Options;
12use gix::index::entry::Mode;
13use gix::status::tree_index::TrackRenames;
14use gix::worktree::stack::state::ignore::Source;
15use serde::Serialize;
16use std::path::{Path, PathBuf};
17use tracing::debug;
18
19/// Represents the VCS information when packaging.
20#[derive(Serialize)]
21pub struct VcsInfo {
22    git: GitVcsInfo,
23    /// Path to the package within repo (empty string if root).
24    path_in_vcs: String,
25}
26
27/// Represents the Git VCS information when packaging.
28#[derive(Serialize)]
29pub struct GitVcsInfo {
30    sha1: String,
31    /// Indicate whether the Git worktree is dirty.
32    #[serde(skip_serializing_if = "std::ops::Not::not")]
33    dirty: bool,
34}
35
36/// Checks if the package source is in a *git* DVCS repository.
37///
38/// If *git*, and the source is *dirty* (e.g., has uncommitted changes),
39/// and `--allow-dirty` has not been passed,
40/// then `bail!` with an informative message.
41/// Otherwise, return the sha1 hash of the current *HEAD* commit,
42/// or `None` if no repo is found.
43#[tracing::instrument(skip_all)]
44pub fn check_repo_state(
45    p: &Package,
46    src_files: &[PathEntry],
47    ws: &Workspace<'_>,
48    opts: &PackageOpts<'_>,
49) -> CargoResult<Option<VcsInfo>> {
50    let gctx = ws.gctx();
51    let Ok(mut repo) = gix::discover(p.root()) else {
52        gctx.shell().verbose(|shell| {
53            shell.warn(format_args!(
54                "no (git) VCS found for `{}`",
55                p.root().display()
56            ))
57        })?;
58        // No Git repo found. Have to assume it is clean.
59        return Ok(None);
60    };
61
62    let Some(workdir) = repo.workdir() else {
63        debug!(
64            "no (git) workdir found for repo at `{}`",
65            repo.path().display()
66        );
67        // No git workdir. Have to assume it is clean.
68        return Ok(None);
69    };
70
71    debug!("found a git repo at `{}`", workdir.display());
72    let path = p.manifest_path();
73
74    let manifest_exists = path.exists();
75    let path = paths::strip_prefix_canonical(path, workdir).unwrap_or_else(|_| path.to_path_buf());
76    let rela_path =
77        gix::path::to_unix_separators_on_windows(gix::path::os_str_into_bstr(path.as_os_str())?);
78    if !manifest_exists {
79        gctx.shell().verbose(|shell| {
80            shell.warn(format_args!(
81                "Cargo.toml not found at `{}` in workdir `{}`",
82                path.display(),
83                workdir.display()
84            ))
85        })?;
86        // No `Cargo.toml` found. This package may be irrelevant.
87        // Have to assume it is clean.
88        return Ok(None);
89    };
90
91    let manifest_is_ignored = {
92        let index = repo.index_or_empty()?;
93        let mut excludes =
94            repo.excludes(&index, None, Source::WorktreeThenIdMappingIfNotSkipped)?;
95        excludes
96            .at_entry(rela_path.as_bstr(), Some(Mode::FILE))?
97            .is_excluded()
98    };
99    if manifest_is_ignored {
100        gctx.shell().verbose(|shell| {
101            shell.warn(format_args!(
102                "found (git) Cargo.toml ignored at `{}` in workdir `{}`",
103                path.display(),
104                workdir.display()
105            ))
106        })?;
107        // An ignored `Cargo.toml` found. This package may be irrelevant.
108        // Have to assume it is clean.
109        return Ok(None);
110    }
111
112    warn_symlink_checked_out_as_plain_text_file(gctx, src_files, &repo)?;
113
114    debug!(
115        "found (git) Cargo.toml at `{}` in workdir `{}`",
116        path.display(),
117        workdir.display(),
118    );
119    let Some(git) = git(ws, p, src_files, &mut repo, &opts)? else {
120        // If the git repo lacks essential field like `sha1`, and since this field exists from the beginning,
121        // then don't generate the corresponding file in order to maintain consistency with past behavior.
122        return Ok(None);
123    };
124
125    let path_in_vcs = path
126        .parent()
127        .and_then(|p| p.to_str())
128        .unwrap_or("")
129        .replace("\\", "/");
130
131    Ok(Some(VcsInfo { git, path_in_vcs }))
132}
133
134/// Warns if any symlinks were checked out as plain text files.
135///
136/// Git config [`core.symlinks`] defaults to true when unset.
137/// In git-for-windows (and git as well),
138/// the config should be set to false explicitly when the repo was created,
139/// if symlink support wasn't detected [^1].
140///
141/// We assume the config was always set at creation time and never changed.
142/// So, if it is true, we don't bother users with any warning.
143///
144/// [^1]: <https://github.com/git-for-windows/git/blob/f1241afcc7956918d5da33ef74abd9cbba369247/setup.c#L2394-L2403>
145///
146/// [`core.symlinks`]: https://git-scm.com/docs/git-config#Documentation/git-config.txt-coresymlinks
147fn warn_symlink_checked_out_as_plain_text_file(
148    gctx: &GlobalContext,
149    src_files: &[PathEntry],
150    repo: &gix::Repository,
151) -> CargoResult<()> {
152    if repo
153        .config_snapshot()
154        .boolean(&gix::config::tree::Core::SYMLINKS)
155        .unwrap_or(true)
156    {
157        return Ok(());
158    }
159
160    if src_files.iter().any(|f| f.maybe_plain_text_symlink()) {
161        let mut shell = gctx.shell();
162        shell.warn(format_args!(
163            "found symbolic links that may be checked out as regular files for git repo at `{}/`\n\
164        This might cause the `.crate` file to include incorrect or incomplete files",
165            repo.workdir().unwrap().display(),
166        ))?;
167        let extra_note = if cfg!(windows) {
168            "\nAnd on Windows, enable the Developer Mode to support symlinks"
169        } else {
170            ""
171        };
172        shell.note(format_args!(
173            "to avoid this, set the Git config `core.symlinks` to `true`{extra_note}",
174        ))?;
175    }
176
177    Ok(())
178}
179
180/// The real git status check starts from here.
181fn git(
182    ws: &Workspace<'_>,
183    pkg: &Package,
184    src_files: &[PathEntry],
185    repo: &mut gix::Repository,
186    opts: &PackageOpts<'_>,
187) -> CargoResult<Option<GitVcsInfo>> {
188    {
189        let mut config = repo.config_snapshot_mut();
190        // This currently is only a very minor speedup for the biggest repositories,
191        // but might trigger creating many threads.
192        config.set_value(&gix::config::tree::Index::THREADS, "false")?;
193    }
194    // This is a collection of any dirty or untracked files. This covers:
195    // - new/modified/deleted/renamed/type change (index or worktree)
196    // - untracked files (which are "new" worktree files)
197    // - ignored (in case the user has an `include` directive that
198    //   conflicts with .gitignore).
199    let mut dirty_files = Vec::new();
200    let workdir = repo.workdir().unwrap();
201    collect_statuses(
202        repo,
203        workdir,
204        relative_package_root(repo, pkg.root()).as_deref(),
205        &mut dirty_files,
206    )?;
207
208    // Include each submodule so that the error message can provide
209    // specifically *which* files in a submodule are modified.
210    status_submodules(repo, &mut dirty_files)?;
211
212    // Find the intersection of dirty in git, and the src_files that would
213    // be packaged. This is a lazy n^2 check, but seems fine with
214    // thousands of files.
215    let cwd = ws.gctx().cwd();
216    let mut dirty_src_files: Vec<_> = src_files
217        .iter()
218        .filter(|src_file| {
219            if let Some(canon_src_file) = src_file.is_symlink_or_under_symlink().then(|| {
220                gix::path::realpath_opts(
221                    &src_file,
222                    ws.gctx().cwd(),
223                    gix::path::realpath::MAX_SYMLINKS,
224                )
225                .unwrap_or_else(|_| src_file.to_path_buf())
226            }) {
227                dirty_files
228                    .iter()
229                    .any(|path| canon_src_file.starts_with(path))
230            } else {
231                dirty_files.iter().any(|path| src_file.starts_with(path))
232            }
233        })
234        .map(|p| p.as_ref())
235        .chain(dirty_files_outside_pkg_root(ws, pkg, repo, src_files)?.iter())
236        .map(|path| {
237            pathdiff::diff_paths(path, cwd)
238                .as_ref()
239                .unwrap_or(path)
240                .display()
241                .to_string()
242        })
243        .collect();
244    let dirty = !dirty_src_files.is_empty();
245    if !dirty || opts.allow_dirty {
246        let maybe_head_id = repo.head()?.try_peel_to_id_in_place()?;
247        Ok(maybe_head_id.map(|id| GitVcsInfo {
248            sha1: id.to_string(),
249            dirty,
250        }))
251    } else {
252        dirty_src_files.sort_unstable();
253        anyhow::bail!(
254            "{} files in the working directory contain changes that were \
255             not yet committed into git:\n\n{}\n\n\
256             to proceed despite this and include the uncommitted changes, pass the `--allow-dirty` flag",
257            dirty_src_files.len(),
258            dirty_src_files.join("\n")
259        )
260    }
261}
262
263/// Helper to collect dirty statuses for a single repo.
264/// `relative_package_root` is `Some` if the root is a sub-directory of the workdir.
265/// Writes dirty files outside `relative_package_root` into `dirty_files_outside_package_root`,
266/// and all *everything else* into `dirty_files`.
267#[must_use]
268fn collect_statuses(
269    repo: &gix::Repository,
270    workdir: &Path,
271    relative_package_root: Option<&Path>,
272    dirty_files: &mut Vec<PathBuf>,
273) -> CargoResult<()> {
274    let statuses = repo
275        .status(gix::progress::Discard)?
276        .dirwalk_options(configure_dirwalk)
277        .tree_index_track_renames(TrackRenames::Disabled)
278        .index_worktree_submodules(None)
279        .into_iter(
280            relative_package_root.map(|rela_pkg_root| {
281                gix::path::into_bstr(rela_pkg_root).into_owned()
282            }), /* pathspec patterns */
283        )
284        .with_context(|| {
285            format!(
286                "failed to begin git status for repo {}",
287                repo.path().display()
288            )
289        })?;
290
291    for status in statuses {
292        let status = status.with_context(|| {
293            format!(
294                "failed to retrieve git status from repo {}",
295                repo.path().display()
296            )
297        })?;
298
299        let rel_path = gix::path::from_bstr(status.location());
300        let path = workdir.join(&rel_path);
301        // It is OK to include Cargo.lock even if it is ignored.
302        if path.ends_with("Cargo.lock")
303            && matches!(
304                &status,
305                gix::status::Item::IndexWorktree(
306                    gix::status::index_worktree::Item::DirectoryContents { entry, .. }
307                ) if matches!(entry.status, gix::dir::entry::Status::Ignored(_))
308            )
309        {
310            continue;
311        }
312
313        dirty_files.push(path);
314    }
315    Ok(())
316}
317
318/// Helper to collect dirty statuses while recursing into submodules.
319fn status_submodules(repo: &gix::Repository, dirty_files: &mut Vec<PathBuf>) -> CargoResult<()> {
320    let Some(submodules) = repo.submodules()? else {
321        return Ok(());
322    };
323    for submodule in submodules {
324        // Ignore submodules that don't open, they are probably not initialized.
325        // If its files are required, then the verification step should fail.
326        if let Some(sub_repo) = submodule.open()? {
327            let Some(workdir) = sub_repo.workdir() else {
328                continue;
329            };
330            status_submodules(&sub_repo, dirty_files)?;
331            collect_statuses(&sub_repo, workdir, None, dirty_files)?;
332        }
333    }
334    Ok(())
335}
336
337/// Make `pkg_root` relative to the `repo` workdir.
338fn relative_package_root(repo: &gix::Repository, pkg_root: &Path) -> Option<PathBuf> {
339    let workdir = repo.workdir().unwrap();
340    let rela_root = pkg_root.strip_prefix(workdir).unwrap_or(Path::new(""));
341    if rela_root.as_os_str().is_empty() {
342        None
343    } else {
344        rela_root.to_owned().into()
345    }
346}
347
348/// Checks whether "included" source files outside package root have been modified.
349///
350/// This currently looks at
351///
352/// * `package.readme` and `package.license-file` pointing to paths outside package root
353/// * symlinks targets residing outside package root
354/// * Any change in the root workspace manifest, regardless of what has changed.
355///
356/// This is required because those paths may link to a file outside the
357/// current package root, but still under the git workdir, affecting the
358/// final packaged `.crate` file.
359fn dirty_files_outside_pkg_root(
360    ws: &Workspace<'_>,
361    pkg: &Package,
362    repo: &gix::Repository,
363    src_files: &[PathEntry],
364) -> CargoResult<Vec<PathBuf>> {
365    let pkg_root = pkg.root();
366    let workdir = repo.workdir().unwrap();
367
368    let meta = pkg.manifest().metadata();
369    let metadata_paths: Vec<_> = [&meta.license_file, &meta.readme]
370        .into_iter()
371        .filter_map(|p| p.as_deref())
372        .map(|path| paths::normalize_path(&pkg_root.join(path)))
373        .collect();
374
375    let linked_files_outside_package_root: Vec<_> = src_files
376        .iter()
377        .filter(|p| p.is_symlink_or_under_symlink())
378        .map(|p| p.as_ref().as_path())
379        .chain(metadata_paths.iter().map(AsRef::as_ref))
380        .chain([ws.root_manifest()])
381        // If inside package root. Don't bother checking git status.
382        .filter(|p| paths::strip_prefix_canonical(p, pkg_root).is_err())
383        // Handle files outside package root but under git workdir,
384        .filter_map(|p| paths::strip_prefix_canonical(p, workdir).ok())
385        .collect();
386
387    if linked_files_outside_package_root.is_empty() {
388        return Ok(Vec::new());
389    }
390
391    let statuses = repo
392        .status(gix::progress::Discard)?
393        .dirwalk_options(configure_dirwalk)
394        // Limit the amount of threads for used for the worktree status, as the pathspec will
395        // prevent most paths from being visited anyway there is not much work.
396        .index_worktree_options_mut(|opts| opts.thread_limit = Some(1))
397        .tree_index_track_renames(TrackRenames::Disabled)
398        .index_worktree_submodules(None)
399        .into_iter(
400            linked_files_outside_package_root
401                .into_iter()
402                .map(|p| gix::path::into_bstr(p).into_owned()),
403        )
404        .with_context(|| {
405            format!(
406                "failed to begin git status for outfor repo {}",
407                repo.path().display()
408            )
409        })?;
410
411    let mut dirty_files = Vec::new();
412    for status in statuses {
413        let status = status.with_context(|| {
414            format!(
415                "failed to retrieve git status from repo {}",
416                repo.path().display()
417            )
418        })?;
419
420        let rel_path = gix::path::from_bstr(status.location());
421        let path = workdir.join(&rel_path);
422        dirty_files.push(path);
423    }
424    Ok(dirty_files)
425}
426
427fn configure_dirwalk(opts: Options) -> Options {
428    opts.emit_untracked(gix::dir::walk::EmissionMode::Matching)
429        // Also pick up ignored files or whole directories
430        // to specifically catch overzealously ignored source files.
431        // Later we will match these dirs by prefix, which is why collapsing
432        // them is desirable here.
433        .emit_ignored(Some(EmissionMode::CollapseDirectory))
434        .emit_tracked(false)
435        .recurse_repositories(false)
436        .symlinks_to_directories_are_ignored_like_directories(true)
437        .emit_empty_directories(false)
438}