cargo/ops/cargo_package/
vcs.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
//! Helpers to gather the VCS information for `cargo package`.

use std::collections::HashSet;
use std::path::PathBuf;

use anyhow::Context as _;
use cargo_util::paths;
use serde::Serialize;
use tracing::debug;

use crate::core::Package;
use crate::sources::PathEntry;
use crate::CargoResult;
use crate::GlobalContext;

use super::PackageOpts;

/// Represents the VCS information when packaging.
#[derive(Serialize)]
pub struct VcsInfo {
    git: GitVcsInfo,
    /// Path to the package within repo (empty string if root).
    path_in_vcs: String,
}

/// Represents the Git VCS information when packaging.
#[derive(Serialize)]
pub struct GitVcsInfo {
    sha1: String,
    /// Indicate whether or not the Git worktree is dirty.
    #[serde(skip_serializing_if = "std::ops::Not::not")]
    dirty: bool,
}

/// Checks if the package source is in a *git* DVCS repository.
///
/// If *git*, and the source is *dirty* (e.g., has uncommitted changes),
/// and `--allow-dirty` has not been passed,
/// then `bail!` with an informative message.
/// Otherwise return the sha1 hash of the current *HEAD* commit,
/// or `None` if no repo is found.
#[tracing::instrument(skip_all)]
pub fn check_repo_state(
    p: &Package,
    src_files: &[PathEntry],
    gctx: &GlobalContext,
    opts: &PackageOpts<'_>,
) -> CargoResult<Option<VcsInfo>> {
    let Ok(repo) = git2::Repository::discover(p.root()) else {
        gctx.shell().verbose(|shell| {
            shell.warn(format!("no (git) VCS found for `{}`", p.root().display()))
        })?;
        // No Git repo found. Have to assume it is clean.
        return Ok(None);
    };

    let Some(workdir) = repo.workdir() else {
        debug!(
            "no (git) workdir found for repo at `{}`",
            repo.path().display()
        );
        // No git workdir. Have to assume it is clean.
        return Ok(None);
    };

    debug!("found a git repo at `{}`", workdir.display());
    let path = p.manifest_path();
    let path = paths::strip_prefix_canonical(path, workdir).unwrap_or_else(|_| path.to_path_buf());
    let Ok(status) = repo.status_file(&path) else {
        gctx.shell().verbose(|shell| {
            shell.warn(format!(
                "no (git) Cargo.toml found at `{}` in workdir `{}`",
                path.display(),
                workdir.display()
            ))
        })?;
        // No checked-in `Cargo.toml` found. This package may be irrelevant.
        // Have to assume it is clean.
        return Ok(None);
    };

    if !(status & git2::Status::IGNORED).is_empty() {
        gctx.shell().verbose(|shell| {
            shell.warn(format!(
                "found (git) Cargo.toml ignored at `{}` in workdir `{}`",
                path.display(),
                workdir.display()
            ))
        })?;
        // An ignored `Cargo.toml` found. This package may be irrelevant.
        // Have to assume it is clean.
        return Ok(None);
    }

    warn_symlink_checked_out_as_plain_text_file(gctx, src_files, &repo)?;

    debug!(
        "found (git) Cargo.toml at `{}` in workdir `{}`",
        path.display(),
        workdir.display(),
    );
    let path_in_vcs = path
        .parent()
        .and_then(|p| p.to_str())
        .unwrap_or("")
        .replace("\\", "/");
    let Some(git) = git(p, gctx, src_files, &repo, &opts)? else {
        // If the git repo lacks essensial field like `sha1`, and since this field exists from the beginning,
        // then don't generate the corresponding file in order to maintain consistency with past behavior.
        return Ok(None);
    };

    return Ok(Some(VcsInfo { git, path_in_vcs }));
}

/// Warns if any symlinks were checked out as plain text files.
///
/// Git config [`core.symlinks`] defaults to true when unset.
/// In git-for-windows (and git as well),
/// the config should be set to false explicitly when the repo was created,
/// if symlink support wasn't detected [^1].
///
/// We assume the config was always set at creation time and never changed.
/// So, if it is true, we don't bother users with any warning.
///
/// [^1]: <https://github.com/git-for-windows/git/blob/f1241afcc7956918d5da33ef74abd9cbba369247/setup.c#L2394-L2403>
///
/// [`core.symlinks`]: https://git-scm.com/docs/git-config#Documentation/git-config.txt-coresymlinks
fn warn_symlink_checked_out_as_plain_text_file(
    gctx: &GlobalContext,
    src_files: &[PathEntry],
    repo: &git2::Repository,
) -> CargoResult<()> {
    if repo
        .config()
        .and_then(|c| c.get_bool("core.symlinks"))
        .unwrap_or(true)
    {
        return Ok(());
    }

    if src_files.iter().any(|f| f.maybe_plain_text_symlink()) {
        let mut shell = gctx.shell();
        shell.warn(format_args!(
            "found symbolic links that may be checked out as regular files for git repo at `{}`\n\
            This might cause the `.crate` file to include incorrect or incomplete files",
            repo.workdir().unwrap().display(),
        ))?;
        let extra_note = if cfg!(windows) {
            "\nAnd on Windows, enable the Developer Mode to support symlinks"
        } else {
            ""
        };
        shell.note(format_args!(
            "to avoid this, set the Git config `core.symlinks` to `true`{extra_note}",
        ))?;
    }

    Ok(())
}

/// The real git status check starts from here.
fn git(
    pkg: &Package,
    gctx: &GlobalContext,
    src_files: &[PathEntry],
    repo: &git2::Repository,
    opts: &PackageOpts<'_>,
) -> CargoResult<Option<GitVcsInfo>> {
    // This is a collection of any dirty or untracked files. This covers:
    // - new/modified/deleted/renamed/type change (index or worktree)
    // - untracked files (which are "new" worktree files)
    // - ignored (in case the user has an `include` directive that
    //   conflicts with .gitignore).
    let mut dirty_files = Vec::new();
    collect_statuses(repo, &mut dirty_files)?;
    // Include each submodule so that the error message can provide
    // specifically *which* files in a submodule are modified.
    status_submodules(repo, &mut dirty_files)?;

    // Find the intersection of dirty in git, and the src_files that would
    // be packaged. This is a lazy n^2 check, but seems fine with
    // thousands of files.
    let cwd = gctx.cwd();
    let mut dirty_src_files: Vec<_> = src_files
        .iter()
        .filter(|src_file| dirty_files.iter().any(|path| src_file.starts_with(path)))
        .map(|p| p.as_ref())
        .chain(dirty_files_outside_pkg_root(pkg, repo, src_files)?.iter())
        .map(|path| {
            pathdiff::diff_paths(path, cwd)
                .as_ref()
                .unwrap_or(path)
                .display()
                .to_string()
        })
        .collect();
    let dirty = !dirty_src_files.is_empty();
    if !dirty || opts.allow_dirty {
        // Must check whetherthe repo has no commit firstly, otherwise `revparse_single` would fail on bare commit repo.
        // Due to lacking the `sha1` field, it's better not record the `GitVcsInfo` for consistency.
        if repo.is_empty()? {
            return Ok(None);
        }
        let rev_obj = repo.revparse_single("HEAD")?;
        Ok(Some(GitVcsInfo {
            sha1: rev_obj.id().to_string(),
            dirty,
        }))
    } else {
        dirty_src_files.sort_unstable();
        anyhow::bail!(
            "{} files in the working directory contain changes that were \
             not yet committed into git:\n\n{}\n\n\
             to proceed despite this and include the uncommitted changes, pass the `--allow-dirty` flag",
            dirty_src_files.len(),
            dirty_src_files.join("\n")
        )
    }
}

/// Checks whether "included" source files outside package root have been modified.
///
/// This currently looks at
///
/// * `package.readme` and `package.license-file` pointing to paths outside package root
/// * symlinks targets reside outside package root
///
/// This is required because those paths may link to a file outside the
/// current package root, but still under the git workdir, affecting the
/// final packaged `.crate` file.
fn dirty_files_outside_pkg_root(
    pkg: &Package,
    repo: &git2::Repository,
    src_files: &[PathEntry],
) -> CargoResult<HashSet<PathBuf>> {
    let pkg_root = pkg.root();
    let workdir = repo.workdir().unwrap();

    let meta = pkg.manifest().metadata();
    let metadata_paths: Vec<_> = [&meta.license_file, &meta.readme]
        .into_iter()
        .filter_map(|p| p.as_deref())
        .map(|path| paths::normalize_path(&pkg_root.join(path)))
        .collect();

    let mut dirty_symlinks = HashSet::new();
    for rel_path in src_files
        .iter()
        .filter(|p| p.is_symlink_or_under_symlink())
        .map(|p| p.as_ref())
        .chain(metadata_paths.iter())
        // If inside package root. Don't bother checking git status.
        .filter(|p| paths::strip_prefix_canonical(p, pkg_root).is_err())
        // Handle files outside package root but under git workdir,
        .filter_map(|p| paths::strip_prefix_canonical(p, workdir).ok())
    {
        if repo.status_file(&rel_path)? != git2::Status::CURRENT {
            dirty_symlinks.insert(workdir.join(rel_path));
        }
    }
    Ok(dirty_symlinks)
}

/// Helper to collect dirty statuses for a single repo.
fn collect_statuses(repo: &git2::Repository, dirty_files: &mut Vec<PathBuf>) -> CargoResult<()> {
    let mut status_opts = git2::StatusOptions::new();
    // Exclude submodules, as they are being handled manually by recursing
    // into each one so that details about specific files can be
    // retrieved.
    status_opts
        .exclude_submodules(true)
        .include_ignored(true)
        .include_untracked(true);
    let repo_statuses = repo.statuses(Some(&mut status_opts)).with_context(|| {
        format!(
            "failed to retrieve git status from repo {}",
            repo.path().display()
        )
    })?;
    let workdir = repo.workdir().unwrap();
    let this_dirty = repo_statuses.iter().filter_map(|entry| {
        let path = entry.path().expect("valid utf-8 path");
        if path.ends_with("Cargo.lock") && entry.status() == git2::Status::IGNORED {
            // It is OK to include Cargo.lock even if it is ignored.
            return None;
        }
        // Use an absolute path, so that comparing paths is easier
        // (particularly with submodules).
        Some(workdir.join(path))
    });
    dirty_files.extend(this_dirty);
    Ok(())
}

/// Helper to collect dirty statuses while recursing into submodules.
fn status_submodules(repo: &git2::Repository, dirty_files: &mut Vec<PathBuf>) -> CargoResult<()> {
    for submodule in repo.submodules()? {
        // Ignore submodules that don't open, they are probably not initialized.
        // If its files are required, then the verification step should fail.
        if let Ok(sub_repo) = submodule.open() {
            status_submodules(&sub_repo, dirty_files)?;
            collect_statuses(&sub_repo, dirty_files)?;
        }
    }
    Ok(())
}