build_helper/
git.rs

1use std::path::Path;
2use std::process::{Command, Stdio};
3
4use crate::ci::CiEnv;
5
/// Git-related settings consulted by the helpers in this file.
#[derive(Debug)]
pub struct GitConfig<'a> {
    /// Name of the nightly branch.
    /// NOTE(review): no function visible in this file reads this field — confirm its use
    /// with the callers.
    pub nightly_branch: &'a str,
    /// E-mail of the account that authors upstream merge commits. It is regex-escaped and
    /// passed to `git rev-list --author` to recognize upstream commits.
    pub git_merge_commit_email: &'a str,
}
11
/// Executes `cmd` and returns everything it printed to standard output.
///
/// Standard error is not captured: it is inherited, so the child writes directly to the
/// standard error of the current process.
///
/// # Errors
///
/// Returns a human-readable message when the command cannot be started, when it exits with
/// a non-success status, or when the captured output is not valid UTF-8.
pub fn output_result(cmd: &mut Command) -> Result<String, String> {
    cmd.stderr(Stdio::inherit());
    let captured =
        cmd.output().map_err(|e| format!("failed to run command: {cmd:?}: {e}"))?;

    if captured.status.success() {
        return String::from_utf8(captured.stdout).map_err(|err| format!("{err:?}"));
    }

    // Standard error was inherited above, so this buffer is normally empty; it is still
    // appended to keep the message layout stable.
    let stderr = String::from_utf8(captured.stderr).map_err(|err| format!("{err:?}"))?;
    Err(format!(
        "command did not execute successfully: {:?}\nexpected success, got: {}\n{}",
        cmd, captured.status, stderr
    ))
}
29
/// Represents the result of checking whether a set of paths
/// has been modified locally or not.
#[derive(PartialEq, Debug, Clone)]
pub enum PathFreshness {
    /// Artifacts should be downloaded from this upstream commit,
    /// there are no local modifications.
    LastModifiedUpstream { upstream: String },
    /// There are local modifications to a certain set of paths.
    /// "Local" essentially means "not-upstream" here.
    /// `upstream` is the latest upstream merge commit that made modifications to the
    /// set of paths.
    HasLocalModifications { upstream: String },
    /// No upstream commit was found.
    /// This should not happen in most reasonable circumstances, but one never knows.
    MissingUpstream,
}
46
47/// This function figures out if a set of paths was last modified upstream or
48/// if there are some local modifications made to them.
49/// It can be used to figure out if we should download artifacts from CI or rather
50/// build them locally.
51///
52/// The function assumes that at least a single upstream bors merge commit is in the
53/// local git history.
54///
55/// `target_paths` should be a non-empty slice of paths (git `pathspec`s) relative to `git_dir`
56/// whose modifications would invalidate the artifact.
57/// Each pathspec can also be a negative match, i.e. `:!foo`. This matches changes outside
58/// the `foo` directory.
59/// See <https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefpathspecapathspec>
60/// for how git `pathspec` works.
61///
62/// The function behaves differently in CI and outside CI.
63///
64/// - Outside CI, we want to find out if `target_paths` were modified in some local commit on
65///   top of the latest upstream commit that is available in local git history.
66///   If not, we try to find the most recent upstream commit (which we assume are commits
67///   made by bors) that modified `target_paths`.
68///   We don't want to simply take the latest master commit to avoid changing the output of
69///   this function frequently after rebasing on the latest master branch even if `target_paths`
70///   were not modified upstream in the meantime. In that case we would be redownloading CI
71///   artifacts unnecessarily.
72///
73/// - In CI, we use a shallow clone of depth 2, i.e., we fetch only a single parent commit
74///   (which will be the most recent bors merge commit) and do not have access
75///   to the full git history. Luckily, we only need to distinguish between two situations:
76///   1) The current PR made modifications to `target_paths`.
77///      In that case, a build is typically necessary.
78///   2) The current PR did not make modifications to `target_paths`.
79///      In that case we simply take the latest upstream commit, because on CI there is no need to avoid
80///      redownloading.
81pub fn check_path_modifications(
82    git_dir: &Path,
83    config: &GitConfig<'_>,
84    target_paths: &[&str],
85    ci_env: CiEnv,
86) -> Result<PathFreshness, String> {
87    assert!(!target_paths.is_empty());
88    for path in target_paths {
89        assert!(Path::new(path.trim_start_matches(":!")).is_relative());
90    }
91
92    let upstream_sha = if matches!(ci_env, CiEnv::GitHubActions) {
93        // Here the situation is different for PR CI and try/auto CI.
94        // For PR CI, we have the following history:
95        // <merge commit made by GitHub>
96        // 1-N PR commits
97        // upstream merge commit made by bors
98        //
99        // For try/auto CI, we have the following history:
100        // <**non-upstream** merge commit made by bors>
101        // 1-N PR commits
102        // upstream merge commit made by bors
103        //
104        // But on both cases, HEAD should be a merge commit.
105        // So if HEAD contains modifications of `target_paths`, our PR has modified
106        // them. If not, we can use the only available upstream commit for downloading
107        // artifacts.
108
109        // Do not include HEAD, as it is never an upstream commit
110        // If we do not find an upstream commit in CI, something is seriously wrong.
111        Some(
112            get_closest_upstream_commit(Some(git_dir), config, ci_env)?
113                .expect("No upstream commit was found on CI"),
114        )
115    } else {
116        // Outside CI, we want to find the most recent upstream commit that
117        // modified the set of paths, to have an upstream reference that does not change
118        // unnecessarily often.
119        // However, if such commit is not found, we can fall back to the latest upstream commit
120        let upstream_with_modifications =
121            get_latest_upstream_commit_that_modified_files(git_dir, config, target_paths)?;
122        match upstream_with_modifications {
123            Some(sha) => Some(sha),
124            None => get_closest_upstream_commit(Some(git_dir), config, ci_env)?,
125        }
126    };
127
128    let Some(upstream_sha) = upstream_sha else {
129        return Ok(PathFreshness::MissingUpstream);
130    };
131
132    // For local environments, we want to find out if something has changed
133    // from the latest upstream commit.
134    // However, that should be equivalent to checking if something has changed
135    // from the latest upstream commit *that modified `target_paths`*, and
136    // with this approach we do not need to invoke git an additional time.
137    if has_changed_since(git_dir, &upstream_sha, target_paths) {
138        Ok(PathFreshness::HasLocalModifications { upstream: upstream_sha })
139    } else {
140        Ok(PathFreshness::LastModifiedUpstream { upstream: upstream_sha })
141    }
142}
143
144/// Returns true if any of the passed `paths` have changed since the `base` commit.
145pub fn has_changed_since(git_dir: &Path, base: &str, paths: &[&str]) -> bool {
146    run_git_diff_index(Some(git_dir), |cmd| {
147        cmd.args(["--quiet", base, "--"]).args(paths);
148
149        // Exit code 0 => no changes
150        // Exit code 1 => some changes were detected
151        !cmd.status().expect("cannot run git diff-index").success()
152    })
153}
154
/// Author e-mail of the legacy bors account. Upstream lookups additionally search for it
/// whenever the configured merge commit e-mail is a different one.
const LEGACY_BORS_EMAIL: &str = "bors@rust-lang.org";
156
/// Prefixes every `[`, `]` and `.` in a git user e-mail with a backslash, so that git
/// commands treat these characters literally instead of as regex special characters.
fn escape_email_git_regex(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        if matches!(ch, '[' | ']' | '.') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
162
163/// Returns the latest upstream commit that modified `target_paths`, or `None` if no such commit
164/// was found.
165fn get_latest_upstream_commit_that_modified_files(
166    git_dir: &Path,
167    git_config: &GitConfig<'_>,
168    target_paths: &[&str],
169) -> Result<Option<String>, String> {
170    let mut git = Command::new("git");
171    git.current_dir(git_dir);
172
173    // In theory, we could just use
174    // `git rev-list --first-parent HEAD --author=<merge-bot> -- <paths>`
175    // to find the latest upstream commit that modified `<paths>`.
176    // However, this does not work if you are in a subtree sync branch that contains merge commits
177    // which have the subtree history as their first parent, and the rustc history as second parent:
178    // `--first-parent` will just walk up the subtree history and never see a single rustc commit.
179    // We thus have to take a two-pronged approach. First lookup the most recent upstream commit
180    // by *date* (this should work even in a subtree sync branch), and then start the lookup for
181    // modified paths starting from that commit.
182    //
183    // See https://github.com/rust-lang/rust/pull/138591#discussion_r2037081858 for more details.
184    let upstream = get_closest_upstream_commit(Some(git_dir), git_config, CiEnv::None)?
185        .unwrap_or_else(|| "HEAD".to_string());
186
187    git.args([
188        "rev-list",
189        "--first-parent",
190        "-n1",
191        &upstream,
192        "--author",
193        &escape_email_git_regex(git_config.git_merge_commit_email),
194    ]);
195
196    // Also search for legacy bors account, before we accrue enough commits to
197    // have changes to all relevant file paths done by new bors.
198    if git_config.git_merge_commit_email != LEGACY_BORS_EMAIL {
199        git.args(["--author", LEGACY_BORS_EMAIL]);
200    }
201
202    if !target_paths.is_empty() {
203        git.arg("--").args(target_paths);
204    }
205    let output = output_result(&mut git)?.trim().to_owned();
206    if output.is_empty() { Ok(None) } else { Ok(Some(output)) }
207}
208
209/// Returns the most recent (ordered chronologically) commit found in the local history that
210/// should exist upstream. We identify upstream commits by the e-mail of the commit
211/// author.
212///
213/// If we are in CI, we simply return our first parent.
214pub fn get_closest_upstream_commit(
215    git_dir: Option<&Path>,
216    config: &GitConfig<'_>,
217    env: CiEnv,
218) -> Result<Option<String>, String> {
219    let base = match env {
220        CiEnv::None => "HEAD",
221        CiEnv::GitHubActions => {
222            // On CI, we should always have a non-upstream merge commit at the tip,
223            // and our first parent should be the most recently merged upstream commit.
224            // We thus simply return our first parent.
225            return resolve_commit_sha(git_dir, "HEAD^1").map(Some);
226        }
227    };
228
229    let mut git = Command::new("git");
230
231    if let Some(git_dir) = git_dir {
232        git.current_dir(git_dir);
233    }
234
235    // We do not use `--first-parent`, because we can be in a situation (outside CI) where we have
236    // a subtree merge that actually has the main rustc history as its second parent.
237    // Using `--first-parent` would recurse into the history of the subtree, which could have some
238    // old bors commits that are not relevant to us.
239    // With `--author-date-order`, git recurses into all parent subtrees, and returns the most
240    // chronologically recent bors commit.
241    // Here we assume that none of our subtrees use bors anymore, and that all their old bors
242    // commits are way older than recent rustc bors commits!
243    git.args([
244        "rev-list",
245        "--author-date-order",
246        &format!("--author={}", &escape_email_git_regex(config.git_merge_commit_email),),
247        "-n1",
248        base,
249    ]);
250
251    // Also search for legacy bors account, before we accrue enough commits to
252    // have changes to all relevant file paths done by new bors.
253    if config.git_merge_commit_email != LEGACY_BORS_EMAIL {
254        git.args(["--author", LEGACY_BORS_EMAIL]);
255    }
256
257    let output = output_result(&mut git)?.trim().to_owned();
258    if output.is_empty() { Ok(None) } else { Ok(Some(output)) }
259}
260
261/// Resolve the commit SHA of `commit_ref`.
262fn resolve_commit_sha(git_dir: Option<&Path>, commit_ref: &str) -> Result<String, String> {
263    let mut git = Command::new("git");
264
265    if let Some(git_dir) = git_dir {
266        git.current_dir(git_dir);
267    }
268
269    git.args(["rev-parse", commit_ref]);
270
271    Ok(output_result(&mut git)?.trim().to_owned())
272}
273
274/// Returns the files that have been modified in the current branch compared to the master branch.
275/// This includes committed changes, uncommitted changes, and changes that are not even staged.
276///
277/// The `extensions` parameter can be used to filter the files by their extension.
278/// Does not include removed files.
279/// If `extensions` is empty, all files will be returned.
280pub fn get_git_modified_files(
281    config: &GitConfig<'_>,
282    git_dir: Option<&Path>,
283    extensions: &[&str],
284) -> Result<Vec<String>, String> {
285    let Some(merge_base) = get_closest_upstream_commit(git_dir, config, CiEnv::None)? else {
286        return Err("No upstream commit was found".to_string());
287    };
288
289    let files = run_git_diff_index(git_dir, |cmd| {
290        output_result(cmd.args(["--name-status", merge_base.trim()]))
291    })?
292    .lines()
293    .filter_map(|f| {
294        let (status, name) = f.trim().split_once(char::is_whitespace).unwrap();
295        if status == "D" {
296            None
297        } else if Path::new(name).extension().map_or(extensions.is_empty(), |ext| {
298            // If there is no extension, we allow the path if `extensions` is empty
299            // If there is an extension, we allow it if `extension` is empty or it contains the
300            // extension.
301            extensions.is_empty() || extensions.contains(&ext.to_str().unwrap())
302        }) {
303            Some(name.to_owned())
304        } else {
305            None
306        }
307    })
308    .collect();
309    Ok(files)
310}
311
312/// diff-index can return outdated information, because it does not update the git index.
313/// This function uses `update-index` to update the index first, and then provides `func` with a
314/// command prepared to run `git diff-index`.
315fn run_git_diff_index<F, T>(git_dir: Option<&Path>, func: F) -> T
316where
317    F: FnOnce(&mut Command) -> T,
318{
319    let git = || {
320        let mut git = Command::new("git");
321        if let Some(git_dir) = git_dir {
322            git.current_dir(git_dir);
323        }
324        git
325    };
326
327    // We ignore the exit code, as it errors out when some files are modified.
328    let _ = output_result(git().args(["update-index", "--refresh", "-q"]));
329    func(git().arg("diff-index"))
330}
331
332/// Returns the files that haven't been added to git yet.
333pub fn get_git_untracked_files(git_dir: Option<&Path>) -> Result<Option<Vec<String>>, String> {
334    let mut git = Command::new("git");
335    if let Some(git_dir) = git_dir {
336        git.current_dir(git_dir);
337    }
338
339    let files = output_result(git.arg("ls-files").arg("--others").arg("--exclude-standard"))?
340        .lines()
341        .map(|s| s.trim().to_owned())
342        .collect();
343    Ok(Some(files))
344}