build_helper/git.rs
1use std::path::{Path, PathBuf};
2use std::process::{Command, Stdio};
3
4use crate::ci::CiEnv;
5
/// Git-related settings used to recognise upstream commits in the local history.
#[derive(Debug)]
pub struct GitConfig<'a> {
    /// Name of the nightly branch (not read by the code visible in this file).
    pub nightly_branch: &'a str,
    /// E-mail of the author of upstream merge commits. It is escaped and passed to
    /// `git rev-list --author` when searching the history for upstream commits.
    pub git_merge_commit_email: &'a str,
}
11
/// Runs `cmd` to completion and returns everything it wrote to standard output.
///
/// The child's standard error is always inherited from the current process; any
/// stderr configuration previously set on `cmd` is overridden.
///
/// # Errors
///
/// Returns a human-readable message if the command cannot be started, if it exits
/// with a non-success status, or if its captured output is not valid UTF-8.
pub fn output_result(cmd: &mut Command) -> Result<String, String> {
    let spawned = cmd.stderr(Stdio::inherit()).output();
    let output = spawned.map_err(|e| format!("failed to run command: {cmd:?}: {e}"))?;

    if output.status.success() {
        return String::from_utf8(output.stdout).map_err(|err| format!("{err:?}"));
    }

    // Stderr is inherited above, so this buffer is expected to be empty; it is still
    // decoded and appended to keep the message layout stable.
    let stderr = String::from_utf8(output.stderr).map_err(|err| format!("{err:?}"))?;
    Err(format!(
        "command did not execute successfully: {cmd:?}\n\
         expected success, got: {}\n{stderr}",
        output.status,
    ))
}
29
/// Outcome of checking whether a set of paths carries modifications that are not
/// present upstream.
#[derive(PartialEq, Debug, Clone)]
pub enum PathFreshness {
    /// No local modifications were found; artifacts should be downloaded from the
    /// `upstream` commit.
    LastModifiedUpstream { upstream: String },
    /// Some of the paths were modified locally, where "locally" essentially means
    /// "not upstream".
    /// `upstream` is the upstream commit the paths were compared against and
    /// `modifications` lists the paths that differ from it.
    HasLocalModifications { upstream: String, modifications: Vec<PathBuf> },
    /// No upstream commit could be found.
    /// This is not expected under reasonable circumstances, but it is handled anyway.
    MissingUpstream,
}
46
47/// This function figures out if a set of paths was last modified upstream or
48/// if there are some local modifications made to them.
49/// It can be used to figure out if we should download artifacts from CI or rather
50/// build them locally.
51///
52/// The function assumes that at least a single upstream bors merge commit is in the
53/// local git history.
54///
55/// `target_paths` should be a non-empty slice of paths (git `pathspec`s) relative to `git_dir`
56/// whose modifications would invalidate the artifact.
57/// Each pathspec can also be a negative match, i.e. `:!foo`. This matches changes outside
58/// the `foo` directory.
59/// See <https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefpathspecapathspec>
60/// for how git `pathspec` works.
61///
62/// The function behaves differently in CI and outside CI.
63///
64/// - Outside CI, we want to find out if `target_paths` were modified in some local commit on
65/// top of the latest upstream commit that is available in local git history.
66/// If not, we try to find the most recent upstream commit (which we assume are commits
67/// made by bors) that modified `target_paths`.
68/// We don't want to simply take the latest master commit to avoid changing the output of
69/// this function frequently after rebasing on the latest master branch even if `target_paths`
70/// were not modified upstream in the meantime. In that case we would be redownloading CI
71/// artifacts unnecessarily.
72///
73/// - In CI, we use a shallow clone of depth 2, i.e., we fetch only a single parent commit
74/// (which will be the most recent bors merge commit) and do not have access
75/// to the full git history. Luckily, we only need to distinguish between two situations:
76/// 1) The current PR made modifications to `target_paths`.
77/// In that case, a build is typically necessary.
78/// 2) The current PR did not make modifications to `target_paths`.
79/// In that case we simply take the latest upstream commit, because on CI there is no need to avoid
80/// redownloading.
81pub fn check_path_modifications(
82 git_dir: &Path,
83 config: &GitConfig<'_>,
84 target_paths: &[&str],
85 ci_env: CiEnv,
86) -> Result<PathFreshness, String> {
87 assert!(!target_paths.is_empty());
88 for path in target_paths {
89 assert!(Path::new(path.trim_start_matches(":!")).is_relative());
90 }
91
92 let upstream_sha = if matches!(ci_env, CiEnv::GitHubActions) {
93 // Here the situation is different for PR CI and try/auto CI.
94 // For PR CI, we have the following history:
95 // <merge commit made by GitHub>
96 // 1-N PR commits
97 // upstream merge commit made by bors
98 //
99 // For try/auto CI, we have the following history:
100 // <**non-upstream** merge commit made by bors>
101 // 1-N PR commits
102 // upstream merge commit made by bors
103 //
104 // But on both cases, HEAD should be a merge commit.
105 // So if HEAD contains modifications of `target_paths`, our PR has modified
106 // them. If not, we can use the only available upstream commit for downloading
107 // artifacts.
108
109 // Do not include HEAD, as it is never an upstream commit
110 // If we do not find an upstream commit in CI, something is seriously wrong.
111 Some(
112 get_closest_upstream_commit(Some(git_dir), config, ci_env)?
113 .expect("No upstream commit was found on CI"),
114 )
115 } else {
116 // Outside CI, we want to find the most recent upstream commit that
117 // modified the set of paths, to have an upstream reference that does not change
118 // unnecessarily often.
119 // However, if such commit is not found, we can fall back to the latest upstream commit
120 let upstream_with_modifications =
121 get_latest_upstream_commit_that_modified_files(git_dir, config, target_paths)?;
122 match upstream_with_modifications {
123 Some(sha) => Some(sha),
124 None => get_closest_upstream_commit(Some(git_dir), config, ci_env)?,
125 }
126 };
127
128 let Some(upstream_sha) = upstream_sha else {
129 return Ok(PathFreshness::MissingUpstream);
130 };
131
132 // For local environments, we want to find out if something has changed
133 // from the latest upstream commit.
134 // However, that should be equivalent to checking if something has changed
135 // from the latest upstream commit *that modified `target_paths`*, and
136 // with this approach we do not need to invoke git an additional time.
137 let modifications = changes_since(git_dir, &upstream_sha, target_paths)?;
138 if !modifications.is_empty() {
139 Ok(PathFreshness::HasLocalModifications { upstream: upstream_sha, modifications })
140 } else {
141 Ok(PathFreshness::LastModifiedUpstream { upstream: upstream_sha })
142 }
143}
144
145/// Returns true if any of the passed `paths` have changed since the `base` commit.
146pub fn changes_since(git_dir: &Path, base: &str, paths: &[&str]) -> Result<Vec<PathBuf>, String> {
147 use std::io::BufRead;
148
149 run_git_diff_index(Some(git_dir), |cmd| {
150 cmd.args([base, "--name-only", "--"]).args(paths);
151
152 let output = cmd.stderr(Stdio::inherit()).output().expect("cannot run git diff-index");
153 if !output.status.success() {
154 return Err(format!("failed to run: {cmd:?}: {:?}", output.status));
155 }
156
157 output
158 .stdout
159 .lines()
160 .map(|res| match res {
161 Ok(line) => Ok(PathBuf::from(line)),
162 Err(e) => Err(format!("invalid UTF-8 in diff-index: {e:?}")),
163 })
164 .collect()
165 })
166}
167
// E-mail that the new bors used for its merge commits for a few days, before it learned
// how to reuse the original homu e-mail.
// FIXME: remove in Q2 2026
const TEMPORARY_BORS_EMAIL: &str = "122020455+rust-bors[bot]@users.noreply.github.com";
172
/// Backslash-escapes the characters `[`, `]` and `.` in a git user e-mail, so that git
/// commands treat them literally instead of as regex special characters.
fn escape_email_git_regex(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        if matches!(ch, '[' | ']' | '.') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
178
179/// Returns the latest upstream commit that modified `target_paths`, or `None` if no such commit
180/// was found.
181fn get_latest_upstream_commit_that_modified_files(
182 git_dir: &Path,
183 git_config: &GitConfig<'_>,
184 target_paths: &[&str],
185) -> Result<Option<String>, String> {
186 let mut git = Command::new("git");
187 git.current_dir(git_dir);
188
189 // In theory, we could just use
190 // `git rev-list --first-parent HEAD --author=<merge-bot> -- <paths>`
191 // to find the latest upstream commit that modified `<paths>`.
192 // However, this does not work if you are in a subtree sync branch that contains merge commits
193 // which have the subtree history as their first parent, and the rustc history as second parent:
194 // `--first-parent` will just walk up the subtree history and never see a single rustc commit.
195 // We thus have to take a two-pronged approach. First lookup the most recent upstream commit
196 // by *date* (this should work even in a subtree sync branch), and then start the lookup for
197 // modified paths starting from that commit.
198 //
199 // See https://github.com/rust-lang/rust/pull/138591#discussion_r2037081858 for more details.
200 let upstream = get_closest_upstream_commit(Some(git_dir), git_config, CiEnv::None)?
201 .unwrap_or_else(|| "HEAD".to_string());
202
203 git.args([
204 "rev-list",
205 "--first-parent",
206 "-n1",
207 &upstream,
208 "--author",
209 &escape_email_git_regex(git_config.git_merge_commit_email),
210 ]);
211
212 // Also search for temporary bors account
213 if git_config.git_merge_commit_email != TEMPORARY_BORS_EMAIL {
214 git.args(["--author", &escape_email_git_regex(TEMPORARY_BORS_EMAIL)]);
215 }
216
217 if !target_paths.is_empty() {
218 git.arg("--").args(target_paths);
219 }
220 let output = output_result(&mut git)?.trim().to_owned();
221 if output.is_empty() { Ok(None) } else { Ok(Some(output)) }
222}
223
224/// Returns the most recent (ordered chronologically) commit found in the local history that
225/// should exist upstream. We identify upstream commits by the e-mail of the commit
226/// author.
227///
228/// If we are in CI, we simply return our first parent.
229pub fn get_closest_upstream_commit(
230 git_dir: Option<&Path>,
231 config: &GitConfig<'_>,
232 env: CiEnv,
233) -> Result<Option<String>, String> {
234 let base = match env {
235 CiEnv::None => "HEAD",
236 CiEnv::GitHubActions => {
237 // On CI, we should always have a non-upstream merge commit at the tip,
238 // and our first parent should be the most recently merged upstream commit.
239 // We thus simply return our first parent.
240 return resolve_commit_sha(git_dir, "HEAD^1").map(Some);
241 }
242 };
243
244 let mut git = Command::new("git");
245
246 if let Some(git_dir) = git_dir {
247 git.current_dir(git_dir);
248 }
249
250 // We do not use `--first-parent`, because we can be in a situation (outside CI) where we have
251 // a subtree merge that actually has the main rustc history as its second parent.
252 // Using `--first-parent` would recurse into the history of the subtree, which could have some
253 // old bors commits that are not relevant to us.
254 // With `--author-date-order`, git recurses into all parent subtrees, and returns the most
255 // chronologically recent bors commit.
256 // Here we assume that none of our subtrees use bors anymore, and that all their old bors
257 // commits are way older than recent rustc bors commits!
258 git.args([
259 "rev-list",
260 "--author-date-order",
261 &format!("--author={}", &escape_email_git_regex(config.git_merge_commit_email),),
262 "-n1",
263 base,
264 ]);
265
266 // Also search for temporary bors account
267 if config.git_merge_commit_email != TEMPORARY_BORS_EMAIL {
268 git.args(["--author", &escape_email_git_regex(TEMPORARY_BORS_EMAIL)]);
269 }
270
271 let output = output_result(&mut git)?.trim().to_owned();
272 if output.is_empty() { Ok(None) } else { Ok(Some(output)) }
273}
274
275/// Resolve the commit SHA of `commit_ref`.
276fn resolve_commit_sha(git_dir: Option<&Path>, commit_ref: &str) -> Result<String, String> {
277 let mut git = Command::new("git");
278
279 if let Some(git_dir) = git_dir {
280 git.current_dir(git_dir);
281 }
282
283 git.args(["rev-parse", commit_ref]);
284
285 Ok(output_result(&mut git)?.trim().to_owned())
286}
287
288/// Returns the files that have been modified in the current branch compared to the master branch.
289/// This includes committed changes, uncommitted changes, and changes that are not even staged.
290///
291/// The `extensions` parameter can be used to filter the files by their extension.
292/// Does not include removed files.
293/// If `extensions` is empty, all files will be returned.
294pub fn get_git_modified_files(
295 config: &GitConfig<'_>,
296 git_dir: Option<&Path>,
297 extensions: &[&str],
298) -> Result<Vec<String>, String> {
299 let Some(merge_base) = get_closest_upstream_commit(git_dir, config, CiEnv::None)? else {
300 return Err("No upstream commit was found".to_string());
301 };
302
303 let files = run_git_diff_index(git_dir, |cmd| {
304 output_result(cmd.args(["--name-status", merge_base.trim()]))
305 })?
306 .lines()
307 .filter_map(|f| {
308 let (status, name) = f.trim().split_once(char::is_whitespace).unwrap();
309 if status == "D" {
310 None
311 } else if Path::new(name).extension().map_or(extensions.is_empty(), |ext| {
312 // If there is no extension, we allow the path if `extensions` is empty
313 // If there is an extension, we allow it if `extension` is empty or it contains the
314 // extension.
315 extensions.is_empty() || extensions.contains(&ext.to_str().unwrap())
316 }) {
317 Some(name.to_owned())
318 } else {
319 None
320 }
321 })
322 .collect();
323 Ok(files)
324}
325
326/// diff-index can return outdated information, because it does not update the git index.
327/// This function uses `update-index` to update the index first, and then provides `func` with a
328/// command prepared to run `git diff-index`.
329fn run_git_diff_index<F, T>(git_dir: Option<&Path>, func: F) -> T
330where
331 F: FnOnce(&mut Command) -> T,
332{
333 let git = || {
334 let mut git = Command::new("git");
335 if let Some(git_dir) = git_dir {
336 git.current_dir(git_dir);
337 }
338 git
339 };
340
341 // We ignore the exit code, as it errors out when some files are modified.
342 let _ = output_result(git().args(["update-index", "--refresh", "-q"]));
343 func(git().arg("diff-index"))
344}
345
346/// Returns the files that haven't been added to git yet.
347pub fn get_git_untracked_files(git_dir: Option<&Path>) -> Result<Option<Vec<String>>, String> {
348 let mut git = Command::new("git");
349 if let Some(git_dir) = git_dir {
350 git.current_dir(git_dir);
351 }
352
353 let files = output_result(git.arg("ls-files").arg("--others").arg("--exclude-standard"))?
354 .lines()
355 .map(|s| s.trim().to_owned())
356 .collect();
357 Ok(Some(files))
358}