Skip to content
This repository was archived by the owner on May 28, 2025. It is now read-only.

Commit 082ab9c

Browse files
committed
Implement a new unified function for figuring out how if a set of paths have been modified locally
Also adds several git tests to make sure that the behavior works in common cases (PR CI, auto CI, local usage).
1 parent 49e5e4e commit 082ab9c

File tree

4 files changed

+465
-7
lines changed

4 files changed

+465
-7
lines changed

src/bootstrap/Cargo.lock

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -225,12 +225,12 @@ dependencies = [
225225

226226
[[package]]
227227
name = "errno"
228-
version = "0.3.9"
228+
version = "0.3.10"
229229
source = "registry+https://github.com/rust-lang/crates.io-index"
230-
checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba"
230+
checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d"
231231
dependencies = [
232232
"libc",
233-
"windows-sys 0.52.0",
233+
"windows-sys 0.59.0",
234234
]
235235

236236
[[package]]
@@ -334,9 +334,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
334334

335335
[[package]]
336336
name = "libc"
337-
version = "0.2.167"
337+
version = "0.2.171"
338338
source = "registry+https://github.com/rust-lang/crates.io-index"
339-
checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc"
339+
checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6"
340340

341341
[[package]]
342342
name = "libredox"

src/build_helper/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,6 @@ edition = "2021"
88
[dependencies]
99
serde = "1"
1010
serde_derive = "1"
11+
12+
[dev-dependencies]
13+
tempfile = "3.19"

src/build_helper/src/git.rs

Lines changed: 179 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
#[cfg(test)]
2+
mod tests;
3+
14
use std::path::Path;
25
use std::process::{Command, Stdio};
36

@@ -167,7 +170,181 @@ pub fn get_closest_merge_commit(
167170
Ok(output_result(&mut git)?.trim().to_owned())
168171
}
169172

170-
/// Returns the files that have been modified in the current branch compared to the last merge.
173+
/// Represents the result of checking whether a set of paths
174+
/// have been modified locally or not.
175+
#[derive(PartialEq, Debug)]
176+
pub enum PathFreshness {
177+
/// Artifacts should be downloaded from this upstream commit,
178+
/// there are no local modifications.
179+
LastModifiedUpstream { upstream: String },
180+
/// There are local modifications to a certain set of paths.
181+
/// "Local" essentially means "not-upstream" here.
182+
/// `upstream` is the latest upstream merge commit that made modifications to the
183+
/// set of paths.
184+
HasLocalModifications { upstream: String },
185+
}
186+
187+
/// This function figures out if a set of paths was last modified upstream or
188+
/// if there are some local modifications made to them.
189+
///
190+
/// It can be used to figure out if we should download artifacts from CI or rather
191+
/// build them locally.
192+
///
193+
/// `target_paths` should be a non-empty slice of paths (git `pathspec`s) relative to `git_dir`
194+
/// or the current working directory whose modifications would invalidate the artifact.
195+
/// Each pathspec can also be a negative match, i.e. `:!foo`. This matches changes outside
196+
/// the `foo` directory.
197+
/// See https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefpathspecapathspec
198+
/// for how git `pathspec` works.
199+
///
200+
/// The function behaves differently in CI and outside CI.
201+
///
202+
/// - Outside CI, we want to find out if `target_paths` were modified in some local commit on
203+
/// top of the local master branch.
204+
/// If not, we try to find the most recent upstream commit (which we assume are commits
205+
/// made by bors) that modified `target_paths`.
206+
/// We don't want to simply take the latest master commit to avoid changing the output of
207+
/// this function frequently after rebasing on the latest master branch even if `target_paths`
208+
/// were not modified upstream in the meantime. In that case we would be redownloading CI
209+
/// artifacts unnecessarily.
210+
///
211+
/// - In CI, we always fetch only a single parent merge commit, so we do not have access
212+
/// to the full git history. Luckily, we only need to distinguish between two situations:
213+
/// 1) The current PR made modifications to `target_paths`.
214+
/// In that case, a build is typically necessary.
215+
/// 2) The current PR did not make modifications to `target_paths`.
216+
/// In that case we simply take the latest upstream commit, because on CI there is no need to avoid
217+
/// redownloading.
218+
pub fn check_path_modifications(
219+
git_dir: Option<&Path>,
220+
config: &GitConfig<'_>,
221+
target_paths: &[&str],
222+
ci_env: CiEnv,
223+
) -> Result<PathFreshness, String> {
224+
assert!(!target_paths.is_empty());
225+
for path in target_paths {
226+
assert!(Path::new(path.trim_start_matches(":!")).is_relative());
227+
}
228+
229+
let upstream_sha = if matches!(ci_env, CiEnv::GitHubActions) {
230+
// Here the situation is different for PR CI and try/auto CI.
231+
// For PR CI, we have the following history:
232+
// <merge commit made by GitHub>
233+
// 1-N PR commits
234+
// upstream merge commit made by bors
235+
//
236+
// For try/auto CI, we have the following history:
237+
// <**non-upstream** merge commit made by bors>
238+
// 1-N PR commits
239+
// upstream merge commit made by bors
240+
//
241+
// But on both cases, HEAD should be a merge commit.
242+
// So if HEAD contains modifications of `target_paths`, our PR has modified
243+
// them. If not, we can use the only available upstream commit for downloading
244+
// artifacts.
245+
246+
// Do not include HEAD, as it is never an upstream commit
247+
get_closest_upstream_commit(git_dir, config, ci_env)?
248+
} else {
249+
// Outside CI, we have to find the most recent upstream commit that
250+
// modified the set of paths, to have an upstream reference.
251+
let upstream_sha = get_latest_commit_that_modified_files(
252+
git_dir,
253+
target_paths,
254+
config.git_merge_commit_email,
255+
)?;
256+
let Some(upstream_sha) = upstream_sha else {
257+
eprintln!("No upstream commit that modified paths {target_paths:?} found.");
258+
eprintln!("Try to fetch more upstream history.");
259+
return Err("No upstream commit with modifications found".to_string());
260+
};
261+
upstream_sha
262+
};
263+
264+
if has_changed_since(git_dir, &upstream_sha, target_paths) {
265+
Ok(PathFreshness::HasLocalModifications { upstream: upstream_sha })
266+
} else {
267+
Ok(PathFreshness::LastModifiedUpstream { upstream: upstream_sha })
268+
}
269+
}
270+
271+
/// Returns true if any of the passed `paths` have changed since the `base` commit.
272+
pub fn has_changed_since(git_dir: Option<&Path>, base: &str, paths: &[&str]) -> bool {
273+
let mut git = Command::new("git");
274+
275+
if let Some(git_dir) = git_dir {
276+
git.current_dir(git_dir);
277+
}
278+
279+
git.args(["diff-index", "--quiet", base, "--"]).args(paths);
280+
281+
// Exit code 0 => no changes
282+
// Exit code 1 => some changes were detected
283+
!git.status().expect("cannot run git diff-index").success()
284+
}
285+
286+
/// Returns the latest commit that modified `target_paths`, or `None` if no such commit was found.
287+
/// If `author` is `Some`, only considers commits made by that author.
288+
fn get_latest_commit_that_modified_files(
289+
git_dir: Option<&Path>,
290+
target_paths: &[&str],
291+
author: &str,
292+
) -> Result<Option<String>, String> {
293+
let mut git = Command::new("git");
294+
295+
if let Some(git_dir) = git_dir {
296+
git.current_dir(git_dir);
297+
}
298+
299+
git.args(["rev-list", "-n1", "--first-parent", "HEAD", "--author", author]);
300+
301+
if !target_paths.is_empty() {
302+
git.arg("--").args(target_paths);
303+
}
304+
let output = output_result(&mut git)?.trim().to_owned();
305+
if output.is_empty() { Ok(None) } else { Ok(Some(output)) }
306+
}
307+
308+
/// Returns the most recent commit found in the local history that should definitely
309+
/// exist upstream. We identify upstream commits by the e-mail of the commit author.
310+
///
311+
/// If `include_head` is false, the HEAD (current) commit will be ignored and only
312+
/// its parents will be searched. This is useful for try/auto CI, where HEAD is
313+
/// actually a commit made by bors, although it is not upstream yet.
314+
fn get_closest_upstream_commit(
315+
git_dir: Option<&Path>,
316+
config: &GitConfig<'_>,
317+
env: CiEnv,
318+
) -> Result<String, String> {
319+
let mut git = Command::new("git");
320+
321+
if let Some(git_dir) = git_dir {
322+
git.current_dir(git_dir);
323+
}
324+
325+
let base = match env {
326+
CiEnv::None => "HEAD",
327+
CiEnv::GitHubActions => {
328+
// On CI, we always have a merge commit at the tip.
329+
// We thus skip it, because although it can be creatd by
330+
// `config.git_merge_commit_email`, it should not be upstream.
331+
"HEAD^1"
332+
}
333+
};
334+
git.args([
335+
"rev-list",
336+
&format!("--author={}", config.git_merge_commit_email),
337+
"-n1",
338+
"--first-parent",
339+
&base,
340+
]);
341+
342+
Ok(output_result(&mut git)?.trim().to_owned())
343+
}
344+
345+
/// Returns the files that have been modified in the current branch compared to the master branch.
346+
/// This includes committed changes, uncommitted changes, and changes that are not even staged.
347+
///
171348
/// The `extensions` parameter can be used to filter the files by their extension.
172349
/// Does not include removed files.
173350
/// If `extensions` is empty, all files will be returned.
@@ -176,7 +353,7 @@ pub fn get_git_modified_files(
176353
git_dir: Option<&Path>,
177354
extensions: &[&str],
178355
) -> Result<Vec<String>, String> {
179-
let merge_base = get_closest_merge_commit(git_dir, config, &[])?;
356+
let merge_base = get_closest_upstream_commit(git_dir, config, CiEnv::None)?;
180357

181358
let mut git = Command::new("git");
182359
if let Some(git_dir) = git_dir {

0 commit comments

Comments
 (0)