Skip to content

Commit 4005851

Browse files
committed
Use --author-date-order when looking up upstream commits to support subtree synces
1 parent e9f3e3a commit 4005851

File tree

3 files changed

+143
-27
lines changed

3 files changed

+143
-27
lines changed

src/bootstrap/src/core/config/tests.rs

+71
Original file line numberDiff line numberDiff line change
@@ -858,6 +858,7 @@ fn test_local_changes_in_previous_upstream() {
858858
ctx.create_branch("feature");
859859
ctx.modify("d");
860860
ctx.commit();
861+
861862
assert_eq!(
862863
ctx.check_modifications(&["a"], CiEnv::None),
863864
PathFreshness::LastModifiedUpstream { upstream: sha }
@@ -914,3 +915,73 @@ fn test_local_changes_negative_path() {
914915
);
915916
});
916917
}
918+
919+
#[test]
920+
fn test_local_changes_subtree_that_used_bors() {
921+
// Here we simulate a very specific situation related to subtrees.
922+
// When you have merge commits locally, we should ignore them w.r.t. the artifact download
923+
// logic.
924+
// The upstream search code currently uses a simple heuristic:
925+
// - Find commits by bors (or in general an author with the merge commit e-mail)
926+
// - Find the newest such commit
927+
// This should make it work even for subtrees that:
928+
// - Used bors in the past (so they have bors merge commits in their history).
929+
// - Use Josh to merge rustc into the subtree, in a way that the rustc history is the second
930+
// parent, not the first one.
931+
//
932+
// In addition, when searching for modified files, we cannot simply start from HEAD, because
933+
// in this situation git wouldn't find the right commit.
934+
//
935+
// This test checks that this specific scenario will resolve to the right rustc commit, both
936+
// when finding a modified file and when finding a non-existent file (which essentially means
937+
// that we just lookup the most recent upstream commit).
938+
//
939+
// See https://github.com/rust-lang/rust/issues/101907#issuecomment-2697671282 for more details.
940+
git_test(|ctx| {
941+
ctx.create_upstream_merge(&["a"]);
942+
943+
// Start unrelated subtree history
944+
ctx.run_git(&["switch", "--orphan", "subtree"]);
945+
ctx.modify("bar");
946+
ctx.commit();
947+
// Now we need to emulate old bors commits in the subtree.
948+
// Git only has a resolution of one second, which is a problem, since our git logic orders
949+
// merge commits by their date.
950+
// To avoid sleeping in the test, we modify the commit date to be forcefully in the past.
951+
ctx.create_upstream_merge(&["subtree/a"]);
952+
ctx.run_git(&["commit", "--amend", "--date", "Wed Feb 16 14:00 2011 +0100", "--no-edit"]);
953+
954+
// Merge the subtree history into rustc
955+
ctx.switch_to_branch("main");
956+
ctx.run_git(&["merge", "subtree", "--allow-unrelated"]);
957+
958+
// Create a rustc commit that modifies a path that we're interested in (`x`)
959+
let upstream_1 = ctx.create_upstream_merge(&["x"]);
960+
// Create another bors commit
961+
let upstream_2 = ctx.create_upstream_merge(&["a"]);
962+
963+
ctx.switch_to_branch("subtree");
964+
965+
// Create a subtree branch
966+
ctx.create_branch("subtree-pr");
967+
ctx.modify("baz");
968+
ctx.commit();
969+
// We merge rustc into this branch (simulating a "subtree pull")
970+
ctx.merge("main", "committer <[email protected]>");
971+
972+
// And then merge that branch into the subtree (simulating a situation right before a
973+
// "subtree push")
974+
ctx.switch_to_branch("subtree");
975+
ctx.merge("subtree-pr", "committer <[email protected]>");
976+
977+
// And we want to check that we resolve to the right commits.
978+
assert_eq!(
979+
ctx.check_modifications(&["x"], CiEnv::None),
980+
PathFreshness::LastModifiedUpstream { upstream: upstream_1 }
981+
);
982+
assert_eq!(
983+
ctx.check_modifications(&["nonexistent"], CiEnv::None),
984+
PathFreshness::LastModifiedUpstream { upstream: upstream_2 }
985+
);
986+
});
987+
}

src/bootstrap/src/utils/tests/git.rs

+8-2
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,14 @@ impl GitCtx {
5353
modified_files: &[&str],
5454
author: &str,
5555
) -> String {
56+
let current_branch = self.get_current_branch();
57+
5658
self.create_branch(branch);
5759
for file in modified_files {
5860
self.modify(file);
5961
}
6062
self.commit();
61-
self.switch_to_branch("main");
63+
self.switch_to_branch(&current_branch);
6264
self.merge(branch, author);
6365
self.run_git(&["branch", "-d", branch]);
6466
self.get_current_commit()
@@ -68,12 +70,16 @@ impl GitCtx {
6870
self.run_git(&["rev-parse", "HEAD"])
6971
}
7072

73+
pub fn get_current_branch(&self) -> String {
74+
self.run_git(&["rev-parse", "--abbrev-ref", "HEAD"])
75+
}
76+
7177
pub fn merge(&self, branch: &str, author: &str) {
7278
self.run_git(&["merge", "--no-commit", "--no-ff", branch]);
7379
self.run_git(&[
7480
"commit".to_string(),
7581
"-m".to_string(),
76-
"Merge of {branch}".to_string(),
82+
format!("Merge of {branch} into {}", self.get_current_branch()),
7783
"--author".to_string(),
7884
author.to_string(),
7985
]);

src/build_helper/src/git.rs

+64-25
Original file line numberDiff line numberDiff line change
@@ -118,11 +118,8 @@ pub fn check_path_modifications(
118118
// modified the set of paths, to have an upstream reference that does not change
119119
// unnecessarily often.
120120
// However, if such commit is not found, we can fall back to the latest upstream commit
121-
let upstream_with_modifications = get_latest_commit_that_modified_files(
122-
git_dir,
123-
target_paths,
124-
config.git_merge_commit_email,
125-
)?;
121+
let upstream_with_modifications =
122+
get_latest_upstream_commit_that_modified_files(git_dir, config, target_paths)?;
126123
match upstream_with_modifications {
127124
Some(sha) => Some(sha),
128125
None => get_closest_upstream_commit(Some(git_dir), config, ci_env)?,
@@ -157,17 +154,38 @@ pub fn has_changed_since(git_dir: &Path, base: &str, paths: &[&str]) -> bool {
157154
!git.status().expect("cannot run git diff-index").success()
158155
}
159156

160-
/// Returns the latest commit that modified `target_paths`, or `None` if no such commit was found.
161-
/// If `author` is `Some`, only considers commits made by that author.
162-
fn get_latest_commit_that_modified_files(
157+
/// Returns the latest upstream commit that modified `target_paths`, or `None` if no such commit
158+
/// was found.
159+
fn get_latest_upstream_commit_that_modified_files(
163160
git_dir: &Path,
161+
git_config: &GitConfig<'_>,
164162
target_paths: &[&str],
165-
author: &str,
166163
) -> Result<Option<String>, String> {
167164
let mut git = Command::new("git");
168165
git.current_dir(git_dir);
169166

170-
git.args(["rev-list", "-n1", "--first-parent", "HEAD", "--author", author]);
167+
// In theory, we could just use
168+
// `git rev-list --first-parent HEAD --author=<merge-bot> -- <paths>`
169+
// to find the latest upstream commit that modified `<paths>`.
170+
// However, this does not work if you are in a subtree sync branch that contains merge commits
171+
// which have the subtree history as their first parent, and the rustc history as second parent:
172+
// `--first-parent` will just walk up the subtree history and never see a single rustc commit.
173+
// We thus have to take a two-pronged approach. First lookup the most recent upstream commit
174+
// by *date* (this should work even in a subtree sync branch), and then start the lookup for
175+
// modified paths starting from that commit.
176+
//
177+
// See https://github.com/rust-lang/rust/pull/138591#discussion_r2037081858 for more details.
178+
let upstream = get_closest_upstream_commit(Some(git_dir), git_config, CiEnv::None)?
179+
.unwrap_or_else(|| "HEAD".to_string());
180+
181+
git.args([
182+
"rev-list",
183+
"--first-parent",
184+
"-n1",
185+
&upstream,
186+
"--author",
187+
git_config.git_merge_commit_email,
188+
]);
171189

172190
if !target_paths.is_empty() {
173191
git.arg("--").args(target_paths);
@@ -176,44 +194,65 @@ fn get_latest_commit_that_modified_files(
176194
if output.is_empty() { Ok(None) } else { Ok(Some(output)) }
177195
}
178196

179-
/// Returns the most recent commit found in the local history that should definitely
180-
/// exist upstream. We identify upstream commits by the e-mail of the commit author.
197+
/// Returns the most recent (ordered chronologically) commit found in the local history that
198+
/// should exist upstream. We identify upstream commits by the e-mail of the commit
199+
/// author.
181200
///
182-
/// If `include_head` is false, the HEAD (current) commit will be ignored and only
183-
/// its parents will be searched. This is useful for try/auto CI, where HEAD is
184-
/// actually a commit made by bors, although it is not upstream yet.
201+
/// If we are in CI, we simply return our first parent.
185202
fn get_closest_upstream_commit(
186203
git_dir: Option<&Path>,
187204
config: &GitConfig<'_>,
188205
env: CiEnv,
189206
) -> Result<Option<String>, String> {
207+
let base = match env {
208+
CiEnv::None => "HEAD",
209+
CiEnv::GitHubActions => {
210+
// On CI, we should always have a non-upstream merge commit at the tip,
211+
// and our first parent should be the most recently merged upstream commit.
212+
// We thus simply return our first parent.
213+
return resolve_commit_sha(git_dir, "HEAD^1").map(Some);
214+
}
215+
};
216+
190217
let mut git = Command::new("git");
191218

192219
if let Some(git_dir) = git_dir {
193220
git.current_dir(git_dir);
194221
}
195222

196-
let base = match env {
197-
CiEnv::None => "HEAD",
198-
CiEnv::GitHubActions => {
199-
// On CI, we always have a merge commit at the tip.
200-
// We thus skip it, because although it can be created by
201-
// `config.git_merge_commit_email`, it should not be upstream.
202-
"HEAD^1"
203-
}
204-
};
223+
// We do not use `--first-parent`, because we can be in a situation (outside CI) where we have
224+
// a subtree merge that actually has the main rustc history as its second parent.
225+
// Using `--first-parent` would recurse into the history of the subtree, which could have some
226+
// old bors commits that are not relevant to us.
227+
// With `--author-date-order`, git recurses into all parent subtrees, and returns the most
228+
// chronologically recent bors commit.
229+
// Here we assume that none of our subtrees use bors anymore, and that all their old bors
230+
// commits are way older than recent rustc bors commits!
205231
git.args([
206232
"rev-list",
233+
"--author-date-order",
207234
&format!("--author={}", config.git_merge_commit_email),
208235
"-n1",
209-
"--first-parent",
210236
&base,
211237
]);
212238

213239
let output = output_result(&mut git)?.trim().to_owned();
214240
if output.is_empty() { Ok(None) } else { Ok(Some(output)) }
215241
}
216242

243+
/// Resolve the commit SHA of `commit_ref`.
244+
fn resolve_commit_sha(git_dir: Option<&Path>, commit_ref: &str) -> Result<String, String> {
245+
let mut git = Command::new("git");
246+
247+
if let Some(git_dir) = git_dir {
248+
git.current_dir(git_dir);
249+
}
250+
251+
git.args(["rev-parse", commit_ref]);
252+
253+
Ok(output_result(&mut git)?.trim().to_owned())
254+
}
255+
217256
/// Returns the files that have been modified in the current branch compared to the master branch.
218257
/// This includes committed changes, uncommitted changes, and changes that are not even staged.
219258
///

0 commit comments

Comments
 (0)