Skip to content

Commit 0a53f43

Browse files
authored
[utils] support "Reverts ${PR}" in commit messages (#112226)
A bisection in ChromeOS ended at a reverted commit, which wasn't flagged by this revert checking script, since it used `Reverts ${PR}` rather than `This reverts commit ${SHA}`. `grep` says that somewhere around 400 reverts in the last year have used `Reverts ${PR}` syntax. Support it. Tested in part by running the command that was expected to catch this revert: ``` $ ./revert_checker.py -C ~/llvm/main/ \ 3b5e7c8 origin/main \ | grep -q 4b0276d $ echo $? 0 ```
1 parent 9e6d24f commit 0a53f43

File tree

2 files changed

+147
-15
lines changed

2 files changed

+147
-15
lines changed

llvm/utils/revert_checker.py

Lines changed: 124 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -45,35 +45,78 @@
4545
import re
4646
import subprocess
4747
import sys
48-
from typing import Generator, List, NamedTuple, Iterable
48+
from typing import Dict, Generator, Iterable, List, NamedTuple, Optional, Tuple
4949

5050
assert sys.version_info >= (3, 6), "Only Python 3.6+ is supported."
5151

5252
# People are creative with their reverts, and heuristics are a bit difficult.
53-
# Like 90% of of reverts have "This reverts commit ${full_sha}".
54-
# Some lack that entirely, while others have many of them specified in ad-hoc
55-
# ways, while others use short SHAs and whatever.
53+
# At a glance, most reverts have "This reverts commit ${full_sha}". Many others
54+
# have `Reverts llvm/llvm-project#${PR_NUMBER}`.
5655
#
57-
# The 90% case is trivial to handle (and 100% free + automatic). The extra 10%
58-
# starts involving human intervention, which is probably not worth it for now.
56+
# By their powers combined, we should be able to automatically catch something
57+
# like 80% of reverts with reasonable confidence. At some point, human
58+
# intervention will always be required (e.g., I saw
59+
# ```
60+
# This reverts commit ${commit_sha_1} and
61+
# also ${commit_sha_2_shorthand}
62+
# ```
63+
# during my sample)
64+
65+
_CommitMessageReverts = NamedTuple(
66+
"_CommitMessageReverts",
67+
[
68+
("potential_shas", List[str]),
69+
("potential_pr_numbers", List[int]),
70+
],
71+
)
72+
5973

74+
def _try_parse_reverts_from_commit_message(
75+
commit_message: str,
76+
) -> _CommitMessageReverts:
77+
"""Tries to parse revert SHAs and LLVM PR numbers form the commit message.
6078
61-
def _try_parse_reverts_from_commit_message(commit_message: str) -> List[str]:
79+
Returns:
80+
A namedtuple containing:
81+
- A list of potentially reverted SHAs
82+
- A list of potentially reverted LLVM PR numbers
83+
"""
6284
if not commit_message:
63-
return []
85+
return _CommitMessageReverts([], [])
6486

65-
results = re.findall(r"This reverts commit ([a-f0-9]{40})\b", commit_message)
87+
sha_reverts = re.findall(
88+
r"This reverts commit ([a-f0-9]{40})\b",
89+
commit_message,
90+
)
6691

6792
first_line = commit_message.splitlines()[0]
6893
initial_revert = re.match(r'Revert ([a-f0-9]{6,}) "', first_line)
6994
if initial_revert:
70-
results.append(initial_revert.group(1))
71-
return results
95+
sha_reverts.append(initial_revert.group(1))
7296

97+
pr_numbers = [
98+
int(x)
99+
for x in re.findall(
100+
r"Reverts llvm/llvm-project#(\d+)",
101+
commit_message,
102+
)
103+
]
104+
105+
return _CommitMessageReverts(
106+
potential_shas=sha_reverts,
107+
potential_pr_numbers=pr_numbers,
108+
)
73109

74-
def _stream_stdout(command: List[str]) -> Generator[str, None, None]:
110+
111+
def _stream_stdout(
112+
command: List[str], cwd: Optional[str] = None
113+
) -> Generator[str, None, None]:
75114
with subprocess.Popen(
76-
command, stdout=subprocess.PIPE, encoding="utf-8", errors="replace"
115+
command,
116+
cwd=cwd,
117+
stdout=subprocess.PIPE,
118+
encoding="utf-8",
119+
errors="replace",
77120
) as p:
78121
assert p.stdout is not None # for mypy's happiness.
79122
yield from p.stdout
@@ -175,10 +218,43 @@ def _find_common_parent_commit(git_dir: str, ref_a: str, ref_b: str) -> str:
175218
).strip()
176219

177220

178-
def find_reverts(git_dir: str, across_ref: str, root: str) -> List[Revert]:
221+
def _load_pr_commit_mappings(
222+
git_dir: str, root: str, min_ref: str
223+
) -> Dict[int, List[str]]:
224+
git_log = ["git", "log", "--format=%H %s", f"{min_ref}..{root}"]
225+
results = collections.defaultdict(list)
226+
pr_regex = re.compile(r"\s\(#(\d+)\)$")
227+
for line in _stream_stdout(git_log, cwd=git_dir):
228+
m = pr_regex.search(line)
229+
if not m:
230+
continue
231+
232+
pr_number = int(m.group(1))
233+
sha = line.split(None, 1)[0]
234+
# N.B., these are kept in log (read: reverse chronological) order,
235+
# which is what's expected by `find_reverts`.
236+
results[pr_number].append(sha)
237+
return results
238+
239+
240+
# N.B., max_pr_lookback's default of 20K commits is arbitrary, but should be
241+
# enough for the 99% case of reverts: rarely should someone land a cleanish
242+
# revert of a >6 month old change...
243+
def find_reverts(
244+
git_dir: str, across_ref: str, root: str, max_pr_lookback: int = 20000
245+
) -> List[Revert]:
179246
"""Finds reverts across `across_ref` in `git_dir`, starting from `root`.
180247
181248
These reverts are returned in order of oldest reverts first.
249+
250+
Args:
251+
git_dir: git directory to find reverts in.
252+
across_ref: the ref to find reverts across.
253+
root: the 'main' ref to look for reverts on.
254+
max_pr_lookback: this function uses heuristics to map PR numbers to
255+
SHAs. These heuristics require that commit history from `root` to
256+
`some_parent_of_root` is loaded in memory. `max_pr_lookback` is how
257+
many commits behind `across_ref` should be loaded in memory.
182258
"""
183259
across_sha = _rev_parse(git_dir, across_ref)
184260
root_sha = _rev_parse(git_dir, root)
@@ -201,8 +277,41 @@ def find_reverts(git_dir: str, across_ref: str, root: str) -> List[Revert]:
201277
)
202278

203279
all_reverts = []
280+
# Lazily load PR <-> commit mappings, since it can be expensive.
281+
pr_commit_mappings = None
204282
for sha, commit_message in _log_stream(git_dir, root_sha, across_sha):
205-
reverts = _try_parse_reverts_from_commit_message(commit_message)
283+
reverts, pr_reverts = _try_parse_reverts_from_commit_message(
284+
commit_message,
285+
)
286+
if pr_reverts:
287+
if pr_commit_mappings is None:
288+
logging.info(
289+
"Loading PR <-> commit mappings. This may take a moment..."
290+
)
291+
pr_commit_mappings = _load_pr_commit_mappings(
292+
git_dir, root_sha, f"{across_sha}~{max_pr_lookback}"
293+
)
294+
logging.info(
295+
"Loaded %d PR <-> commit mappings", len(pr_commit_mappings)
296+
)
297+
298+
for reverted_pr_number in pr_reverts:
299+
reverted_shas = pr_commit_mappings.get(reverted_pr_number)
300+
if not reverted_shas:
301+
logging.warning(
302+
"No SHAs for reverted PR %d (commit %s)",
303+
reverted_pr_number,
304+
sha,
305+
)
306+
continue
307+
logging.debug(
308+
"Inferred SHAs %s for reverted PR %d (commit %s)",
309+
reverted_shas,
310+
reverted_pr_number,
311+
sha,
312+
)
313+
reverts.extend(reverted_shas)
314+
206315
if not reverts:
207316
continue
208317

llvm/utils/revert_checker_test.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ def test_reverted_noncommit_object_is_a_nop(self) -> None:
9696
git_dir=get_llvm_project_path(),
9797
across_ref="c9944df916e41b1014dff5f6f75d52297b48ecdc~",
9898
root="c9944df916e41b1014dff5f6f75d52297b48ecdc",
99+
max_pr_lookback=50,
99100
)
100101
self.assertEqual(reverts, [])
101102

@@ -113,6 +114,7 @@ def test_known_reverts_across_arbitrary_llvm_rev(self) -> None:
113114
git_dir=get_llvm_project_path(),
114115
across_ref="c47f971694be0159ffddfee8a75ae515eba91439",
115116
root="9f981e9adf9c8d29bb80306daf08d2770263ade6",
117+
max_pr_lookback=50,
116118
)
117119
self.assertEqual(
118120
reverts,
@@ -128,6 +130,27 @@ def test_known_reverts_across_arbitrary_llvm_rev(self) -> None:
128130
],
129131
)
130132

133+
def test_pr_based_revert_works(self) -> None:
134+
reverts = revert_checker.find_reverts(
135+
git_dir=get_llvm_project_path(),
136+
# This SHA is a direct child of the reverted SHA expected below.
137+
across_ref="2d5f3b0a61fb171617012a2c3ba05fd31fb3bb1d",
138+
# This SHA is a direct child of the revert SHA listed below.
139+
root="2c01b278580212914ec037bb5dd9b73702dfe7f1",
140+
max_pr_lookback=50,
141+
)
142+
self.assertEqual(
143+
reverts,
144+
[
145+
revert_checker.Revert(
146+
# This SHA is a `Reverts ${PR}` for #111004.
147+
sha="50866e84d1da8462aeb96607bf6d9e5bbd5869c5",
148+
# ...And this was the commit for #111004.
149+
reverted_sha="67160c5ab5f5b7fd5fa7851abcfde367c8a9f91b",
150+
),
151+
],
152+
)
153+
131154

132155
if __name__ == "__main__":
133156
unittest.main()

0 commit comments

Comments
 (0)