Skip to content

Commit 42ebf3e

Browse files
klensy authored (co-authored by klensy)
[utils][filecheck-lint]: speedup filecheck_lint (#94191)
For example, clang\test\OpenMP\task_codegen.cpp went from 0m29.570s to 0m0.159s, and clang\test\Driver went from 4m55.917s to 1m48.053s. Most of the win comes from big files. --------- Co-authored-by: klensy <[email protected]>
1 parent 7eaae4e commit 42ebf3e

File tree

2 files changed: +40 additions, -31 deletions

llvm/utils/filecheck_lint/filecheck_lint.py

Lines changed: 37 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -81,29 +81,40 @@ class FileRange:
8181
"""Stores the coordinates of a span on a single line within a file.
8282
8383
Attributes:
84-
line: the line number
85-
start_column: the (inclusive) column where the span starts
86-
end_column: the (inclusive) column where the span ends
84+
content: line str
85+
start_byte: the (inclusive) byte offset the span starts
86+
end_byte: the (inclusive) byte offset the span ends
8787
"""
8888

89-
line: int
90-
start_column: int
91-
end_column: int
89+
content: str
90+
start_byte: int
91+
end_byte: int
9292

9393
def __init__(
9494
self, content: str, start_byte: int, end_byte: int
9595
): # pylint: disable=g-doc-args
96-
"""Derives a span's coordinates based on a string and start/end bytes.
96+
"""
97+
Stores the coordinates of a span based on a string and start/end bytes.
9798
9899
`start_byte` and `end_byte` are assumed to be on the same line.
99100
"""
100-
content_before_span = content[:start_byte]
101-
self.line = content_before_span.count("\n") + 1
102-
self.start_column = start_byte - content_before_span.rfind("\n")
103-
self.end_column = self.start_column + (end_byte - start_byte - 1)
101+
self.content = content
102+
self.start_byte = start_byte
103+
self.end_byte = end_byte
104104

105-
def __str__(self) -> str:
106-
return f"{self.line}:{self.start_column}-{self.end_column}"
105+
def as_str(self):
106+
"""
107+
Derives span from line and coordinates.
108+
109+
start_column: the (inclusive) column where the span starts
110+
end_column: the (inclusive) column where the span ends
111+
"""
112+
content_before_span = self.content[: self.start_byte]
113+
line = content_before_span.count("\n") + 1
114+
start_column = self.start_byte - content_before_span.rfind("\n")
115+
end_column = start_column + (self.end_byte - self.start_byte - 1)
116+
117+
return f"{line}:{start_column}-{end_column}"
107118

108119

109120
class Diagnostic:
@@ -134,7 +145,7 @@ def __init__(
134145
self.fix = fix
135146

136147
def __str__(self) -> str:
137-
return f"{self.filepath}:" + str(self.filerange) + f": {self.summary()}"
148+
return f"{self.filepath}:" + self.filerange.as_str() + f": {self.summary()}"
138149

139150
def summary(self) -> str:
140151
return (
@@ -228,7 +239,8 @@ def find_best_match(typo):
228239
)
229240

230241
potential_directives = find_potential_directives(content)
231-
242+
# Cache score and best_match to skip recalculating.
243+
score_and_best_match_for_potential_directive = dict()
232244
for filerange, potential_directive in potential_directives:
233245
# TODO(bchetioui): match count directives more finely. We skip directives
234246
# starting with 'CHECK-COUNT-' for the moment as they require more complex
@@ -244,7 +256,16 @@ def find_best_match(typo):
244256
if len(potential_directive) > max(map(len, all_directives)) + threshold:
245257
continue
246258

247-
score, best_match = find_best_match(potential_directive)
259+
if potential_directive not in score_and_best_match_for_potential_directive:
260+
score, best_match = find_best_match(potential_directive)
261+
score_and_best_match_for_potential_directive[potential_directive] = (
262+
score,
263+
best_match,
264+
)
265+
else:
266+
score, best_match = score_and_best_match_for_potential_directive[
267+
potential_directive
268+
]
248269
if score == 0: # This is an actual directive, ignore.
249270
continue
250271
elif score <= threshold and best_match not in _ignore:

llvm/utils/filecheck_lint/filecheck_lint_test.py

Lines changed: 3 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -49,27 +49,15 @@ def test_find_potential_directives_comment_prefix(self):
4949
results = list(fcl.find_potential_directives(content))
5050
assert len(results) == 3
5151
pos, match = results[0]
52-
assert (
53-
pos.line == 1
54-
and pos.start_column == len("junk; ") + 1
55-
and pos.end_column == len(lines[0]) - 1
56-
)
52+
assert pos.as_str() == "1:7-11"
5753
assert match == "CHCK1"
5854

5955
pos, match = results[1]
60-
assert (
61-
pos.line == 2
62-
and pos.start_column == len("junk// ") + 1
63-
and pos.end_column == len(lines[1]) - 1
64-
)
56+
assert pos.as_str() == "2:8-12"
6557
assert match == "CHCK2"
6658

6759
pos, match = results[2]
68-
assert (
69-
pos.line == 3
70-
and pos.start_column == 1
71-
and pos.end_column == len(lines[2]) - 1
72-
)
60+
assert pos.as_str() == "3:1-10"
7361
assert match == "SOME CHCK3"
7462

7563
def test_levenshtein(self):

0 commit comments

Comments
 (0)