Skip to content

Commit 4134b33

Browse files
[MLGO] Add ability to extract IR from bazel using aquery (llvm#96964)
This patch adds support for extracting IR from binaries built with bazel by querying the linker command line using bazel aquery.
1 parent 6b55ec1 commit 4134b33

File tree

3 files changed

+67
-2
lines changed

3 files changed

+67
-2
lines changed

llvm/utils/mlgo-utils/mlgo/corpus/extract_ir.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ def parse_args_and_run():
4545
parser.add_argument(
4646
"--input_type",
4747
type=str,
48-
help="Input file type - JSON, LLD params, or directory.",
49-
choices=["json", "params", "directory"],
48+
help="Input file type - JSON, LLD params, directory, or bazel aquery.",
49+
choices=["json", "params", "directory", "bazel_aquery"],
5050
default="json",
5151
nargs="?",
5252
)
@@ -149,6 +149,11 @@ def main(args):
149149
"structured compilation database, use that instead"
150150
)
151151
objs = extract_ir_lib.load_from_directory(args.input, args.output_dir)
152+
elif args.input_type == "bazel_aquery":
153+
with open(args.input, encoding="utf-8") as aquery_json_handle:
154+
objs = extract_ir_lib.load_bazel_aquery(
155+
json.load(aquery_json_handle), args.obj_base_dir, args.output_dir
156+
)
152157
else:
153158
logging.error("Unknown input type: %s", args.input_type)
154159

llvm/utils/mlgo-utils/mlgo/corpus/extract_ir_lib.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,29 @@ def make_spec(obj_file: str):
316316
return [make_spec(path) for path in paths]
317317

318318

319+
def load_bazel_aquery(aquery_json, obj_base_dir: str, output_dir: str):
320+
"""Creates an object file array by looking at the JSON output of bazel aquery.
321+
322+
Args:
323+
aquery_json: The JSON-formatted output of the bazel aquery command for
324+
the target of interest. The bazel aquery JSON should be a JSON
325+
serialized version of the analysis.ActionGraphContainer proto.
326+
https://github.com/bazelbuild/bazel/blob/master/src/main/protobuf/analysis_v2.proto
327+
obj_base_dir: The base build directory that all object files will be
328+
written out as relative to.
329+
output_dir: The output directory where extracted .bc and .cmd files should
330+
be placed.
331+
"""
332+
linker_params = []
333+
334+
for action_info in aquery_json["actions"]:
335+
if action_info["mnemonic"] != "CppLink":
336+
continue
337+
linker_params = action_info["arguments"]
338+
339+
return load_from_lld_params(linker_params, obj_base_dir, output_dir)
340+
341+
319342
def run_extraction(
320343
objs: List[TrainingIRExtractor],
321344
num_workers: int,

llvm/utils/mlgo-utils/tests/corpus/extract_ir_test.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,43 @@ def test_lld_thinlto_extraction(outer, outdir):
304304
# CHECK-LLD-THINLTO-EXTRACTION-PY: 3
305305

306306

307+
## Test that we can load a bazel aquery JSON as expected.
308+
309+
# RUN: %python %s test_load_bazel_aquery | FileCheck %s --check-prefix CHECK-TEST-LOAD-BAZEL-AQUERY
310+
311+
312+
def test_load_bazel_aquery():
313+
obj = extract_ir_lib.load_bazel_aquery(
314+
{
315+
"actions": [
316+
{"mnemonic": "not-link", "arguments": []},
317+
{
318+
"mnemonic": "CppLink",
319+
"arguments": ["clang", "-o", "output_binary", "test1.o", "test2.o"],
320+
},
321+
]
322+
},
323+
"/some/path",
324+
"/tmp/out",
325+
)
326+
print(obj[0].input_obj())
327+
# CHECK-TEST-LOAD-BAZEL-AQUERY: /some/path/test1.o
328+
print(obj[0].relative_output_path())
329+
# CHECK-TEST-LOAD-BAZEL-AQUERY: test1.o
330+
print(obj[0].cmd_file())
331+
# CHECK-TEST-LOAD-BAZEL-AQUERY: /tmp/out/test1.o.cmd
332+
print(obj[0].bc_file())
333+
# CHECK-TEST-LOAD-BAZEL-AQUERY: /tmp/out/test1.o.bc
334+
print(obj[1].input_obj())
335+
# CHECK-TEST-LOAD-BAZEL-AQUERY: /some/path/test2.o
336+
print(obj[1].relative_output_path())
337+
# CHECK-TEST-LOAD-BAZEL-AQUERY: test2.o
338+
print(obj[1].cmd_file())
339+
# CHECK-TEST-LOAD-BAZEL-AQUERY: /tmp/out/test2.o.cmd
340+
print(obj[1].bc_file())
341+
# CHECK-TEST-LOAD-BAZEL-AQUERY: /tmp/out/test2.o.bc
342+
343+
307344
## Test that filtering works correctly
308345

309346
# RUN: %python %s test_filtering | FileCheck %s --check-prefix CHECK-TEST-FILTERING

0 commit comments

Comments
 (0)