Skip to content

Commit 120e062

Browse files
[MLGO] Remove absl dependency from scripts (#78880)
This patch removes the absl dependency from the mlgo-utils scripts. We were only using absl.logging and absl.flags, so this patch consists of mechanically converting the absl flags parsing to Python's built-in argparse; Python's logging module is a drop-in replacement for absl.logging.
1 parent c71956d commit 120e062

File tree

8 files changed

+148
-149
lines changed

8 files changed

+148
-149
lines changed

llvm/utils/mlgo-utils/mlgo/corpus/combine_training_corpus.py

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -23,26 +23,25 @@
2323
and corpus2 are combined into combinedcorpus.
2424
"""
2525

26-
from absl import app
27-
from absl import flags
26+
import argparse
2827

2928
from mlgo.corpus import combine_training_corpus_lib
3029

31-
flags.DEFINE_string("root_dir", "", "root dir of module paths to combine.")
3230

33-
FLAGS = flags.FLAGS
31+
def parse_args_and_run():
32+
parser = argparse.ArgumentParser(
33+
description="A tool for combining multiple training corpora"
34+
)
35+
parser.add_argument(
36+
"--root_dir", type=str, help="The root dir of module paths to combine."
37+
)
38+
args = parser.parse_args()
39+
main(args)
3440

3541

36-
def main(argv):
37-
if len(argv) > 1:
38-
raise app.UsageError("Too many command-line arguments.")
39-
40-
combine_training_corpus_lib.combine_corpus(FLAGS.root_dir)
41-
42-
43-
def entrypoint():
44-
app.run(main)
42+
def main(args):
43+
combine_training_corpus_lib.combine_corpus(args.root_dir)
4544

4645

4746
if __name__ == "__main__":
48-
entrypoint()
47+
parse_args_and_run()

llvm/utils/mlgo-utils/mlgo/corpus/extract_ir.py

Lines changed: 110 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -24,129 +24,146 @@
2424
any output.
2525
"""
2626

27+
import argparse
2728
import json
29+
import logging
2830
import multiprocessing
2931

30-
from absl import app
31-
from absl import flags
32-
from absl import logging
33-
3432
from mlgo.corpus import extract_ir_lib
3533

36-
flags.DEFINE_string(
37-
"input",
38-
None,
39-
"Input file or directory - either compile_commands.json, a linker parameter"
40-
"list, or a path to a directory containing object files.",
41-
)
42-
flags.DEFINE_enum(
43-
"input_type",
44-
"json",
45-
["json", "params", "directory"],
46-
"Input file type - json, params, or directory. params latter refers to lld"
47-
"params.",
48-
)
49-
flags.DEFINE_string("output_dir", None, "Output directory")
50-
flags.DEFINE_integer(
51-
"num_workers",
52-
None,
53-
"Number of parallel workers for objcopy. `None` for maximum available.",
54-
)
55-
flags.DEFINE_string("llvm_objcopy_path", "llvm-objcopy", "Path to llvm-objcopy")
56-
flags.DEFINE_string(
57-
"obj_base_dir",
58-
"",
59-
"Base directory for object files. Defaults to current working dir.",
60-
)
61-
flags.DEFINE_string(
62-
"cmd_filter",
63-
None,
64-
"Include only those modules with a command line matching this regexp. "
65-
"Setting it to None for not filtering. Note that the regexp is applied "
66-
"independently for each separate command line option. For example, ^-Oz$ "
67-
"will match Oz - built binaries. Does not work with thinlto_build=lld.",
68-
)
69-
flags.DEFINE_enum(
70-
"thinlto_build",
71-
None,
72-
["distributed", "local"],
73-
"Set if the build was performed with either 'distributed' or "
74-
"'local' ThinLTO. This ensures the thinlto.bc files are also copied. "
75-
"The build is assumed to have had "
76-
"-mllvm -lto-embed-bitcode=post-merge-pre-opt passed in the distributed "
77-
"case, or -Wl,--save-temps=import and -Wl,--thinlto-emit-index-files "
78-
"passed in the local case.",
79-
)
80-
flags.DEFINE_string(
81-
"cmd_section_name",
82-
".llvmcmd",
83-
"The section name passed to llvm-objcopy. For ELF object files, the "
84-
"default .llvmcmd is correct. For Mach-O object files, one should use "
85-
"something like __LLVM,__cmdline",
86-
)
87-
flags.DEFINE_string(
88-
"bitcode_section_name",
89-
".llvmbc",
90-
"The section name passed to llvm-objcopy. For ELF object files, the "
91-
"default .llvmbc is correct. For Mach-O object files, one should use "
92-
"__LLVM,__bitcode",
93-
)
94-
95-
flags.mark_flag_as_required("output_dir")
96-
97-
FLAGS = flags.FLAGS
98-
99-
100-
def main(argv):
101-
if len(argv) > 1:
102-
raise app.UsageError("Too many command-line arguments.")
10334

35+
def parse_args_and_run():
36+
parser = argparse.ArgumentParser(
37+
description="A tool for making a corpus from build artifacts"
38+
)
39+
parser.add_argument(
40+
"--input",
41+
type=str,
42+
help="Input file or directory - either compile_commands.json, a linker "
43+
"parameter list, or a path to a directory containing object files.",
44+
)
45+
parser.add_argument(
46+
"--input_type",
47+
type=str,
48+
help="Input file type - JSON, LLD params, or directory.",
49+
choices=["json", "params", "directory"],
50+
default="json",
51+
nargs="?",
52+
)
53+
parser.add_argument("--output_dir", type=str, help="Output directory")
54+
parser.add_argument(
55+
"--num_workers",
56+
type=int,
57+
help="Number of parallel works for objcopy. `None` for maximum available.",
58+
default=None,
59+
nargs="?",
60+
)
61+
parser.add_argument(
62+
"--llvm_objcopy_path",
63+
type=str,
64+
help="Path to llvm-objcopy",
65+
default="llvm-objcopy",
66+
nargs="?",
67+
)
68+
parser.add_argument(
69+
"--obj_base_dir",
70+
type=str,
71+
help="Base directory for object files. Defaults to current working dir.",
72+
default="",
73+
nargs="?",
74+
)
75+
parser.add_argument(
76+
"--cmd_filter",
77+
type=str,
78+
help="Include only those modules with a command line matching this regular "
79+
"expression. Set it to None to not perform any filtering. Note that the "
80+
"regular expression is applied independently for each separate command line "
81+
"option. For example, ^-Oz$ will match Oz built binaries. This does not work "
82+
"with thinlto_build=lld.",
83+
default=None,
84+
nargs="?",
85+
)
86+
parser.add_argument(
87+
"--thinlto_build",
88+
type=str,
89+
help="Set if the build was performed with either 'distributed' or 'local' "
90+
"ThinLTO. This ensures the thinlto.bc files are also copied. The build is "
91+
"assumed to have had -mllvm -lto-embed-bitcode=post-merge-pre-opt passed in "
92+
"the distributed case or -Wl,--save-temps=import and "
93+
"-Wl,--thinlto-emit-index-files passed in the local case",
94+
choices=["distributed", "local"],
95+
default=None,
96+
nargs="?",
97+
)
98+
parser.add_argument(
99+
"--cmd_section_name",
100+
type=str,
101+
help="The section name passed to llvm-objcopy. For ELF object files, the "
102+
"default .llvmcmd is correct. For Mach-O object files, one should use "
103+
"something like __LLVM,__cmdline",
104+
default=".llvmcmd",
105+
nargs="?",
106+
)
107+
parser.add_argument(
108+
"--bitcode_section_name",
109+
type=str,
110+
help="The section name passed to llvm-objcopy. For ELF object files, the "
111+
"default .llvmbc is correct. For Mach-O object files, one should use "
112+
"__LLVM,__bitcode",
113+
default=".llvmbc",
114+
nargs="?",
115+
)
116+
args = parser.parse_args()
117+
main(args)
118+
119+
120+
def main(args):
104121
objs = []
105-
if FLAGS.input is not None and FLAGS.thinlto_build == "local":
122+
if args.input is not None and args.thinlto_build == "local":
106123
raise ValueError("--thinlto_build=local cannot be run with --input")
107-
if FLAGS.input is None:
108-
if FLAGS.thinlto_build != "local":
124+
if args.input is None:
125+
if args.thinlto_build != "local":
109126
raise ValueError("--input or --thinlto_build=local must be provided")
110-
objs = extract_ir_lib.load_for_lld_thinlto(FLAGS.obj_base_dir, FLAGS.output_dir)
111-
elif FLAGS.input_type == "json":
112-
with open(FLAGS.input, encoding="utf-8") as f:
127+
objs = extract_ir_lib.load_for_lld_thinlto(args.obj_base_dir, args.output_dir)
128+
elif args.input_type == "json":
129+
with open(args.input, encoding="utf-8") as f:
113130
objs = extract_ir_lib.load_from_compile_commands(
114-
json.load(f), FLAGS.output_dir
131+
json.load(f), args.output_dir
115132
)
116-
elif FLAGS.input_type == "params":
117-
if not FLAGS.obj_base_dir:
133+
elif args.input_type == "params":
134+
if not args.obj_base_dir:
118135
logging.info(
119136
"-obj_base_dir is unspecified, assuming current directory."
120137
"If no objects are found, use this option to specify the root"
121138
"directory for the object file paths in the input file."
122139
)
123-
with open(FLAGS.input, encoding="utf-8") as f:
140+
with open(args.input, encoding="utf-8") as f:
124141
objs = extract_ir_lib.load_from_lld_params(
125-
[l.strip() for l in f.readlines()], FLAGS.obj_base_dir, FLAGS.output_dir
142+
[l.strip() for l in f.readlines()], args.obj_base_dir, args.output_dir
126143
)
127-
elif FLAGS.input_type == "directory":
144+
elif args.input_type == "directory":
128145
logging.warning(
129146
"Using the directory input is only recommended if the build system"
130147
"your project uses does not support any structured output that"
131148
"ml-compiler-opt understands. If your build system provides a"
132149
"structured compilation database, use that instead"
133150
)
134-
objs = extract_ir_lib.load_from_directory(FLAGS.input, FLAGS.output_dir)
151+
objs = extract_ir_lib.load_from_directory(args.input, args.output_dir)
135152
else:
136-
logging.error("Unknown input type: %s", FLAGS.input_type)
153+
logging.error("Unknown input type: %s", args.input_type)
137154

138155
relative_output_paths = extract_ir_lib.run_extraction(
139156
objs,
140-
FLAGS.num_workers,
141-
FLAGS.llvm_objcopy_path,
142-
FLAGS.cmd_filter,
143-
FLAGS.thinlto_build,
144-
FLAGS.cmd_section_name,
145-
FLAGS.bitcode_section_name,
157+
args.num_workers,
158+
args.llvm_objcopy_path,
159+
args.cmd_filter,
160+
args.thinlto_build,
161+
args.cmd_section_name,
162+
args.bitcode_section_name,
146163
)
147164

148165
extract_ir_lib.write_corpus_manifest(
149-
FLAGS.thinlto_build, relative_output_paths, FLAGS.output_dir
166+
args.thinlto_build, relative_output_paths, args.output_dir
150167
)
151168

152169
logging.info(
@@ -156,10 +173,5 @@ def main(argv):
156173
)
157174

158175

159-
def entrypoint():
160-
multiprocessing.set_start_method("fork")
161-
app.run(main)
162-
163-
164176
if __name__ == "__main__":
165-
entrypoint()
177+
parse_args_and_run()

llvm/utils/mlgo-utils/mlgo/corpus/make_corpus.py

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -12,43 +12,42 @@
1212
--default_args="<list of space separated flags>"
1313
"""
1414

15-
from absl import app
16-
from absl import flags
17-
from absl import logging
15+
import argparse
16+
import logging
1817

1918
from mlgo.corpus import make_corpus_lib
2019

21-
flags.DEFINE_string("input_dir", None, "The input directory.")
22-
flags.DEFINE_string("output_dir", None, "The output directory.")
23-
flags.DEFINE_string(
24-
"default_args",
25-
"",
26-
"The compiler flags to compile with when using downstream tooling.",
27-
)
2820

29-
flags.mark_flag_as_required("input_dir")
30-
flags.mark_flag_as_required("output_dir")
31-
32-
FLAGS = flags.FLAGS
21+
def parse_args_and_run():
22+
parser = argparse.ArgumentParser(
23+
description="A tool for making a corpus from arbitrary bitcode"
24+
)
25+
parser.add_argument("--input_dir", type=str, help="The input directory.")
26+
parser.add_argument("--output_dir", type=str, help="The output directory.")
27+
parser.add_argument(
28+
"--default_args",
29+
type=str,
30+
help="The compiler flags to compile with when using downstream tooling.",
31+
default="",
32+
nargs="?",
33+
)
34+
args = parser.parse_args()
35+
main(args)
3336

3437

35-
def main(_):
38+
def main(args):
3639
logging.warning(
3740
"Using this tool does not guarantee that the bitcode is taken at "
3841
"the correct stage for consumption during model training. Make "
3942
"sure to validate assumptions about where the bitcode is coming "
4043
"from before using it in production."
4144
)
42-
relative_paths = make_corpus_lib.load_bitcode_from_directory(FLAGS.input_dir)
43-
make_corpus_lib.copy_bitcode(relative_paths, FLAGS.input_dir, FLAGS.output_dir)
45+
relative_paths = make_corpus_lib.load_bitcode_from_directory(args.input_dir)
46+
make_corpus_lib.copy_bitcode(relative_paths, args.input_dir, args.output_dir)
4447
make_corpus_lib.write_corpus_manifest(
45-
relative_paths, FLAGS.output_dir, FLAGS.default_args.split()
48+
relative_paths, args.output_dir, args.default_args.split()
4649
)
4750

4851

49-
def entrypoint():
50-
app.run(main)
51-
52-
5352
if __name__ == "__main__":
54-
entrypoint()
53+
parse_args_and_run()

llvm/utils/mlgo-utils/pyproject.toml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,6 @@ name = "mlgo"
77
description = "Tooling for ML in LLVM"
88
readme = "README.md"
99
requires-python = ">=3.8,<3.11"
10-
dependencies = [
11-
"absl-py>=1.0.0"
12-
]
1310
dynamic = ["version"]
1411
license = {text = "Apache-2.0 WITH LLVM-exception"}
1512
classifiers = [

llvm/utils/mlgo-utils/tests/corpus/combine_training_corpus_script.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# REQUIRES: python-38, absl, system-linux
1+
# REQUIRES: python-38, system-linux
22

33
## Testing that the combine_training_corpus script works as expected when
44
## invoked.

llvm/utils/mlgo-utils/tests/corpus/extract_ir_script.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# REQUIRES: python-38, absl, system-linux
1+
# REQUIRES: python-38, system-linux
22

33
## Test that invoking the extract_ir script works as expected.
44

llvm/utils/mlgo-utils/tests/corpus/make_corpus_script.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# REQUIRES: python-38, absl, system-linux
1+
# REQUIRES: python-38, system-linux
22

33
## Testing that the make_corpus script works as expected when invoked.
44

llvm/utils/mlgo-utils/tests/lit.local.cfg

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,3 @@ import sys
44
# the entire project has been bumped to 3.8.
55
if sys.version_info > (3,8):
66
config.available_features.add("python-38")
7-
8-
# TODO(boomanaiden154): Remove this flag once the scripts are converted to
9-
# not use absl anymore.
10-
try:
11-
import absl
12-
config.available_features.add("absl")
13-
except:
14-
pass

0 commit comments

Comments
 (0)