24
24
any output.
25
25
"""
26
26
27
+ import argparse
27
28
import json
29
+ import logging
28
30
import multiprocessing
29
31
30
- from absl import app
31
- from absl import flags
32
- from absl import logging
33
-
34
32
from mlgo .corpus import extract_ir_lib
35
33
36
- flags .DEFINE_string (
37
- "input" ,
38
- None ,
39
- "Input file or directory - either compile_commands.json, a linker parameter"
40
- "list, or a path to a directory containing object files." ,
41
- )
42
- flags .DEFINE_enum (
43
- "input_type" ,
44
- "json" ,
45
- ["json" , "params" , "directory" ],
46
- "Input file type - json, params, or directory. params latter refers to lld"
47
- "params." ,
48
- )
49
- flags .DEFINE_string ("output_dir" , None , "Output directory" )
50
- flags .DEFINE_integer (
51
- "num_workers" ,
52
- None ,
53
- "Number of parallel workers for objcopy. `None` for maximum available." ,
54
- )
55
- flags .DEFINE_string ("llvm_objcopy_path" , "llvm-objcopy" , "Path to llvm-objcopy" )
56
- flags .DEFINE_string (
57
- "obj_base_dir" ,
58
- "" ,
59
- "Base directory for object files. Defaults to current working dir." ,
60
- )
61
- flags .DEFINE_string (
62
- "cmd_filter" ,
63
- None ,
64
- "Include only those modules with a command line matching this regexp. "
65
- "Setting it to None for not filtering. Note that the regexp is applied "
66
- "independently for each separate command line option. For example, ^-Oz$ "
67
- "will match Oz - built binaries. Does not work with thinlto_build=lld." ,
68
- )
69
- flags .DEFINE_enum (
70
- "thinlto_build" ,
71
- None ,
72
- ["distributed" , "local" ],
73
- "Set if the build was performed with either 'distributed' or "
74
- "'local' ThinLTO. This ensures the thinlto.bc files are also copied. "
75
- "The build is assumed to have had "
76
- "-mllvm -lto-embed-bitcode=post-merge-pre-opt passed in the distributed "
77
- "case, or -Wl,--save-temps=import and -Wl,--thinlto-emit-index-files "
78
- "passed in the local case." ,
79
- )
80
- flags .DEFINE_string (
81
- "cmd_section_name" ,
82
- ".llvmcmd" ,
83
- "The section name passed to llvm-objcopy. For ELF object files, the "
84
- "default .llvmcmd is correct. For Mach-O object files, one should use "
85
- "something like __LLVM,__cmdline" ,
86
- )
87
- flags .DEFINE_string (
88
- "bitcode_section_name" ,
89
- ".llvmbc" ,
90
- "The section name passed to llvm-objcopy. For ELF object files, the "
91
- "default .llvmbc is correct. For Mach-O object files, one should use "
92
- "__LLVM,__bitcode" ,
93
- )
94
-
95
- flags .mark_flag_as_required ("output_dir" )
96
-
97
- FLAGS = flags .FLAGS
98
-
99
-
100
- def main (argv ):
101
- if len (argv ) > 1 :
102
- raise app .UsageError ("Too many command-line arguments." )
103
34
35
def parse_args_and_run():
    """Parse the command-line options and hand them to ``main``.

    Builds the ``argparse`` parser for this tool, parses ``sys.argv`` and
    calls ``main(args)`` with the resulting namespace.

    ``--output_dir`` is mandatory: every code path in ``main`` forwards it as
    the destination directory, so a missing value is rejected here with a
    usage error instead of surfacing later as an obscure failure.

    Raises:
        SystemExit: raised by ``argparse`` when the command line is invalid
            (e.g. ``--output_dir`` is missing or a value is outside the
            allowed choices) or when ``--help`` is requested.
    """
    parser = argparse.ArgumentParser(
        description="A tool for making a corpus from build artifacts"
    )
    parser.add_argument(
        "--input",
        type=str,
        help="Input file or directory - either compile_commands.json, a linker "
        "parameter list, or a path to a directory containing object files.",
    )
    # NOTE: nargs="?" on the options below is kept as-is so that command lines
    # accepted before this change continue to parse.
    parser.add_argument(
        "--input_type",
        type=str,
        help="Input file type - JSON, LLD params, or directory.",
        choices=["json", "params", "directory"],
        default="json",
        nargs="?",
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        help="Output directory",
        required=True,
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        help="Number of parallel workers for objcopy. `None` for maximum available.",
        default=None,
        nargs="?",
    )
    parser.add_argument(
        "--llvm_objcopy_path",
        type=str,
        help="Path to llvm-objcopy",
        default="llvm-objcopy",
        nargs="?",
    )
    parser.add_argument(
        "--obj_base_dir",
        type=str,
        help="Base directory for object files. Defaults to current working dir.",
        default="",
        nargs="?",
    )
    parser.add_argument(
        "--cmd_filter",
        type=str,
        help="Include only those modules with a command line matching this regular "
        "expression. Set it to None to not perform any filtering. Note that the "
        "regular expression is applied independently for each separate command line "
        "option. For example, ^-Oz$ will match Oz built binaries. This does not work "
        "with thinlto_build=lld.",
        default=None,
        nargs="?",
    )
    parser.add_argument(
        "--thinlto_build",
        type=str,
        help="Set if the build was performed with either 'distributed' or 'local' "
        "ThinLTO. This ensures the thinlto.bc files are also copied. The build is "
        "assumed to have had -mllvm -lto-embed-bitcode=post-merge-pre-opt passed in "
        "the distributed case or -Wl,--save-temps=import and "
        "-Wl,--thinlto-emit-index-files passed in the local case",
        choices=["distributed", "local"],
        default=None,
        nargs="?",
    )
    parser.add_argument(
        "--cmd_section_name",
        type=str,
        help="The section name passed to llvm-objcopy. For ELF object files, the "
        "default .llvmcmd is correct. For Mach-O object files, one should use "
        "something like __LLVM,__cmdline",
        default=".llvmcmd",
        nargs="?",
    )
    parser.add_argument(
        "--bitcode_section_name",
        type=str,
        help="The section name passed to llvm-objcopy. For ELF object files, the "
        "default .llvmbc is correct. For Mach-O object files, one should use "
        "__LLVM,__bitcode",
        default=".llvmbc",
        nargs="?",
    )
    args = parser.parse_args()

    # main() reports progress through logging.info(); the standard-library root
    # logger only emits WARNING and above unless configured, so enable INFO
    # here. basicConfig() is a no-op if the root logger already has handlers.
    logging.basicConfig(level=logging.INFO)

    main(args)
118
+
119
+
120
+ def main (args ):
104
121
objs = []
105
- if FLAGS .input is not None and FLAGS .thinlto_build == "local" :
122
+ if args .input is not None and args .thinlto_build == "local" :
106
123
raise ValueError ("--thinlto_build=local cannot be run with --input" )
107
- if FLAGS .input is None :
108
- if FLAGS .thinlto_build != "local" :
124
+ if args .input is None :
125
+ if args .thinlto_build != "local" :
109
126
raise ValueError ("--input or --thinlto_build=local must be provided" )
110
- objs = extract_ir_lib .load_for_lld_thinlto (FLAGS .obj_base_dir , FLAGS .output_dir )
111
- elif FLAGS .input_type == "json" :
112
- with open (FLAGS .input , encoding = "utf-8" ) as f :
127
+ objs = extract_ir_lib .load_for_lld_thinlto (args .obj_base_dir , args .output_dir )
128
+ elif args .input_type == "json" :
129
+ with open (args .input , encoding = "utf-8" ) as f :
113
130
objs = extract_ir_lib .load_from_compile_commands (
114
- json .load (f ), FLAGS .output_dir
131
+ json .load (f ), args .output_dir
115
132
)
116
- elif FLAGS .input_type == "params" :
117
- if not FLAGS .obj_base_dir :
133
+ elif args .input_type == "params" :
134
+ if not args .obj_base_dir :
118
135
logging .info (
119
136
"-obj_base_dir is unspecified, assuming current directory."
120
137
"If no objects are found, use this option to specify the root"
121
138
"directory for the object file paths in the input file."
122
139
)
123
- with open (FLAGS .input , encoding = "utf-8" ) as f :
140
+ with open (args .input , encoding = "utf-8" ) as f :
124
141
objs = extract_ir_lib .load_from_lld_params (
125
- [l .strip () for l in f .readlines ()], FLAGS .obj_base_dir , FLAGS .output_dir
142
+ [l .strip () for l in f .readlines ()], args .obj_base_dir , args .output_dir
126
143
)
127
- elif FLAGS .input_type == "directory" :
144
+ elif args .input_type == "directory" :
128
145
logging .warning (
129
146
"Using the directory input is only recommended if the build system"
130
147
"your project uses does not support any structured output that"
131
148
"ml-compiler-opt understands. If your build system provides a"
132
149
"structured compilation database, use that instead"
133
150
)
134
- objs = extract_ir_lib .load_from_directory (FLAGS .input , FLAGS .output_dir )
151
+ objs = extract_ir_lib .load_from_directory (args .input , args .output_dir )
135
152
else :
136
- logging .error ("Unknown input type: %s" , FLAGS .input_type )
153
+ logging .error ("Unknown input type: %s" , args .input_type )
137
154
138
155
relative_output_paths = extract_ir_lib .run_extraction (
139
156
objs ,
140
- FLAGS .num_workers ,
141
- FLAGS .llvm_objcopy_path ,
142
- FLAGS .cmd_filter ,
143
- FLAGS .thinlto_build ,
144
- FLAGS .cmd_section_name ,
145
- FLAGS .bitcode_section_name ,
157
+ args .num_workers ,
158
+ args .llvm_objcopy_path ,
159
+ args .cmd_filter ,
160
+ args .thinlto_build ,
161
+ args .cmd_section_name ,
162
+ args .bitcode_section_name ,
146
163
)
147
164
148
165
extract_ir_lib .write_corpus_manifest (
149
- FLAGS .thinlto_build , relative_output_paths , FLAGS .output_dir
166
+ args .thinlto_build , relative_output_paths , args .output_dir
150
167
)
151
168
152
169
logging .info (
@@ -156,10 +173,5 @@ def main(argv):
156
173
)
157
174
158
175
159
- def entrypoint ():
160
- multiprocessing .set_start_method ("fork" )
161
- app .run (main )
162
-
163
-
164
176
if __name__ == "__main__" :
165
- entrypoint ()
177
+ parse_args_and_run ()
0 commit comments