4
4
# This source code is licensed under the license found in the
5
5
# LICENSE file in the root directory of this source tree.
6
6
7
+ import argparse
7
8
import json
8
9
import logging
9
10
import os
@@ -43,44 +44,46 @@ def check_args(args, verb: str) -> None:
43
44
download_and_convert (args .model , args .model_directory , args .hf_token )
44
45
45
46
47
+ # Given an arg parser and a subcommand (verb), add the appropriate arguments
48
+ # for that subcommand.
46
49
def add_arguments_for_verb (parser , verb : str ) -> None :
47
- # Model specification. TODO Simplify this.
48
- # A model can be specified using a positional model name or HuggingFace
49
- # path. Alternatively, the model can be specified via --gguf-path or via
50
- # an explicit --checkpoint-dir, --checkpoint-path, or --tokenizer-path.
51
-
50
+ # Argument closure for inventory-related subcommands
52
51
if verb in INVENTORY_VERBS :
53
52
_configure_artifact_inventory_args (parser , verb )
54
53
_add_cli_metadata_args (parser )
55
54
return
56
55
56
+ # Model specification
57
+ # A model can be specified using a positional model name or checkpoint path
57
58
parser .add_argument (
58
59
"model" ,
59
60
type = str ,
60
61
nargs = "?" ,
61
62
default = None ,
62
63
help = "Model name for well-known models" ,
63
64
)
65
+ parser .add_argument (
66
+ "--checkpoint-path" ,
67
+ type = Path ,
68
+ default = "not_specified" ,
69
+ help = "Use the specified model checkpoint path" ,
70
+ )
64
71
72
+ # Add thematic argument groups based on the subcommand
65
73
if verb in ["browser" , "chat" , "generate" ]:
66
74
_add_generation_args (parser )
75
+ if verb == "eval" :
76
+ _add_evaluation_args (parser )
77
+
78
+ # Add argument groups for exported model path IO
79
+ _add_exported_input_path_args (parser )
80
+ _add_export_output_path_args (parser )
67
81
68
- parser .add_argument (
69
- "--distributed" ,
70
- action = "store_true" ,
71
- help = "Whether to enable distributed inference" ,
72
- )
73
82
parser .add_argument (
74
83
"--is-chat-model" ,
75
84
action = "store_true" ,
76
85
help = "Indicate that the model was trained to support chat functionality" ,
77
86
)
78
- parser .add_argument (
79
- "--seed" ,
80
- type = int ,
81
- default = None ,
82
- help = "Initialize torch seed" ,
83
- )
84
87
parser .add_argument (
85
88
"--compile" ,
86
89
action = "store_true" ,
@@ -91,52 +94,6 @@ def add_arguments_for_verb(parser, verb: str) -> None:
91
94
action = "store_true" ,
92
95
help = "Whether to compile the prefill. Improves prefill perf, but has higher compile times." ,
93
96
)
94
- parser .add_argument (
95
- "--profile" ,
96
- type = Path ,
97
- default = None ,
98
- help = "Profile path." ,
99
- )
100
- parser .add_argument (
101
- "--draft-checkpoint-path" ,
102
- type = Path ,
103
- default = None ,
104
- help = "Use the specified draft checkpoint path" ,
105
- )
106
- parser .add_argument (
107
- "--checkpoint-path" ,
108
- type = Path ,
109
- default = "not_specified" ,
110
- help = "Use the specified model checkpoint path" ,
111
- )
112
- parser .add_argument (
113
- "--dcp-dir" ,
114
- type = Path ,
115
- default = None ,
116
- help = "Use the specified model checkpoint directory" ,
117
- )
118
- parser .add_argument (
119
- "--params-path" ,
120
- type = Path ,
121
- default = None ,
122
- help = "Use the specified parameter file" ,
123
- )
124
- parser .add_argument (
125
- "--gguf-path" ,
126
- type = Path ,
127
- default = None ,
128
- help = "Use the specified GGUF model file" ,
129
- )
130
- parser .add_argument (
131
- "--tokenizer-path" ,
132
- type = Path ,
133
- default = None ,
134
- help = "Use the specified model tokenizer file" ,
135
- )
136
-
137
- _add_exported_model_input_args (parser )
138
- _add_export_output_path_args (parser )
139
-
140
97
parser .add_argument (
141
98
"--dtype" ,
142
99
default = "fast" ,
@@ -152,34 +109,13 @@ def add_arguments_for_verb(parser, verb: str) -> None:
152
109
+ "modes are: embedding, linear:int8, linear:int4, linear:a8w4dq, precision."
153
110
),
154
111
)
155
- parser .add_argument (
156
- "--draft-quantize" ,
157
- type = str ,
158
- default = "{ }" ,
159
- help = (
160
- "Quantization options. Same format as quantize, "
161
- + "or 'quantize' to indicate same options specified by "
162
- + "--quantize to main model. Applied to draft model."
163
- ),
164
- )
165
- parser .add_argument (
166
- "--params-table" ,
167
- type = str ,
168
- default = None ,
169
- choices = allowable_params_table (),
170
- help = "Parameter table to use" ,
171
- )
172
112
parser .add_argument (
173
113
"--device" ,
174
114
type = str ,
175
115
default = default_device ,
176
116
choices = ["fast" , "cpu" , "cuda" , "mps" ],
177
117
help = "Hardware device to use. Options: cpu, cuda, mps" ,
178
118
)
179
-
180
- if verb == "eval" :
181
- _add_evaluation_args (parser )
182
-
183
119
parser .add_argument (
184
120
"--hf-token" ,
185
121
type = str ,
@@ -192,6 +128,12 @@ def add_arguments_for_verb(parser, verb: str) -> None:
192
128
default = default_model_dir ,
193
129
help = f"The directory to store downloaded model artifacts. Default: { default_model_dir } " ,
194
130
)
131
+ parser .add_argument (
132
+ "--profile" ,
133
+ type = Path ,
134
+ default = None ,
135
+ help = "Profile path." ,
136
+ )
195
137
parser .add_argument (
196
138
"--port" ,
197
139
type = int ,
@@ -200,6 +142,11 @@ def add_arguments_for_verb(parser, verb: str) -> None:
200
142
)
201
143
_add_cli_metadata_args (parser )
202
144
145
+ # WIP Features (suppressed from --help)
146
+ _add_distributed_args (parser )
147
+ _add_custom_model_args (parser )
148
+ _add_speculative_execution_args (parser )
149
+
203
150
204
151
# Add CLI Args representing user provided exported model files
205
152
def _add_export_output_path_args (parser ) -> None :
@@ -219,7 +166,7 @@ def _add_export_output_path_args(parser) -> None:
219
166
220
167
221
168
# Add CLI Args representing user provided exported model files
222
- def _add_exported_model_input_args (parser ) -> None :
169
+ def _add_exported_input_path_args (parser ) -> None :
223
170
exported_model_path_parser = parser .add_argument_group ("Exported Model Path Args" , "Specify the path of the exported model files to ingest" )
224
171
exported_model_path_parser .add_argument (
225
172
"--dso-path" ,
@@ -235,14 +182,20 @@ def _add_exported_model_input_args(parser) -> None:
235
182
)
236
183
237
184
238
- # Add CLI Args that are relevant to any subcommand execution
185
+ # Add CLI Args that are general to subcommand CLI execution
239
186
def _add_cli_metadata_args (parser ) -> None :
240
187
parser .add_argument (
241
188
"-v" ,
242
189
"--verbose" ,
243
190
action = "store_true" ,
244
191
help = "Verbose output" ,
245
192
)
193
+ parser .add_argument (
194
+ "--seed" ,
195
+ type = int ,
196
+ default = None ,
197
+ help = "Initialize torch seed" ,
198
+ )
246
199
247
200
248
201
# Configure CLI Args specific to Model Artifact Management
@@ -318,12 +271,6 @@ def _add_generation_args(parser) -> None:
318
271
action = "store_true" ,
319
272
help = "Whether to perform prefill sequentially. Only used for model debug." ,
320
273
)
321
- generator_parser .add_argument (
322
- "--speculate-k" ,
323
- type = int ,
324
- default = 5 ,
325
- help = "Speculative execution depth" ,
326
- )
327
274
328
275
329
276
# Add CLI Args specific to Model Evaluation
@@ -350,6 +297,88 @@ def _add_evaluation_args(parser) -> None:
350
297
)
351
298
352
299
300
+ # Add CLI Args related to distributed inference
301
+ # This feature is currently a [WIP] and hidden from --help
302
+ def _add_distributed_args (parser ) -> None :
303
+ parser .add_argument (
304
+ "--distributed" ,
305
+ action = "store_true" ,
306
+ help = argparse .SUPPRESS ,
307
+ # "Whether to enable distributed inference",
308
+ )
309
+ parser .add_argument (
310
+ "--dcp-dir" ,
311
+ type = Path ,
312
+ default = None ,
313
+ help = argparse .SUPPRESS ,
314
+ # "Use the specified model checkpoint directory",
315
+ )
316
+
317
+
318
+ # Add CLI Args related to custom model inputs (e.g. GGUF)
319
+ # This feature is currently a [WIP] and hidden from --help
320
+ def _add_custom_model_args (parser ) -> None :
321
+ parser .add_argument (
322
+ "--params-table" ,
323
+ type = str ,
324
+ default = None ,
325
+ choices = allowable_params_table (),
326
+ help = argparse .SUPPRESS ,
327
+ # "Parameter table to use",
328
+ )
329
+ parser .add_argument (
330
+ "--params-path" ,
331
+ type = Path ,
332
+ default = None ,
333
+ help = argparse .SUPPRESS ,
334
+ # "Use the specified parameter file",
335
+ )
336
+ parser .add_argument (
337
+ "--gguf-path" ,
338
+ type = Path ,
339
+ default = None ,
340
+ help = argparse .SUPPRESS ,
341
+ # "Use the specified GGUF model file",
342
+ )
343
+ parser .add_argument (
344
+ "--tokenizer-path" ,
345
+ type = Path ,
346
+ default = None ,
347
+ help = argparse .SUPPRESS ,
348
+ # "Use the specified model tokenizer file",
349
+ )
350
+
351
+
352
+ # Add CLI Args related to speculative execution
353
+ # This feature is currently a [WIP] and hidden from --help
354
+ def _add_speculative_execution_args (parser ) -> None :
355
+ parser .add_argument (
356
+ "--speculate-k" ,
357
+ type = int ,
358
+ default = 5 ,
359
+ help = argparse .SUPPRESS ,
360
+ # "Speculative execution depth",
361
+ )
362
+ parser .add_argument (
363
+ "--draft-checkpoint-path" ,
364
+ type = Path ,
365
+ default = None ,
366
+ help = argparse .SUPPRESS ,
367
+ # "Use the specified draft checkpoint path",
368
+ )
369
+ parser .add_argument (
370
+ "--draft-quantize" ,
371
+ type = str ,
372
+ default = "{ }" ,
373
+ help = argparse .SUPPRESS ,
374
+ # (
375
+ # "Quantization options. Same format as quantize, "
376
+ # + "or 'quantize' to indicate same options specified by "
377
+ # + "--quantize to main model. Applied to draft model."
378
+ # ),
379
+ )
380
+
381
+
353
382
def arg_init (args ):
354
383
if not (torch .__version__ > "2.3" ):
355
384
raise RuntimeError (
0 commit comments