
import torch

-default_device = "cpu"
+# CPU is always available and also exportable to ExecuTorch
+default_device = "cpu"  # 'cuda' if torch.cuda.is_available() else 'cpu'

def check_args(args, name: str) -> None:
    pass

+def add_arguments_for_chat(parser):
+    # Only chat specific options should be here
+    _add_arguments_common(parser)
+
+
+def add_arguments_for_browser(parser):
+    # Only browser specific options should be here
+    _add_arguments_common(parser)
+    parser.add_argument(
+        "--port",
+        type=int,
+        default=5000,
+        help="Port for the web server in browser mode",
+    )
+
+
def add_arguments_for_download(parser):
    # Only download specific options should be here
    _add_arguments_common(parser)
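The new `default_device` comment above points at opt-in accelerator selection. A minimal sketch of what that could look like, assuming a standard `torch` install (illustrative only, not part of this change; CPU stays the fallback because it is always available and exportable to ExecuTorch):

import torch

# Illustrative helper (not in this diff): prefer an accelerator when present,
# falling back to CPU, which is always available and ExecuTorch-exportable.
def resolve_default_device() -> str:
    if torch.cuda.is_available():
        return "cuda"
    if torch.backends.mps.is_available():  # Apple Silicon
        return "mps"
    return "cpu"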
@@ -33,158 +51,204 @@ def add_arguments_for_export(parser):
    # Only export specific options should be here
    _add_arguments_common(parser)

-def add_arguments_for_browser(parser):
-    # Only browser specific options should be here
-    _add_arguments_common(parser)
-    parser.add_argument(
-        "--port",
-        type=int,
-        default=5000,
-        help="Port for the web server for browser mode."
-    )

def _add_arguments_common(parser):
    # Model specification. TODO Simplify this.
    # A model can be specified using a positional model name or HuggingFace
    # path. Alternatively, the model can be specified via --gguf-path or via
    # an explicit --checkpoint-dir, --checkpoint-path, or --tokenizer-path.
-
    parser.add_argument(
        "model",
        type=str,
        nargs="?",
        default=None,
-        help="Model name for well-known models.",
+        help="Model name for well-known models",
    )

+
+def add_arguments(parser):
    # TODO: Refactor this so that only common options are here
-    # and subcommand-specific options are inside individual
+    # and command-specific options are inside individual
    # add_arguments_for_generate, add_arguments_for_export etc.
+
    parser.add_argument(
-        "--seed",
-        type=int,
-        default=1234,  # set None for release
-        help="Initialize torch seed",
-    )
-    parser.add_argument(
-        "--prompt", type=str, default="Hello, my name is", help="Input prompt."
+        "--chat",
+        action="store_true",
+        help="Whether to start an interactive chat session",
    )
    parser.add_argument(
-        "--tiktoken",
+        "--gui",
        action="store_true",
-        help="Whether to use tiktoken tokenizer.",
+        help="Whether to use a web UI for an interactive chat session",
    )
    parser.add_argument(
-        "--chat",
-        action="store_true",
-        help="Use torchchat for an interactive chat session.",
+        "--prompt",
+        type=str,
+        default="Hello, my name is",
+        help="Input prompt",
    )
    parser.add_argument(
        "--is-chat-model",
        action="store_true",
-        help="Indicate that the model was trained to support chat functionality.",
+        help="Indicate that the model was trained to support chat functionality",
    )
    parser.add_argument(
-        "--gui",
+        "--seed",
+        type=int,
+        default=None,
+        help="Initialize torch seed",
+    )
+    parser.add_argument(
+        "--tiktoken",
        action="store_true",
-        help="Use torchchat to for an interactive gui-chat session.",
+        help="Whether to use tiktoken tokenizer",
+    )
+    parser.add_argument(
+        "--num-samples",
+        type=int,
+        default=1,
+        help="Number of samples",
+    )
+    parser.add_argument(
+        "--max-new-tokens",
+        type=int,
+        default=200,
+        help="Maximum number of new tokens",
    )
-    parser.add_argument("--num-samples", type=int, default=1, help="Number of samples.")
    parser.add_argument(
-        "--max-new-tokens", type=int, default=200, help="Maximum number of new tokens."
+        "--top-k",
+        type=int,
+        default=200,
+        help="Top-k for sampling",
    )
-    parser.add_argument("--top-k", type=int, default=200, help="Top-k for sampling.")
    parser.add_argument(
-        "--temperature", type=float, default=0.8, help="Temperature for sampling."
+        "--temperature",
+        type=float,
+        default=0.8,
+        help="Temperature for sampling",
    )
    parser.add_argument(
-        "--compile", action="store_true", help="Whether to compile the model."
+        "--compile",
+        action="store_true",
+        help="Whether to compile the model with torch.compile",
    )
    parser.add_argument(
        "--compile-prefill",
        action="store_true",
-        help="Whether to compile the prefill (improves prefill perf, but higher compile times)",
+        help="Whether to compile the prefill. Improves prefill perf, but has higher compile times.",
+    )
+    parser.add_argument(
+        "--profile",
+        type=Path,
+        default=None,
+        help="Profile path.",
    )
-    parser.add_argument("--profile", type=Path, default=None, help="Profile path.")
    parser.add_argument(
-        "--speculate-k", type=int, default=5, help="Speculative execution depth."
+        "--speculate-k",
+        type=int,
+        default=5,
+        help="Speculative execution depth",
    )
    parser.add_argument(
        "--draft-checkpoint-path",
        type=Path,
        default=None,
-        help="Draft checkpoint path.",
+        help="Use the specified draft checkpoint path",
    )
    parser.add_argument(
        "--checkpoint-path",
        type=Path,
        default="not_specified",
-        help="Model checkpoint path.",
+        help="Use the specified model checkpoint path",
    )
-    # parser.add_argument(
-    #     "--checkpoint-dir",
-    #     type=Path,
-    #     default=None,
-    #     help="Model checkpoint directory.",
-    # )
    parser.add_argument(
        "--params-path",
        type=Path,
        default=None,
-        help="Parameter file path.",
+        help="Use the specified parameter file",
    )
    parser.add_argument(
        "--gguf-path",
        type=Path,
        default=None,
-        help="GGUF file path.",
+        help="Use the specified GGUF model file",
    )
    parser.add_argument(
        "--tokenizer-path",
        type=Path,
        default=None,
-        help="Model checkpoint path.",
+        help="Use the specified model tokenizer file",
+    )
+    parser.add_argument(
+        "--output-pte-path",
+        type=str,
+        default=None,
+        help="Output to the specified ExecuTorch .pte model file",
+    )
+    parser.add_argument(
+        "--output-dso-path",
+        type=str,
+        default=None,
+        help="Output to the specified AOT Inductor .dso model file",
    )
-    parser.add_argument("--output-pte-path", type=str, default=None, help="Filename")
-    parser.add_argument("--output-dso-path", type=str, default=None, help="Filename")
    parser.add_argument(
-        "--dso-path", type=Path, default=None, help="Use the specified AOTI DSO model."
+        "--dso-path",
+        type=Path,
+        default=None,
+        help="Use the specified AOT Inductor .dso model file",
    )
    parser.add_argument(
        "--pte-path",
        type=Path,
        default=None,
-        help="Use the specified Executorch PTE model.",
+        help="Use the specified ExecuTorch .pte model file",
    )
    parser.add_argument(
-        "-d",
-        "--dtype",
+        "-d", "--dtype",
        default="float32",
        help="Override the dtype of the model (default is the checkpoint dtype). Options: bf16, fp16, fp32",
    )
-    parser.add_argument("-v", "--verbose", action="store_true")
    parser.add_argument(
-        "--quantize", type=str, default="{ }", help="Quantization options."
+        "-v", "--verbose",
+        action="store_true",
+        help="Verbose output",
+    )
+    parser.add_argument(
+        "--quantize",
+        type=str,
+        default="{ }",
+        help="Quantization options",
+    )
+    parser.add_argument(
+        "--params-table",
+        type=str,
+        default=None,
+        help="Parameter table to use",
    )
-    parser.add_argument("--params-table", type=str, default=None, help="Device to use")
    parser.add_argument(
-        "--device", type=str, default=default_device, help="Device to use"
+        "--device",
+        type=str,
+        default=default_device,
+        help="Hardware device to use. Options: cpu, cuda, mps",
    )
    parser.add_argument(
        "--tasks",
        nargs="+",
        type=str,
        default=["hellaswag"],
-        help="list of lm-eluther tasks to evaluate usage: --tasks task1 task2",
+        help="List of lm-eluther tasks to evaluate. Usage: --tasks task1 task2",
    )
    parser.add_argument(
-        "--limit", type=int, default=None, help="number of samples to evaluate"
+        "--limit",
+        type=int,
+        default=None,
+        help="Number of samples to evaluate",
    )
    parser.add_argument(
        "--max-seq-length",
        type=int,
        default=None,
-        help="maximum length sequence to evaluate",
+        help="Maximum sequence length to evaluate",
    )
    parser.add_argument(
        "--hf-token",
@@ -201,7 +265,6 @@ def _add_arguments_common(parser):


def arg_init(args):
-
    if Path(args.quantize).is_file():
        with open(args.quantize, "r") as f:
            args.quantize = json.loads(f.read())
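As the `arg_init` hunk above shows, `--quantize` accepts a path to a JSON file and replaces it in place with the parsed dict (an inline JSON string takes the other branch, which is elided here). A hedged usage sketch; the file name and empty config are placeholders, while `add_arguments` and `arg_init` are the functions from this diff:

import argparse
import json
import tempfile
from pathlib import Path

# Placeholder config file; real quantization schemes are not spelled out here.
cfg = Path(tempfile.mkdtemp()) / "quantize.json"
cfg.write_text(json.dumps({}))

parser = argparse.ArgumentParser()
add_arguments(parser)
args = parser.parse_args(["--quantize", str(cfg)])
arg_init(args)  # args.quantize now holds the parsed dict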
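For orientation, the per-command helpers (`add_arguments_for_chat`, `add_arguments_for_browser`, `add_arguments_for_download`, ...) fit naturally under argparse subparsers. The wiring below is an assumption for illustration, not code from this commit:

import argparse

# Assumed wiring (not from this diff): one subparser per torchchat command.
def build_cli_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(prog="torchchat")
    subparsers = parser.add_subparsers(dest="command")
    add_arguments_for_chat(subparsers.add_parser("chat"))
    add_arguments_for_browser(subparsers.add_parser("browser"))
    add_arguments_for_download(subparsers.add_parser("download"))
    return parser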