@@ -214,12 +214,12 @@ python3 generate.py [--compile] --checkpoint-path ${MODEL_PATH} --prompt "Hello,
```

```
- python3 export.py --checkpoint-path ${MODEL_PATH} -d fp32 --quantize '{"linear:int4": {"groupsize" : 32} }' [ --output-pte-path ${MODEL_OUT}/${MODEL_NAME}_int4-gw32.pte | --output-dso-path ${MODEL_OUT}/${MODEL_NAME}_int4-gw32.dso]
+ python3 export.py --checkpoint-path ${MODEL_PATH} -d fp32 --quantize '{"linear:int4": {"groupsize" : 32} }' --output-dso-path ${MODEL_OUT}/${MODEL_NAME}_int4-gw32.dso
```
Now you can run your model with the same command as before:

```
- python3 generate.py [ --pte-path ${MODEL_OUT}/${MODEL_NAME}_int4-gw32.pte | --dso-path ${MODEL_OUT}/${MODEL_NAME}_int4-gw32.dso] --prompt "Hello my name is"
+ python3 generate.py --dso-path ${MODEL_OUT}/${MODEL_NAME}_int4-gw32.dso --prompt "Hello my name is"
```
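For reference, here is a minimal sketch of the shell setup these commands assume. The variable values below are placeholders, not paths from this repo; substitute your own checkpoint and output locations.

```
# Placeholder values for the variables used in the export/generate commands;
# adjust MODEL_NAME and the paths to match your own checkpoint layout.
MODEL_NAME=mymodel
MODEL_PATH=checkpoints/${MODEL_NAME}/model.pth
MODEL_OUT=exported_models
mkdir -p "${MODEL_OUT}"
```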
## 4-Bit Integer Linear Quantization (a8w4dq)
@@ -247,12 +247,12 @@ python3 generate.py [--compile] --checkpoint-path ${MODEL_PATH} --prompt "Hello,
```

```
- python3 export.py --checkpoint-path ${MODEL_PATH} -d fp32 --quantize '{"linear:gptq": {"groupsize" : 32} }' [ --output-pte-path ${MODEL_OUT}/${MODEL_NAME}_gptq.pte | ... dso... ]
+ python3 export.py --checkpoint-path ${MODEL_PATH} -d fp32 --quantize '{"linear:gptq": {"groupsize" : 32} }' --output-dso-path ${MODEL_OUT}/${MODEL_NAME}_gptq.dso
```
Now you can run your model with the same command as before:

```
- python3 generate.py [ --pte-path ${MODEL_OUT}/${MODEL_NAME}_gptq.pte | ... dso...] --prompt "Hello my name is"
+ python3 generate.py --dso-path ${MODEL_OUT}/${MODEL_NAME}_gptq.dso --prompt "Hello my name is"
```
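The `--quantize` argument is a JSON string mapping a quantizer name to its options. As a sketch, assuming only the `groupsize` option shown above, building the config in a shell variable keeps the quoting readable:

```
# "linear:gptq" and "groupsize" are the names used in the commands above.
# Smaller group sizes add more scale parameters, which typically improves
# accuracy at the cost of a larger exported artifact.
QUANT_CONFIG='{"linear:gptq": {"groupsize": 32}}'
python3 export.py --checkpoint-path "${MODEL_PATH}" -d fp32 \
  --quantize "${QUANT_CONFIG}" \
  --output-dso-path "${MODEL_OUT}/${MODEL_NAME}_gptq.dso"
```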
## 4-bit Integer Linear Quantization with HQQ (hqq)
@@ -267,12 +267,12 @@ python3 generate.py [--compile] --checkpoint-path ${MODEL_PATH} --prompt "Hello,
```

```
- python3 export.py --checkpoint-path ${MODEL_PATH} -d fp32 --quantize '{"linear:hqq": {"groupsize" : 32} }' [ --output-pte-path ${MODEL_OUT}/${MODEL_NAME}_hqq.pte | ... dso... ]
+ python3 export.py --checkpoint-path ${MODEL_PATH} -d fp32 --quantize '{"linear:hqq": {"groupsize" : 32} }' --output-dso-path ${MODEL_OUT}/${MODEL_NAME}_hqq.dso
```
Now you can run your model with the same command as before:

```
- python3 generate.py [ --pte-path ${MODEL_OUT}/${MODEL_NAME}_hqq.pte | ... dso...] --prompt "Hello my name is"
+ python3 generate.py --dso-path ${MODEL_OUT}/${MODEL_NAME}_hqq.dso --prompt "Hello my name is"
```
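Since the int4, gptq, and hqq flows differ only in the quantizer name and output filename, a hedged convenience loop can export and smoke-test all three; the flags match the commands above, while the loop itself is illustrative:

```
# Export each quantizer covered above, then run a short generation against the
# resulting DSO to confirm it loads. groupsize 32 mirrors the examples above.
for SCHEME in int4 gptq hqq; do
  python3 export.py --checkpoint-path "${MODEL_PATH}" -d fp32 \
    --quantize "{\"linear:${SCHEME}\": {\"groupsize\": 32}}" \
    --output-dso-path "${MODEL_OUT}/${MODEL_NAME}_${SCHEME}.dso"
  python3 generate.py --dso-path "${MODEL_OUT}/${MODEL_NAME}_${SCHEME}.dso" \
    --prompt "Hello my name is"
done
```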
## Adding additional quantization schemes