@@ -133,60 +133,60 @@ function generate_aoti_model_output() {
133
133
echo " ******************************************"
134
134
echo " ************** non-quantized *************"
135
135
echo " ******************************************"
136
- python3 -W ignore torchchat.py export --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --output-dso- path " ${MODEL_DIR} /${MODEL_NAME} .so " --device " $TARGET_DEVICE " || exit 1
137
- python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --dso- path " $MODEL_DIR /${MODEL_NAME} .so " --prompt " $PROMPT " --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
136
+ python3 -W ignore torchchat.py export --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --output-aoti-package- path " ${MODEL_DIR} /${MODEL_NAME} .pt2 " --device " $TARGET_DEVICE " || exit 1
137
+ python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --aoti-package- path " $MODEL_DIR /${MODEL_NAME} .pt2 " --prompt " $PROMPT " --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
138
138
.ci/scripts/check_gibberish " $MODEL_DIR /output_aoti"
139
139
140
140
echo " ******************************************"
141
141
echo " ******* Emb: channel-wise quantized ******"
142
142
echo " ******************************************"
143
- python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path " $CHECKPOINT_PATH " --output-dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " || exit 1
144
- python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
143
+ python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path " $CHECKPOINT_PATH " --output-aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " || exit 1
144
+ python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
145
145
.ci/scripts/check_gibberish " $MODEL_DIR /output_aoti"
146
146
147
147
echo " ******************************************"
148
148
echo " ******** Emb: group-wise quantized *******"
149
149
echo " ******************************************"
150
- python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path " $CHECKPOINT_PATH " --output-dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " || exit 1
151
- python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
150
+ python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path " $CHECKPOINT_PATH " --output-aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " || exit 1
151
+ python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
152
152
.ci/scripts/check_gibberish " $MODEL_DIR /output_aoti"
153
153
154
154
echo " ***********************************************"
155
155
echo " ******* Emb: 4bit channel-wise quantized ******"
156
156
echo " ***********************************************"
157
- python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"embedding" : {"bitwidth": 4, "groupsize": 0, "packed": "True"}}' --checkpoint-path " $CHECKPOINT_PATH " --output-dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " || exit 1
158
- python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
157
+ python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"embedding" : {"bitwidth": 4, "groupsize": 0, "packed": "True"}}' --checkpoint-path " $CHECKPOINT_PATH " --output-aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " || exit 1
158
+ python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
159
159
.ci/scripts/check_gibberish " $MODEL_DIR /output_aoti"
160
160
161
161
echo " ***********************************************"
162
162
echo " ******** Emb: 4bit group-wise quantized *******"
163
163
echo " ***********************************************"
164
- python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"embedding" : {"bitwidth": 4, "groupsize": 8, "packed": "True"}}' --checkpoint-path " $CHECKPOINT_PATH " --output-dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " || exit 1
165
- python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
164
+ python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"embedding" : {"bitwidth": 4, "groupsize": 8, "packed": "True"}}' --checkpoint-path " $CHECKPOINT_PATH " --output-aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " || exit 1
165
+ python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
166
166
.ci/scripts/check_gibberish " $MODEL_DIR /output_aoti"
167
167
168
168
if [ " ${EXCLUDE_INT8_QUANT:- false} " == false ]; then
169
169
echo " ******************************************"
170
170
echo " ******* INT8 channel-wise quantized ******"
171
171
echo " ******************************************"
172
- python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path " $CHECKPOINT_PATH " --output-dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " || exit 1
173
- python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
172
+ python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path " $CHECKPOINT_PATH " --output-aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " || exit 1
173
+ python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
174
174
.ci/scripts/check_gibberish " $MODEL_DIR /output_aoti"
175
175
176
176
echo " ******************************************"
177
177
echo " ******** INT8 group-wise quantized *******"
178
178
echo " ******************************************"
179
- python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path " $CHECKPOINT_PATH " --output-dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " || exit 1
180
- python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
179
+ python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path " $CHECKPOINT_PATH " --output-aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " || exit 1
180
+ python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
181
181
.ci/scripts/check_gibberish " $MODEL_DIR /output_aoti"
182
182
fi
183
183
echo " ******************************************"
184
184
echo " ******** INT4 group-wise quantized *******"
185
185
echo " ******************************************"
186
186
if [[ " $TARGET_DEVICE " != " cuda" || " $DTYPE " == " bfloat16" ]]; then
187
187
# For CUDA, only bfloat16 makes sense for int4 mm kernel
188
- python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"linear:int4" : {"groupsize": 32}}' --checkpoint-path " $CHECKPOINT_PATH " --output-dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " || exit 1
189
- python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
188
+ python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant ' {"linear:int4" : {"groupsize": 32}}' --checkpoint-path " $CHECKPOINT_PATH " --output-aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " || exit 1
189
+ python3 -W ignore torchchat.py generate --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
190
190
.ci/scripts/check_gibberish " $MODEL_DIR /output_aoti"
191
191
fi
192
192
done
@@ -285,8 +285,8 @@ function eval_model_sanity_check() {
285
285
echo " ******** INT4 group-wise quantized (AOTI) *******"
286
286
echo " *************************************************"
287
287
if [ " $DTYPE " != " float16" ]; then
288
- python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant " $QUANT_OPTIONS " --checkpoint-path " $CHECKPOINT_PATH " --output-dso- path ${MODEL_DIR} /${MODEL_NAME} .so --dynamic-shapes --device " $TARGET_DEVICE " || exit 1
289
- python3 -W ignore torchchat.py eval --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --dso- path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " --limit 5 > " $MODEL_DIR /output_eval_aoti" || exit 1
288
+ python3 -W ignore torchchat.py export --dtype ${DTYPE} --quant " $QUANT_OPTIONS " --checkpoint-path " $CHECKPOINT_PATH " --output-aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --dynamic-shapes --device " $TARGET_DEVICE " || exit 1
289
+ python3 -W ignore torchchat.py eval --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --aoti-package- path ${MODEL_DIR} /${MODEL_NAME} .pt2 --device " $TARGET_DEVICE " --limit 5 > " $MODEL_DIR /output_eval_aoti" || exit 1
290
290
cat " $MODEL_DIR /output_eval_aoti"
291
291
fi ;
292
292
fi ;
0 commit comments