@@ -92,13 +92,16 @@ function generate_compiled_model_output() {
92
92
python3 -W ignore generate.py --dtype ${DTYPE} --compile --quant ' {"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --device " $TARGET_DEVICE " > " $MODEL_DIR /output_compiled" || exit 1
93
93
.ci/scripts/check_gibberish " $MODEL_DIR /output_compiled"
94
94
95
- echo " ******************************************"
96
- echo " ******** INT4 group-wise quantized *******"
97
- echo " ******************************************"
98
- python3 -W ignore generate.py --dtype ${DTYPE} --quant ' {"linear:int4" : {"groupsize": 32}}' --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --device " $TARGET_DEVICE " > " $MODEL_DIR /output_eager" || exit 1
99
- .ci/scripts/check_gibberish " $MODEL_DIR /output_eager"
100
- python3 -W ignore generate.py --dtype ${DTYPE} --compile --quant ' {"linear:int4" : {"groupsize": 32}}' --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --device " $TARGET_DEVICE " > " $MODEL_DIR /output_compiled" || exit 1
101
- .ci/scripts/check_gibberish " $MODEL_DIR /output_compiled"
95
+ if [[ $TARGET_DEVICE != " cuda" || " $DTYPE " == " bfloat16" ]]; then
96
+ # For CUDA, only bfloat16 makes sense for int4 mm kernel
97
+ echo " ******************************************"
98
+ echo " ******** INT4 group-wise quantized *******"
99
+ echo " ******************************************"
100
+ python3 -W ignore generate.py --dtype ${DTYPE} --quant ' {"linear:int4" : {"groupsize": 32}}' --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --device " $TARGET_DEVICE " > " $MODEL_DIR /output_eager" || exit 1
101
+ .ci/scripts/check_gibberish " $MODEL_DIR /output_eager"
102
+ python3 -W ignore generate.py --dtype ${DTYPE} --compile --quant ' {"linear:int4" : {"groupsize": 32}}' --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --device " $TARGET_DEVICE " > " $MODEL_DIR /output_compiled" || exit 1
103
+ .ci/scripts/check_gibberish " $MODEL_DIR /output_compiled"
104
+ fi
102
105
fi
103
106
done
104
107
}
@@ -180,12 +183,11 @@ function generate_aoti_model_output() {
180
183
echo " ******************************************"
181
184
echo " ******** INT4 group-wise quantized *******"
182
185
echo " ******************************************"
183
- if [ " $TARGET_DEVICE " == " cuda" ]; then
184
- if [ " $DTYPE " != " float16" ]; then
185
- python3 -W ignore export.py --dtype ${DTYPE} --quant ' {"linear:int4" : {"groupsize": 32}}' --checkpoint-path " $CHECKPOINT_PATH " --output-dso-path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " || exit 1
186
- python3 -W ignore generate.py --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --dso-path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
187
- .ci/scripts/check_gibberish " $MODEL_DIR /output_aoti"
188
- fi
186
+ if [[ " $TARGET_DEVICE " != " cuda" || " $DTYPE " == " bfloat16" ]]; then
187
+ # For CUDA, only bfloat16 makes sense for int4 mm kernel
188
+ python3 -W ignore export.py --dtype ${DTYPE} --quant ' {"linear:int4" : {"groupsize": 32}}' --checkpoint-path " $CHECKPOINT_PATH " --output-dso-path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " || exit 1
189
+ python3 -W ignore generate.py --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --temperature 0 --dso-path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " > " $MODEL_DIR /output_aoti" || exit 1
190
+ .ci/scripts/check_gibberish " $MODEL_DIR /output_aoti"
189
191
fi
190
192
done
191
193
}
@@ -225,21 +227,23 @@ function eval_model() {
225
227
echo " perplexity checking succeeded for non-quantized model $MODEL_NAME with $DTYPE $TARGET_DEVICE "
226
228
fi ;
227
229
228
- echo " ******************************************"
229
- echo " ******** INT4 group-wise quantized *******"
230
- echo " ******************************************"
230
+ if [[ " $TARGET_DEVICE " != " cuda" || " $DTYPE " == " bfloat16" ]]; then
231
+ echo " ******************************************"
232
+ echo " ******** INT4 group-wise quantized *******"
233
+ echo " ******************************************"
231
234
232
- export QUANT_OPTIONS=' {"linear:int4" : {"groupsize": 32}}'
233
- python -W ignore eval.py --compile --dtype ${DTYPE} --quant " $QUANT_OPTIONS " --checkpoint-path " $CHECKPOINT_PATH " --device " $TARGET_DEVICE " > " $MODEL_DIR /eval" || exit 1
234
- cat " $MODEL_DIR /eval"
235
- export REF_PERPLEXITY=100000
236
- export PERPLEXITY=cat " $MODEL_DIR /eval" | tail -n 1 log | awk -F ' [, ]' ' {print $4}'
237
- # == 1 meaning the check succeeded
238
- if [ " $( echo " $PERPLEXITY >= $REF_PERPLEXITY " | bc) " == 1]; then
239
- echo " perplexity checking failed for int4-quantized model $MODEL_NAME with $DTYPE $TARGET_DEVICE $QUANT_OPTIONS "
240
- else
241
- echo " perplexity checking succeeded for int4-quantized model $MODEL_NAME with $DTYPE $TARGET_DEVICE $QUANT_OPTIONS "
242
- fi ;
235
+ export QUANT_OPTIONS=' {"linear:int4" : {"groupsize": 32}}'
236
+ python -W ignore eval.py --compile --dtype ${DTYPE} --quant " $QUANT_OPTIONS " --checkpoint-path " $CHECKPOINT_PATH " --device " $TARGET_DEVICE " > " $MODEL_DIR /eval" || exit 1
237
+ cat " $MODEL_DIR /eval"
238
+ export REF_PERPLEXITY=100000
239
+ export PERPLEXITY=cat " $MODEL_DIR /eval" | tail -n 1 log | awk -F ' [, ]' ' {print $4}'
240
+ # == 1 meaning the check succeeded
241
+ if [ " $( echo " $PERPLEXITY >= $REF_PERPLEXITY " | bc) " == 1]; then
242
+ echo " perplexity checking failed for int4-quantized model $MODEL_NAME with $DTYPE $TARGET_DEVICE $QUANT_OPTIONS "
243
+ else
244
+ echo " perplexity checking succeeded for int4-quantized model $MODEL_NAME with $DTYPE $TARGET_DEVICE $QUANT_OPTIONS "
245
+ fi ;
246
+ fi
243
247
244
248
done
245
249
}
@@ -260,32 +264,31 @@ function eval_model_sanity_check() {
260
264
python -W ignore eval.py --compile --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --device " $TARGET_DEVICE " --limit 5 > " $MODEL_DIR /eval" || exit 1
261
265
cat " $MODEL_DIR /eval"
262
266
263
- echo " ******************************************"
264
- echo " ******** INT4 group-wise quantized *******"
265
- echo " ******************************************"
267
+ if [[ " $TARGET_DEVICE " != " cuda" || " $DTYPE " == " bfloat16" ]]; then
268
+ echo " ******************************************"
269
+ echo " ******** INT4 group-wise quantized *******"
270
+ echo " ******************************************"
266
271
267
- export QUANT_OPTIONS=' {"linear:int4" : {"groupsize": 32}}'
268
- python -W ignore eval.py --compile --dtype ${DTYPE} --quant " $QUANT_OPTIONS " --checkpoint-path " $CHECKPOINT_PATH " --device " $TARGET_DEVICE " --limit 5 > " $MODEL_DIR /eval" || exit 1
269
- cat " $MODEL_DIR /eval"
272
+ export QUANT_OPTIONS=' {"linear:int4" : {"groupsize": 32}}'
273
+ python -W ignore eval.py --compile --dtype ${DTYPE} --quant " $QUANT_OPTIONS " --checkpoint-path " $CHECKPOINT_PATH " --device " $TARGET_DEVICE " --limit 5 > " $MODEL_DIR /eval" || exit 1
274
+ cat " $MODEL_DIR /eval"
270
275
271
- echo " **************************************************"
272
- echo " ******** INT4 group-wise quantized (eager) *******"
273
- echo " **************************************************"
276
+ echo " **************************************************"
277
+ echo " ******** INT4 group-wise quantized (eager) *******"
278
+ echo " **************************************************"
274
279
275
- if [ " $TARGET_DEVICE " == " cuda" ] && [ " $DTYPE " != " float16" ]; then
276
280
python -W ignore eval.py --dtype ${DTYPE} --quant " $QUANT_OPTIONS " --checkpoint-path " $CHECKPOINT_PATH " --device " $TARGET_DEVICE " --limit 5 > " $MODEL_DIR /eval_eager" || exit 1
277
281
cat " $MODEL_DIR /eval_eager"
278
- fi ;
279
282
280
-
281
- # there is some issues with AOTI cpu and cuda, need to fix and enable the test for cuda as well
282
- echo " **************************************************"
283
- echo " ******** INT4 group-wise quantized (AOTI) *******"
284
- echo " **************************************************"
285
- if [ " $DTYPE " != " float16 " ]; then
286
- python3 -W ignore export.py --dtype ${DTYPE} --quant " $QUANT_OPTIONS " --checkpoint-path " $CHECKPOINT_PATH " --output-dso-path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " || exit 1
287
- python3 -W ignore eval.py --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --dso-path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " --limit 5 > " $MODEL_DIR /output_eval_aoti" || exit 1
288
- cat " $MODEL_DIR /output_eval_aoti"
283
+ # there is some issues with AOTI cpu and cuda, need to fix and enable the test for cuda as well
284
+ echo " **************************************************"
285
+ echo " ******** INT4 group-wise quantized (AOTI) *******"
286
+ echo " **************************************************"
287
+ if [ " $DTYPE " != " float16 " ]; then
288
+ python3 -W ignore export.py --dtype ${DTYPE} --quant " $QUANT_OPTIONS " --checkpoint-path " $CHECKPOINT_PATH " --output-dso-path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " || exit 1
289
+ python3 -W ignore eval.py --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --dso-path ${MODEL_DIR} /${MODEL_NAME} .so --device " $TARGET_DEVICE " --limit 5 > " $MODEL_DIR /output_eval_aoti" || exit 1
290
+ cat " $MODEL_DIR /output_eval_aoti"
291
+ fi ;
289
292
fi ;
290
293
291
294
done
0 commit comments