@@ -216,8 +216,8 @@ function eval_model() {
216
216
python -W ignore eval.py --compile --dtype ${DTYPE} --checkpoint-path " $CHECKPOINT_PATH " --device " $TARGET_DEVICE " > " $MODEL_DIR /eval" || exit 1
217
217
cat " $MODEL_DIR /eval"
218
218
# extract perplexity number and compare with a constant
219
- local REF_PERPLEXITY=100000
220
- PERPLEXITY=cat " $MODEL_DIR /eval" | tail -n 1 log | awk -F ' [, ]' ' {print $4}'
219
+ export REF_PERPLEXITY=100000
220
+ export PERPLEXITY=cat " $MODEL_DIR /eval" | tail -n 1 log | awk -F ' [, ]' ' {print $4}'
221
221
# bc prints 1 when PERPLEXITY >= REF_PERPLEXITY, i.e. the perplexity check failed
222
222
if [ " $( echo " $PERPLEXITY >= $REF_PERPLEXITY " | bc) " == 1]; then
223
223
echo " perplexity checking failed for non-quantized model $MODEL_NAME with $DTYPE $TARGET_DEVICE "
@@ -229,17 +229,64 @@ function eval_model() {
229
229
echo " ******** INT4 group-wise quantized *******"
230
230
echo " ******************************************"
231
231
232
- QUANT_OPTIONS=' {"linear:int4" : {"groupsize": 32}}'
233
- python -W ignore eval.py --compile --dtype ${DTYPE} --quant $QUANT_OPTIONS --checkpoint-path " $CHECKPOINT_PATH " --device " $TARGET_DEVICE " > " $MODEL_DIR /eval" || exit 1
232
+ export QUANT_OPTIONS=' {"linear:int4" : {"groupsize": 32}}'
233
+ python -W ignore eval.py --compile --dtype ${DTYPE} --quant " $QUANT_OPTIONS " --checkpoint-path " $CHECKPOINT_PATH " --device " $TARGET_DEVICE " > " $MODEL_DIR /eval" || exit 1
234
234
cat " $MODEL_DIR /eval"
235
- local REF_PERPLEXITY=100000
236
- PERPLEXITY=cat " $MODEL_DIR /eval" | tail -n 1 log | awk -F ' [, ]' ' {print $4}'
235
+ export REF_PERPLEXITY=100000
236
+ export PERPLEXITY=cat " $MODEL_DIR /eval" | tail -n 1 log | awk -F ' [, ]' ' {print $4}'
237
237
# bc prints 1 when PERPLEXITY >= REF_PERPLEXITY, i.e. the perplexity check failed
238
238
if [ " $( echo " $PERPLEXITY >= $REF_PERPLEXITY " | bc) " == 1]; then
239
239
echo " perplexity checking failed for int4-quantized model $MODEL_NAME with $DTYPE $TARGET_DEVICE $QUANT_OPTIONS "
240
240
else
241
241
echo " perplexity checking succeeded for int4-quantized model $MODEL_NAME with $DTYPE $TARGET_DEVICE $QUANT_OPTIONS "
242
242
fi ;
243
+
244
+ done
245
+ }
246
+
247
#######################################
# Sanity-check eval.py against one checkpoint, passing --limit 5 to every
# eval.py run. For each dtype in float32, bfloat16, float16 it runs:
#   1. eval.py --compile, non-quantized
#   2. eval.py --compile, INT4 group-wise quantized
#   3. eval.py eager, INT4 quantized   (only when device is cuda; not float16)
#   4. export.py to a .so, then eval.py --dso-path (AOTI; not float16)
# Globals:
#   QUANT_OPTIONS - written and exported (JSON passed to --quant)
# Arguments:
#   $1 - path to the checkpoint file
#   $2 - target device (default: cpu)
# Outputs:
#   Banners and the captured output of each run on stdout. Writes the files
#   eval, eval_eager, output_eval_aoti and <model name>.so into the
#   checkpoint's directory.
# Returns:
#   Exits the shell with status 1 as soon as any python step fails.
#######################################
function eval_model_sanity_check() {
    local CHECKPOINT_PATH="$1"
    local TARGET_DEVICE="${2:-cpu}"

    # Directory holding the checkpoint. A bare file name (no slash) means the
    # current directory; "${path%/*}" alone would yield the file name itself.
    local MODEL_DIR="."
    if [[ "$CHECKPOINT_PATH" == */* ]]; then
        MODEL_DIR="${CHECKPOINT_PATH%/*}"
    fi

    # File name without directory and without its last extension.
    local MODEL_NAME="${CHECKPOINT_PATH##*/}"
    MODEL_NAME="${MODEL_NAME%.*}"

    # Shared library produced by export.py and consumed by the AOTI eval run.
    local DSO_PATH="${MODEL_DIR}/${MODEL_NAME}.so"

    local DTYPE
    for DTYPE in float32 bfloat16 float16; do
        echo "############### Run eval with torch.compile for dtype $DTYPE ###############"
        echo ""
        echo "******************************************"
        echo "************** non-quantized *************"
        echo "******************************************"
        python -W ignore eval.py --compile --dtype "${DTYPE}" --checkpoint-path "$CHECKPOINT_PATH" --device "$TARGET_DEVICE" --limit 5 > "$MODEL_DIR/eval" || exit 1
        cat "$MODEL_DIR/eval"

        echo "******************************************"
        echo "******** INT4 group-wise quantized *******"
        echo "******************************************"

        # Kept exported (as in the original) so child processes still see it.
        export QUANT_OPTIONS='{"linear:int4" : {"groupsize": 32}}'
        python -W ignore eval.py --compile --dtype "${DTYPE}" --quant "$QUANT_OPTIONS" --checkpoint-path "$CHECKPOINT_PATH" --device "$TARGET_DEVICE" --limit 5 > "$MODEL_DIR/eval" || exit 1
        cat "$MODEL_DIR/eval"

        echo "**************************************************"
        echo "******** INT4 group-wise quantized (eager) *******"
        echo "**************************************************"

        # The banner above is always printed; the eager run itself only
        # happens on cuda and is skipped for float16.
        if [ "$TARGET_DEVICE" == "cuda" ] && [ "$DTYPE" != "float16" ]; then
            python -W ignore eval.py --dtype "${DTYPE}" --quant "$QUANT_OPTIONS" --checkpoint-path "$CHECKPOINT_PATH" --device "$TARGET_DEVICE" --limit 5 > "$MODEL_DIR/eval_eager" || exit 1
            cat "$MODEL_DIR/eval_eager"
        fi

        # there are some issues with AOTI on cpu and cuda; need to fix and
        # enable the test for cuda as well
        echo "*************************************************"
        echo "******** INT4 group-wise quantized (AOTI) *******"
        echo "*************************************************"
        if [ "$DTYPE" != "float16" ]; then
            python3 -W ignore export.py --dtype "${DTYPE}" --quant "$QUANT_OPTIONS" --checkpoint-path "$CHECKPOINT_PATH" --output-dso-path "$DSO_PATH" --device "$TARGET_DEVICE" || exit 1
            python3 -W ignore eval.py --dtype "${DTYPE}" --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --dso-path "$DSO_PATH" --device "$TARGET_DEVICE" --limit 5 > "$MODEL_DIR/output_eval_aoti" || exit 1
            cat "$MODEL_DIR/output_eval_aoti"
        fi
    done
}
245
292
@@ -263,6 +310,10 @@ function run_eval(){
263
310
eval_model " $CHECKPOINT_PATH " " $TARGET_DEVICE " || exit 1
264
311
}
265
312
313
# Entry point for the "eval_sanity_check" action: forwards the script-level
# CHECKPOINT_PATH and TARGET_DEVICE globals to eval_model_sanity_check and
# exits the shell with status 1 if that call fails.
function run_eval_sanity_check(){
    eval_model_sanity_check "$CHECKPOINT_PATH" "$TARGET_DEVICE" || exit 1
}
316
+
266
317
# Script-level inputs read by the run_* functions.
#   $1 - checkpoint path
#   $2 - target device (default: cpu)
CHECKPOINT_PATH="$1"
TARGET_DEVICE="${2:-cpu}"
PROMPT="Hello, my name is"
@@ -284,6 +335,9 @@ if [ "$#" -gt 2 ]; then
284
335
" eval" )
285
336
run_eval || exit 1
286
337
;;
338
+ " eval_sanity_check" )
339
+ run_eval_sanity_check || exit 1
340
+ ;;
287
341
* )
288
342
echo " Unknown argument: $arg " >&2
289
343
exit 1
0 commit comments