@@ -266,6 +266,12 @@ jobs:
266
266
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-float32"
267
267
echo "::endgroup::"
268
268
269
+ echo "::group::Run inference with quantize file"
270
+ if [ "$(uname -s)" != Darwin ]; then
271
+ python3 generate.py --quantize config/data/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
272
+ fi
273
+ echo "::endgroup::"
274
+
269
275
test-gpu-aoti-float16 :
270
276
uses : pytorch/test-infra/.github/workflows/linux_job.yml@main
271
277
name : test-gpu-aoti-float16 (${{ matrix.platform }}, ${{ matrix.model_name }})
@@ -307,6 +313,13 @@ jobs:
307
313
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-float16"
308
314
echo "::endgroup::"
309
315
316
+ echo "::group::Run inference with quantize file"
317
+ if [ "$(uname -s)" != Darwin ]; then
318
+ python3 export.py --output-dso-path /tmp/model.so --quantize config/data/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
319
+ python3 generate.py --dso-path /tmp/model.so --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
320
+ fi
321
+ echo "::endgroup::"
322
+
310
323
test-gpu-eval-sanity-check :
311
324
uses : pytorch/test-infra/.github/workflows/linux_job.yml@main
312
325
name : test-gpu-eval-sanity-check (${{ matrix.platform }}, ${{ matrix.model_name }})
@@ -428,9 +441,20 @@ jobs:
428
441
export MODEL_PATH=checkpoints/stories15M/stories15M.pt
429
442
export MODEL_NAME=stories15M
430
443
export MODEL_DIR=/tmp
444
+
445
+ echo "******************************************"
446
+ echo "*** vanilla ***"
447
+ echo "******************************************"
431
448
python export.py --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
432
449
python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
433
450
451
+ echo "******************************************"
452
+ echo "*** --quantize config/data/mobile.json ***"
453
+ echo "******************************************"
454
+ # python export.py --quantize config/data/mobile.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
455
+ # python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
456
+
457
+
434
458
echo "******************************************"
435
459
echo "******* Emb: channel-wise quantized ******"
436
460
echo "******************************************"
0 commit comments