Commit af88c63

mikekgfb authored and malfet committed
Add tests for quantize json, add cuda device specification and precision to cuda.json (#519)
1 parent f25ef37 commit af88c63

File tree: 3 files changed, +29 −0 lines

  .github/workflows/pull.yml
  config/data/cuda.json
  quantize.py

3 files changed

+29
-0
lines changed

.github/workflows/pull.yml

Lines changed: 24 additions & 0 deletions
@@ -266,6 +266,12 @@ jobs:
           bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-float32"
           echo "::endgroup::"
 
+          echo "::group::Run inference with quantize file"
+          if [ $(uname -s) != Darwin ]; then
+            python3 generate.py --quantize config/data/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+          fi
+          echo "::endgroup::"
+
   test-gpu-aoti-float16:
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     name: test-gpu-aoti-float16 (${{ matrix.platform }}, ${{ matrix.model_name }})
@@ -307,6 +313,13 @@ jobs:
           bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-float16"
           echo "::endgroup::"
 
+          echo "::group::Run inference with quantize file"
+          if [ $(uname -s) == Darwin ]; then
+            python3 export.py --output-dso-path /tmp/model.so --quantize config/data/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+            python3 generate.py --dso-path /tmp/model.so --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+          fi
+          echo "::endgroup::"
+
   test-gpu-eval-sanity-check:
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     name: test-gpu-eval-sanity-check (${{ matrix.platform }}, ${{ matrix.model_name }})
@@ -428,9 +441,20 @@ jobs:
           export MODEL_PATH=checkpoints/stories15M/stories15M.pt
           export MODEL_NAME=stories15M
           export MODEL_DIR=/tmp
+
+          echo "******************************************"
+          echo "*** vanilla ***"
+          echo "******************************************"
           python export.py --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
           python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
 
+          echo "******************************************"
+          echo "*** --quantize config/data/mobile.json ***"
+          echo "******************************************"
+          # python export.py --quantize config/data/mobile.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
+          # python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
+
+
           echo "******************************************"
           echo "******* Emb: channel-wise quantized ******"
           echo "******************************************"

config/data/cuda.json

Lines changed: 2 additions & 0 deletions
@@ -1,3 +1,5 @@
 {
+    "executor": {"accelerator": "cuda"},
+    "precision": {"dtype": "bf16"},
     "linear:int4": {"groupsize" : 256}
 }
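With the two added entries, cuda.json now selects the execution device ("executor"), the compute dtype ("precision"), and a 4-bit grouped linear quantization scheme ("linear:int4"). A sketch of how such a config might be applied to a model, under the assumption that each top-level key dispatches to its own handler (the mapping below is illustrative; the repo's real dispatch lives in quantize.py):

import json

import torch
import torch.nn as nn

# Illustrative name-to-dtype table; the repo resolves names via
# build.utils.name_to_dtype instead.
_DTYPES = {"bf16": torch.bfloat16, "fp16": torch.float16, "fp32": torch.float32}

def apply_quantize_config(model: nn.Module, config_path: str) -> nn.Module:
    with open(config_path) as f:
        options = json.load(f)
    for name, kwargs in options.items():
        if name == "executor":
            # e.g. {"accelerator": "cuda"} -> move the model to CUDA
            model = model.to(device=kwargs["accelerator"])
        elif name == "precision":
            # e.g. {"dtype": "bf16"} -> cast parameters to bfloat16
            model = model.to(dtype=_DTYPES[kwargs["dtype"]])
        # "linear:int4" and other schemes would dispatch to their
        # quantization handlers; omitted from this sketch.
    return model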

quantize.py

Lines changed: 3 additions & 0 deletions
@@ -17,6 +17,7 @@
 import torch.nn.functional as F
 from build.utils import (
     find_multiple,
+    get_device_str,
     get_precision,
     name_to_dtype,
     state_dict_device,
@@ -124,6 +125,8 @@ def quantized_model(self) -> nn.Module:
 
 #########################################################################
 ### wrapper for setting device as a QuantHandler ###
+### for now select device for PyTorch eager and AOTI, in future ###
+### also use this for selecting delegate when exporting with ET ###
 
 
 class ExecutorHandler(QuantHandler):
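The new comment block pins down ExecutorHandler's role: a QuantHandler that performs no quantization, but selects the device for PyTorch eager and AOTI execution (and, eventually, the delegate for ExecuTorch export). A minimal sketch of such a wrapper, assuming the quantized_model() -> nn.Module interface visible in the hunk header; the commit imports get_device_str from build.utils, presumably to resolve device aliases, while the sketch below uses torch.device directly:

import torch
import torch.nn as nn

class QuantHandler:
    # Interface implied by the diff: each handler returns a
    # (possibly transformed) model from quantized_model().
    def quantized_model(self) -> nn.Module:
        raise NotImplementedError

class ExecutorHandler(QuantHandler):
    # Illustrative device-selection wrapper, not the commit's actual body.
    def __init__(self, model: nn.Module, accelerator: str = "cpu"):
        self.model = model
        self.device = torch.device(accelerator)

    def quantized_model(self) -> nn.Module:
        # "Quantizing" here is just placing the model on the target device.
        return self.model.to(device=self.device)

With cuda.json's {"executor": {"accelerator": "cuda"}}, applying this handler would amount to model.to("cuda") before generation.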

0 commit comments