Skip to content

Commit e9d64a6

Browse files
committed
Add eval script for pull.yml
Summary: att Test Plan: OSS CI Reviewers: Subscribers: Tasks: Tags:
1 parent 8d121ae commit e9d64a6

File tree

2 files changed

+97
-0
lines changed

2 files changed

+97
-0
lines changed

.ci/scripts/validate.sh

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,27 @@ function eval_model() {
240240
else
241241
echo "perplexity checking succeeded for int4-quantized model $MODEL_NAME with $DTYPE $TARGET_DEVICE $QUANT_OPTIONS"
242242
fi;
243+
244+
echo "**************************************************"
245+
echo "******** INT4 group-wise quantized (eager) *******"
246+
echo "**************************************************"
247+
248+
# Eager-mode eval with INT4 group-wise quantization (groupsize 32).
# Only runs when the target device is cuda and the dtype is not float16.
if [ "$TARGET_DEVICE" == "cuda" ] && [ "$DTYPE" != "float16" ]; then
    QUANT_OPTIONS='{"linear:int4" : {"groupsize": 32}}'
    # The JSON contains spaces, so it must be quoted: unquoted, the shell
    # word-splits it and --quant receives only the first fragment.
    python -W ignore eval.py --dtype "${DTYPE}" --quant "$QUANT_OPTIONS" --checkpoint-path "$CHECKPOINT_PATH" --device "$TARGET_DEVICE" > "$MODEL_DIR/eval_eager" || exit 1
    # Echo the captured eval output into the CI log.
    cat "$MODEL_DIR/eval_eager"
fi;
253+
254+
255+
echo "*************************************************"
256+
echo "******** INT4 group-wise quantized (AOTI) *******"
257+
echo "*************************************************"
258+
# AOTI eval with INT4 group-wise quantization (groupsize 32).
# Only runs when the target device is cuda and the dtype is not float16.
if [ "$TARGET_DEVICE" == "cuda" ] && [ "$DTYPE" != "float16" ]; then
    # Export the quantized model to a shared object first; the eval below loads it via --dso-path.
    # Paths and dtype are quoted so that spaces or glob characters cannot split the arguments.
    python3 -W ignore export.py --dtype "${DTYPE}" --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path "$CHECKPOINT_PATH" --output-dso-path "${MODEL_DIR}/${MODEL_NAME}.so" --device "$TARGET_DEVICE" || exit 1
    python3 -W ignore eval.py --dtype "${DTYPE}" --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --dso-path "${MODEL_DIR}/${MODEL_NAME}.so" --device "$TARGET_DEVICE" > "$MODEL_DIR/output_eval_aoti" || exit 1
    # Echo the captured eval output into the CI log.
    cat "$MODEL_DIR/output_eval_aoti"
fi;
263+
243264
done
244265
}
245266

.github/workflows/pull.yml

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ jobs:
5959
pushd ${TORCHCHAT_ROOT}
6060
bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
6161
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "compile"
62+
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval"
6263
test-cpu-aoti:
6364
name: test-cpu-aoti (${{ matrix.platform }}, ${{ matrix.model_name }})
6465
needs: gather-models-cpu
@@ -93,6 +94,43 @@ jobs:
9394
pushd ${TORCHCHAT_ROOT}
9495
bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
9596
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "aoti"
97+
98+
test-cpu-eval:
  # Runs validate.sh in "eval" mode on CPU, once per matrix entry published
  # by the gather-models-cpu job.
  name: test-cpu-eval (${{ matrix.platform }}, ${{ matrix.model_name }})
  needs: gather-models-cpu
  runs-on: ${{ matrix.runner }}
  strategy:
    # Keep the remaining matrix entries running when one of them fails.
    fail-fast: false
    matrix: ${{ fromJSON(needs.gather-models-cpu.outputs.models) }}
  env:
    REPO_NAME: ${{ matrix.repo_name }}
    TORCHCHAT_ROOT: ${{ github.workspace }}
  steps:
    - name: Checkout repo
      uses: actions/checkout@v3
    - name: Setup Python
      uses: actions/setup-python@v4
      with:
        python-version: '3.11'
    - name: Print machine info
      run: |
        echo "$(uname -a)"
    - name: Install dependencies
      # Installs the nightly CPU build of torch, then the project requirements.
      run: |
        pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
        pip install -r requirements.txt
        pip list
        python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
    - name: Download checkpoints
      run: |
        bash ${TORCHCHAT_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}"
    - name: Run validation
      # Converts the downloaded checkpoint, then validates it in "eval" mode on cpu.
      run: |
        python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
        pushd ${TORCHCHAT_ROOT}
        bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval"
133+
96134
gather-models-gpu:
97135
runs-on: ubuntu-22.04
98136
outputs:
@@ -144,6 +182,7 @@ jobs:
144182
echo "::group::Run inference"
145183
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "compile"
146184
echo "::endgroup::"
185+
147186
test-gpu-aoti:
148187
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
149188
name: test-gpu-aoti (${{ matrix.platform }}, ${{ matrix.model_name }})
@@ -179,6 +218,43 @@ jobs:
179218
echo "::group::Run inference"
180219
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti"
181220
echo "::endgroup::"
221+
222+
test-gpu-eval:
  # Runs validate.sh in "eval" mode on a CUDA runner, once per matrix entry
  # published by the gather-models-gpu job, via the shared linux_job workflow.
  uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
  # The display name must match the job id: it previously read
  # "test-gpu-compile", duplicating the compile job's name in the checks UI.
  name: test-gpu-eval (${{ matrix.platform }}, ${{ matrix.model_name }})
  needs: gather-models-gpu
  strategy:
    matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
    # Keep the remaining matrix entries running when one of them fails.
    fail-fast: false
  with:
    runner: linux.g5.4xlarge.nvidia.gpu
    gpu-arch-type: cuda
    gpu-arch-version: "12.1"
    script: |
      echo "::group::Print machine info"
      nvidia-smi
      echo "::endgroup::"

      echo "::group::Install required packages"
      pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121
      pip install -r ./requirements.txt
      pip list
      python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
      echo "::endgroup::"

      echo "::group::Download checkpoint"
      export REPO_NAME=${{ matrix.repo_name }}
      # Quote the resources list so it reaches the script as a single
      # argument, matching the test-cpu-eval job.
      bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} "${{ matrix.resources }}"
      echo "::endgroup::"

      echo "::group::Convert checkpoint"
      bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
      echo "::endgroup::"

      echo "::group::Run eval"
      bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "eval"
      echo "::endgroup::"
257+
182258
test-tinystories-executorch:
183259
strategy:
184260
matrix:

0 commit comments

Comments
 (0)