Skip to content

Commit a74cd78

Browse files
jerryzh168 authored and malfet committed
Add eval script for pull.yml (#429)
Summary: att (as the title)
Test Plan: OSS CI
Reviewers:
Subscribers:
Tasks:
Tags:
1 parent 770f70c commit a74cd78

File tree

3 files changed: +138 -7 lines changed

.ci/scripts/validate.sh

Lines changed: 60 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -216,8 +216,8 @@ function eval_model() {
216216
python -W ignore eval.py --compile --dtype ${DTYPE} --checkpoint-path "$CHECKPOINT_PATH" --device "$TARGET_DEVICE" > "$MODEL_DIR/eval" || exit 1
217217
cat "$MODEL_DIR/eval"
218218
# extract perplexity number and compare with a constant
219-
local REF_PERPLEXITY=100000
220-
PERPLEXITY=cat "$MODEL_DIR/eval" | tail -n 1 log | awk -F '[, ]' '{print $4}'
219+
export REF_PERPLEXITY=100000
220+
export PERPLEXITY=cat "$MODEL_DIR/eval" | tail -n 1 log | awk -F '[, ]' '{print $4}'
221221
# == 1 meaning the check succeeded
222222
if [ "$(echo "$PERPLEXITY >= $REF_PERPLEXITY" | bc)" == 1]; then
223223
echo "perplexity checking failed for non-quantized model $MODEL_NAME with $DTYPE $TARGET_DEVICE"
@@ -229,17 +229,64 @@ function eval_model() {
229229
echo "******** INT4 group-wise quantized *******"
230230
echo "******************************************"
231231

232-
QUANT_OPTIONS='{"linear:int4" : {"groupsize": 32}}'
233-
python -W ignore eval.py --compile --dtype ${DTYPE} --quant $QUANT_OPTIONS --checkpoint-path "$CHECKPOINT_PATH" --device "$TARGET_DEVICE" > "$MODEL_DIR/eval" || exit 1
232+
export QUANT_OPTIONS='{"linear:int4" : {"groupsize": 32}}'
233+
python -W ignore eval.py --compile --dtype ${DTYPE} --quant "$QUANT_OPTIONS" --checkpoint-path "$CHECKPOINT_PATH" --device "$TARGET_DEVICE" > "$MODEL_DIR/eval" || exit 1
234234
cat "$MODEL_DIR/eval"
235-
local REF_PERPLEXITY=100000
236-
PERPLEXITY=cat "$MODEL_DIR/eval" | tail -n 1 log | awk -F '[, ]' '{print $4}'
235+
export REF_PERPLEXITY=100000
236+
export PERPLEXITY=cat "$MODEL_DIR/eval" | tail -n 1 log | awk -F '[, ]' '{print $4}'
237237
# == 1 meaning the check succeeded
238238
if [ "$(echo "$PERPLEXITY >= $REF_PERPLEXITY" | bc)" == 1]; then
239239
echo "perplexity checking failed for int4-quantized model $MODEL_NAME with $DTYPE $TARGET_DEVICE $QUANT_OPTIONS"
240240
else
241241
echo "perplexity checking succeeded for int4-quantized model $MODEL_NAME with $DTYPE $TARGET_DEVICE $QUANT_OPTIONS"
242242
fi;
243+
244+
done
245+
}
246+
247+
function eval_model_sanity_check() {
248+
local CHECKPOINT_PATH="$1"
249+
local TARGET_DEVICE="${2:-cpu}"
250+
local MODEL_DIR="${CHECKPOINT_PATH%/*}"
251+
local MODEL_NAME=$(basename "$CHECKPOINT_PATH" | sed 's/\.[^.]*$//')
252+
253+
for DTYPE in float32 bfloat16 float16; do
254+
echo ""############### Run eval with torch.compile for dtype $DTYPE "###############"
255+
echo ""
256+
echo "******************************************"
257+
echo "************** non-quantized *************"
258+
echo "******************************************"
259+
python -W ignore eval.py --compile --dtype ${DTYPE} --checkpoint-path "$CHECKPOINT_PATH" --device "$TARGET_DEVICE" --limit 5 > "$MODEL_DIR/eval" || exit 1
260+
cat "$MODEL_DIR/eval"
261+
262+
echo "******************************************"
263+
echo "******** INT4 group-wise quantized *******"
264+
echo "******************************************"
265+
266+
export QUANT_OPTIONS='{"linear:int4" : {"groupsize": 32}}'
267+
python -W ignore eval.py --compile --dtype ${DTYPE} --quant "$QUANT_OPTIONS" --checkpoint-path "$CHECKPOINT_PATH" --device "$TARGET_DEVICE" --limit 5 > "$MODEL_DIR/eval" || exit 1
268+
cat "$MODEL_DIR/eval"
269+
270+
echo "**************************************************"
271+
echo "******** INT4 group-wise quantized (eager) *******"
272+
echo "**************************************************"
273+
274+
if [ "$TARGET_DEVICE" == "cuda" ] && [ "$DTYPE" != "float16" ]; then
275+
python -W ignore eval.py --dtype ${DTYPE} --quant "$QUANT_OPTIONS" --checkpoint-path "$CHECKPOINT_PATH" --device "$TARGET_DEVICE" --limit 5 > "$MODEL_DIR/eval_eager" || exit 1
276+
cat "$MODEL_DIR/eval_eager"
277+
fi;
278+
279+
280+
# there is some issues with AOTI cpu and cuda, need to fix and enable the test for cuda as well
281+
echo "*************************************************"
282+
echo "******** INT4 group-wise quantized (AOTI) *******"
283+
echo "*************************************************"
284+
if [ "$DTYPE" != "float16" ]; then
285+
python3 -W ignore export.py --dtype ${DTYPE} --quant "$QUANT_OPTIONS" --checkpoint-path "$CHECKPOINT_PATH" --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" || exit 1
286+
python3 -W ignore eval.py --dtype ${DTYPE} --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" --limit 5 > "$MODEL_DIR/output_eval_aoti" || exit 1
287+
cat "$MODEL_DIR/output_eval_aoti"
288+
fi;
289+
243290
done
244291
}
245292

@@ -263,6 +310,10 @@ function run_eval(){
263310
eval_model "$CHECKPOINT_PATH" "$TARGET_DEVICE" || exit 1
264311
}
265312

313+
function run_eval_sanity_check(){
314+
eval_model_sanity_check "$CHECKPOINT_PATH" "$TARGET_DEVICE" || exit 1
315+
}
316+
266317
CHECKPOINT_PATH="$1"
267318
TARGET_DEVICE="${2:-cpu}"
268319
PROMPT="Hello, my name is"
@@ -284,6 +335,9 @@ if [ "$#" -gt 2 ]; then
284335
"eval")
285336
run_eval || exit 1
286337
;;
338+
"eval_sanity_check")
339+
run_eval_sanity_check || exit 1
340+
;;
287341
*)
288342
echo "Unknown argument: $arg" >&2
289343
exit 1

.github/workflows/pull.yml

Lines changed: 76 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,7 @@ jobs:
5959
pushd ${TORCHCHAT_ROOT}
6060
bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
6161
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "compile"
62+
6263
test-cpu-aoti:
6364
name: test-cpu-aoti (${{ matrix.platform }}, ${{ matrix.model_name }})
6465
needs: gather-models-cpu
@@ -93,6 +94,43 @@ jobs:
9394
pushd ${TORCHCHAT_ROOT}
9495
bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
9596
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "aoti"
97+
98+
test-cpu-eval-sanity-check:
99+
name: test-cpu-eval-sanity-check (${{ matrix.platform }}, ${{ matrix.model_name }})
100+
needs: gather-models-cpu
101+
strategy:
102+
matrix: ${{ fromJSON(needs.gather-models-cpu.outputs.models) }}
103+
fail-fast: false
104+
runs-on: ${{ matrix.runner }}
105+
env:
106+
TORCHCHAT_ROOT: ${{ github.workspace }}
107+
REPO_NAME: ${{ matrix.repo_name }}
108+
steps:
109+
- name: Checkout repo
110+
uses: actions/checkout@v3
111+
- name: Setup Python
112+
uses: actions/setup-python@v4
113+
with:
114+
python-version: '3.11'
115+
- name: Print machine info
116+
run: |
117+
echo "$(uname -a)"
118+
- name: Install dependencies
119+
run: |
120+
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
121+
pip install -r requirements.txt
122+
pip list
123+
python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
124+
- name: Download checkpoints
125+
run: |
126+
bash ${TORCHCHAT_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}"
127+
- name: Run validation
128+
run: |
129+
python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
130+
pushd ${TORCHCHAT_ROOT}
131+
bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
132+
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval_sanity_check"
133+
96134
gather-models-gpu:
97135
runs-on: ubuntu-22.04
98136
outputs:
@@ -144,6 +182,7 @@ jobs:
144182
echo "::group::Run inference"
145183
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "compile"
146184
echo "::endgroup::"
185+
147186
test-gpu-aoti:
148187
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
149188
name: test-gpu-aoti (${{ matrix.platform }}, ${{ matrix.model_name }})
@@ -179,6 +218,43 @@ jobs:
179218
echo "::group::Run inference"
180219
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti"
181220
echo "::endgroup::"
221+
222+
test-gpu-eval-sanity-check:
223+
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
224+
name: test-gpu-eval-sanity-check (${{ matrix.platform }}, ${{ matrix.model_name }})
225+
needs: gather-models-gpu
226+
strategy:
227+
matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
228+
fail-fast: false
229+
with:
230+
runner: linux.g5.4xlarge.nvidia.gpu
231+
gpu-arch-type: cuda
232+
gpu-arch-version: "12.1"
233+
script: |
234+
echo "::group::Print machine info"
235+
nvidia-smi
236+
echo "::endgroup::"
237+
238+
echo "::group::Install required packages"
239+
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121
240+
pip install -r ./requirements.txt
241+
pip list
242+
python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
243+
echo "::endgroup::"
244+
245+
echo "::group::Download checkpoint"
246+
export REPO_NAME=${{ matrix.repo_name }}
247+
bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
248+
echo "::endgroup::"
249+
250+
echo "::group::Convert checkpoint"
251+
bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
252+
echo "::endgroup::"
253+
254+
echo "::group::Run eval"
255+
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "eval_sanity_check"
256+
echo "::endgroup::"
257+
182258
test-tinystories-executorch:
183259
strategy:
184260
matrix:

build/builder.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -405,7 +405,8 @@ def _initialize_model(
405405
print(f"Time to quantize model: {time.time() - t0q:.02f} seconds")
406406

407407
if builder_args.setup_caches:
408-
max_seq_length = 350
408+
# TODO: get this from args?
409+
max_seq_length = 2048
409410
with torch.device(builder_args.device):
410411
model.setup_caches(max_batch_size=1, max_seq_length=max_seq_length)
411412

0 commit comments

Comments
 (0)