
Commit 3ee001e

metascroy authored and Michael Gschwind committed

break aoti CI jobs separately (#500)

* init
* fixes
* more fixes
* fixes
* fix
* fix
* bug fix
* add objcopy update
* suppress int8
* undefined variable

---------

Co-authored-by: Michael Gschwind <[email protected]>

1 parent 1207e51 · commit 3ee001e
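
In short: `validate.sh` previously hard-coded the AOTI dtype sweep inside `generate_aoti_model_output`; this commit threads a `DTYPES` argument through so CI can run each dtype as its own job. A sketch of the resulting invocations, based on the diffs below (the checkpoint path follows the CI convention and is illustrative):

```bash
# Old behavior, still reachable via the "aoti" argument
# (dtype list auto-selected, i.e. DTYPES="default"):
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti"

# New per-dtype arguments, one per CI job:
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-bfloat16"
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-float16"
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-float32"
```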

File tree

2 files changed: 126 additions, 15 deletions

* .ci/scripts/validate.sh
* .github/workflows/pull.yml

.ci/scripts/validate.sh

Lines changed: 40 additions & 11 deletions

```diff
@@ -25,6 +25,7 @@ function generate_compiled_model_output() {
     local MODEL_DIR="${CHECKPOINT_PATH%/*}"
     local MODEL_NAME=$(basename "$CHECKPOINT_PATH" | sed 's/\.[^.]*$//')
 
+
     if [[ $CHECKPOINT_PATH != *"stories"* && $TARGET_DEVICE == "cuda" ]]; then
         DTYPES="bfloat16"
         EXCLUDE_INT8_QUANT=true
@@ -74,7 +75,7 @@ function generate_compiled_model_output() {
         python3 -W ignore generate.py --dtype ${DTYPE} --compile --quant '{"embedding" : {"bitwidth": 4, "groupsize": 8, "packed": "True"}}' --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --device "$TARGET_DEVICE" > "$MODEL_DIR/output_compiled" || exit 1
         cat "$MODEL_DIR/output_compiled"
 
-        if [ "$EXCLUDE_INT8_QUANT" = false ]; then
+        if [ "${EXCLUDE_INT8_QUANT:-false}" == false ]; then
             echo "******************************************"
             echo "******* INT8 channel-wise quantized ******"
             echo "******************************************"
@@ -109,17 +110,24 @@ function generate_compiled_model_output() {
 function generate_aoti_model_output() {
     local CHECKPOINT_PATH="$1"
     local TARGET_DEVICE="${2:-cpu}"
+    local DTYPES="${3:-default}"
     local MODEL_DIR="${CHECKPOINT_PATH%/*}"
     local MODEL_NAME=$(basename "$CHECKPOINT_PATH" | sed 's/\.[^.]*$//')
 
-    if [[ $CHECKPOINT_PATH != *"stories"* && $TARGET_DEVICE == "cuda" ]]; then
-        DTYPES="bfloat16"
-        EXCLUDE_INT8_QUANT=true
-    else
-        DTYPES="float32 bfloat16 float16"
-        EXCLUDE_INT8_QUANT=false
+    echo "Local DTYPES=$DTYPES"
+
+    if [[ $DTYPES == "default" ]]; then
+        if [[ $CHECKPOINT_PATH != *"stories"* && $TARGET_DEVICE == "cuda" ]]; then
+            DTYPES="bfloat16"
+            EXCLUDE_INT8_QUANT=true
+        else
+            DTYPES="float32 bfloat16 float16"
+            EXCLUDE_INT8_QUANT=false
+        fi
     fi
 
+    echo "Local after default DTYPES=$DTYPES"
+
     for DTYPE in $DTYPES; do
         echo ""############### Run inference with AOT Inductor for dtype $DTYPE "###############"
         echo ""
@@ -158,7 +166,7 @@ function generate_aoti_model_output() {
         python3 -W ignore generate.py --dtype ${DTYPE} --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" > "$MODEL_DIR/output_aoti" || exit 1
         cat "$MODEL_DIR/output_aoti"
 
-        if [ "$EXCLUDE_INT8_QUANT" = false ]; then
+        if [ "${EXCLUDE_INT8_QUANT:-false}" == false ]; then
             echo "******************************************"
             echo "******* INT8 channel-wise quantized ******"
             echo "******************************************"
@@ -295,11 +303,12 @@ function run_compile() {
 }
 
 function run_aoti() {
-    generate_aoti_model_output "$CHECKPOINT_PATH" "$TARGET_DEVICE" || exit 1
+    echo "Passing DTYPES=$DTYPES"
+    generate_aoti_model_output "$CHECKPOINT_PATH" "$TARGET_DEVICE" "$DTYPES" || exit 1
 }
 
 function run_executorch() {
-    if [ "$TARGET_DEVICE" = "cpu" ]; then
+    if [ "$TARGET_DEVICE" == "cpu" ]; then
         generate_executorch_model_output "$CHECKPOINT_PATH" "$TARGET_DEVICE" || exit 1
     else
         echo "Skipped: Executorch doesn't run on ${TARGET_DEVICE}"
@@ -318,24 +327,44 @@ CHECKPOINT_PATH="$1"
 TARGET_DEVICE="${2:-cpu}"
 PROMPT="Hello, my name is"
 
-
 if [ "$#" -gt 2 ]; then
     # Additional arguments provided
     for arg in "${@:3}"; do
         case "$arg" in
             "compile")
+                echo "arg:$arg"
                 run_compile || exit 1
                 ;;
             "aoti")
+                echo "arg:$arg"
+                DTYPES="default"
+                run_aoti || exit 1
+                ;;
+            "aoti-bfloat16")
+                echo "arg:$arg"
+                DTYPES="bfloat16"
+                run_aoti || exit 1
+                ;;
+            "aoti-float16")
+                echo "arg:$arg"
+                DTYPES="float16"
+                run_aoti || exit 1
+                ;;
+            "aoti-float32")
+                echo "arg:$arg"
+                DTYPES="float32"
                 run_aoti || exit 1
                 ;;
             "executorch")
+                echo "arg:$arg"
                 run_executorch || exit 1
                 ;;
             "eval")
+                echo "arg:$arg"
                 run_eval || exit 1
                 ;;
             "eval_sanity_check")
+                echo "arg:$arg"
                 run_eval_sanity_check || exit 1
                 ;;
             *)
```
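
Two details in this diff do the heavy lifting. First, `local DTYPES="${3:-default}"` lets the third positional argument pin the dtype list, while the literal string `default` preserves the old auto-selection. Second, `${EXCLUDE_INT8_QUANT:-false}` fixes the "undefined variable" case noted in the commit message: when a caller pins a dtype, the auto-selection branch that sets `EXCLUDE_INT8_QUANT` is skipped, so the bare `"$EXCLUDE_INT8_QUANT"` comparison would test an empty string; the `:-false` expansion defaults it. A self-contained sketch of the pattern (`pick_dtypes` is a hypothetical name, not a function in `validate.sh`):

```bash
#!/usr/bin/env bash
# Minimal demo of the "${N:-default}" dispatch used by generate_aoti_model_output.
pick_dtypes() {
    local requested="${1:-default}"   # fall back to "default" if unset or empty
    if [[ $requested == "default" ]]; then
        echo "float32 bfloat16 float16"   # no pin: sweep everything, as before
    else
        echo "$requested"                 # caller pinned a single dtype
    fi
}

for dtype in $(pick_dtypes "$1"); do
    echo "would validate dtype: $dtype"
done
```

Run with no argument it prints all three dtypes; `bash sketch.sh bfloat16` prints only `bfloat16`, mirroring how the `aoti-bfloat16` CI job behaves.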

.github/workflows/pull.yml

Lines changed: 86 additions & 4 deletions

```diff
@@ -183,9 +183,9 @@ jobs:
         bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "compile"
         echo "::endgroup::"
 
-  test-gpu-aoti:
+  test-gpu-aoti-bfloat16:
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
-    name: test-gpu-aoti (${{ matrix.platform }}, ${{ matrix.model_name }})
+    name: test-gpu-aoti-bfloat16 (${{ matrix.platform }}, ${{ matrix.model_name }})
     needs: gather-models-gpu
     strategy:
       matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
@@ -222,7 +222,89 @@ jobs:
         echo "::endgroup::"
 
         echo "::group::Run inference"
-        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti"
+        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-bfloat16"
+        echo "::endgroup::"
+
+  test-gpu-aoti-float32:
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    name: test-gpu-aoti-float32 (${{ matrix.platform }}, ${{ matrix.model_name }})
+    needs: gather-models-gpu
+    strategy:
+      matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
+      fail-fast: false
+    with:
+      runner: linux.g5.4xlarge.nvidia.gpu
+      gpu-arch-type: cuda
+      gpu-arch-version: "12.1"
+      script: |
+        echo "::group::Print machine info"
+        nvidia-smi
+        echo "::endgroup::"
+
+        echo "::group::Install newer objcopy that supports --set-section-alignment"
+        yum install -y devtoolset-10-binutils
+        export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
+        echo "::endgroup::"
+
+        echo "::group::Install required packages"
+        pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121
+        pip install -r ./requirements.txt
+        pip list
+        python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
+        echo "::endgroup::"
+
+        echo "::group::Download checkpoint"
+        export REPO_NAME=${{ matrix.repo_name }}
+        bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
+        echo "::endgroup::"
+
+        echo "::group::Convert checkpoint"
+        bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
+        echo "::endgroup::"
+
+        echo "::group::Run inference"
+        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-float32"
+        echo "::endgroup::"
+
+  test-gpu-aoti-float16:
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    name: test-gpu-aoti-float16 (${{ matrix.platform }}, ${{ matrix.model_name }})
+    needs: gather-models-gpu
+    strategy:
+      matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
+      fail-fast: false
+    with:
+      runner: linux.g5.4xlarge.nvidia.gpu
+      gpu-arch-type: cuda
+      gpu-arch-version: "12.1"
+      script: |
+        echo "::group::Print machine info"
+        nvidia-smi
+        echo "::endgroup::"
+
+        echo "::group::Install newer objcopy that supports --set-section-alignment"
+        yum install -y devtoolset-10-binutils
+        export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
+        echo "::endgroup::"
+
+        echo "::group::Install required packages"
+        pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121
+        pip install -r ./requirements.txt
+        pip list
+        python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
+        echo "::endgroup::"
+
+        echo "::group::Download checkpoint"
+        export REPO_NAME=${{ matrix.repo_name }}
+        bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
+        echo "::endgroup::"
+
+        echo "::group::Convert checkpoint"
+        bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
+        echo "::endgroup::"
+
+        echo "::group::Run inference"
+        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-float16"
         echo "::endgroup::"
 
   test-gpu-eval-sanity-check:
@@ -685,7 +767,7 @@ jobs:
 
         echo "Running compiled"
         python3 torchchat.py generate --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile
-
+
         echo "******************************************"
         echo "******* Emb: channel-wise quantized ******"
         echo "******************************************"
```
