
break aoti CI jobs separately #500


Merged: 12 commits, Apr 27, 2024

51 changes: 40 additions & 11 deletions .ci/scripts/validate.sh
@@ -25,6 +25,7 @@ function generate_compiled_model_output() {
local MODEL_DIR="${CHECKPOINT_PATH%/*}"
local MODEL_NAME=$(basename "$CHECKPOINT_PATH" | sed 's/\.[^.]*$//')


if [[ $CHECKPOINT_PATH != *"stories"* && $TARGET_DEVICE == "cuda" ]]; then
DTYPES="bfloat16"
EXCLUDE_INT8_QUANT=true
@@ -74,7 +75,7 @@ function generate_compiled_model_output() {
python3 -W ignore generate.py --dtype ${DTYPE} --compile --quant '{"embedding" : {"bitwidth": 4, "groupsize": 8, "packed": "True"}}' --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --device "$TARGET_DEVICE" > "$MODEL_DIR/output_compiled" || exit 1
cat "$MODEL_DIR/output_compiled"

if [ "$EXCLUDE_INT8_QUANT" = false ]; then
if [ "${EXCLUDE_INT8_QUANT:-false}" == false ]; then
echo "******************************************"
echo "******* INT8 channel-wise quantized ******"
echo "******************************************"
@@ -109,17 +110,24 @@ function generate_compiled_model_output() {
function generate_aoti_model_output() {
local CHECKPOINT_PATH="$1"
local TARGET_DEVICE="${2:-cpu}"
local DTYPES="${3:-default}"
local MODEL_DIR="${CHECKPOINT_PATH%/*}"
local MODEL_NAME=$(basename "$CHECKPOINT_PATH" | sed 's/\.[^.]*$//')

if [[ $CHECKPOINT_PATH != *"stories"* && $TARGET_DEVICE == "cuda" ]]; then
DTYPES="bfloat16"
EXCLUDE_INT8_QUANT=true
else
DTYPES="float32 bfloat16 float16"
EXCLUDE_INT8_QUANT=false
echo "Local DTYPES=$DTYPES"

if [[ $DTYPES == "default" ]]; then
if [[ $CHECKPOINT_PATH != *"stories"* && $TARGET_DEVICE == "cuda" ]]; then
DTYPES="bfloat16"
EXCLUDE_INT8_QUANT=true
else
DTYPES="float32 bfloat16 float16"
EXCLUDE_INT8_QUANT=false
fi
fi

echo "Local after default DTYPES=$DTYPES"

for DTYPE in $DTYPES; do
echo ""############### Run inference with AOT Inductor for dtype $DTYPE "###############"
echo ""
@@ -158,7 +166,7 @@ function generate_aoti_model_output() {
python3 -W ignore generate.py --dtype ${DTYPE} --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" > "$MODEL_DIR/output_aoti" || exit 1
cat "$MODEL_DIR/output_aoti"

if [ "$EXCLUDE_INT8_QUANT" = false ]; then
if [ "${EXCLUDE_INT8_QUANT:-false}" == false ]; then
echo "******************************************"
echo "******* INT8 channel-wise quantized ******"
echo "******************************************"
@@ -295,11 +303,12 @@ function run_compile() {
}

function run_aoti() {
generate_aoti_model_output "$CHECKPOINT_PATH" "$TARGET_DEVICE" || exit 1
echo "Passing DTYPES=$DTYPES"
generate_aoti_model_output "$CHECKPOINT_PATH" "$TARGET_DEVICE" "$DTYPES" || exit 1
}

function run_executorch() {
if [ "$TARGET_DEVICE" = "cpu" ]; then
if [ "$TARGET_DEVICE" == "cpu" ]; then
generate_executorch_model_output "$CHECKPOINT_PATH" "$TARGET_DEVICE" || exit 1
else
echo "Skipped: Executorch doesn't run on ${TARGET_DEVICE}"
@@ -318,24 +327,44 @@ CHECKPOINT_PATH="$1"
TARGET_DEVICE="${2:-cpu}"
PROMPT="Hello, my name is"


if [ "$#" -gt 2 ]; then
# Additional arguments provided
for arg in "${@:3}"; do
case "$arg" in
"compile")
echo "arg:$arg"
run_compile || exit 1
;;
"aoti")
echo "arg:$arg"
DTYPES="default"
run_aoti || exit 1
;;
"aoti-bfloat16")
echo "arg:$arg"
DTYPES="bfloat16"
run_aoti || exit 1
;;
"aoti-float16")
echo "arg:$arg"
DTYPES="float16"
run_aoti || exit 1
;;
"aoti-float32")
echo "arg:$arg"
DTYPES="float32"
run_aoti || exit 1
;;
"executorch")
echo "arg:$arg"
run_executorch || exit 1
;;
"eval")
echo "arg:$arg"
run_eval || exit 1
;;
"eval_sanity_check")
echo "arg:$arg"
run_eval_sanity_check || exit 1
;;
*)
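For reference, a minimal sketch of how validate.sh is invoked with the new per-dtype AOTI arguments; the checkpoint path mirrors what the CI workflow passes, and REPO_NAME is assumed to be set as it is in pull.yml:

# run all default dtypes for the target device, as before
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti"

# run a single dtype, as each of the split CI jobs now does
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-bfloat16"
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-float32"
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-float16"
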
90 changes: 86 additions & 4 deletions .github/workflows/pull.yml
@@ -183,9 +183,9 @@ jobs:
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "compile"
echo "::endgroup::"

test-gpu-aoti:
test-gpu-aoti-bfloat16:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
name: test-gpu-aoti (${{ matrix.platform }}, ${{ matrix.model_name }})
name: test-gpu-aoti-bfloat16 (${{ matrix.platform }}, ${{ matrix.model_name }})
needs: gather-models-gpu
strategy:
matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
@@ -222,7 +222,89 @@ jobs:
echo "::endgroup::"

echo "::group::Run inference"
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti"
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-bfloat16"
echo "::endgroup::"

test-gpu-aoti-float32:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
name: test-gpu-aoti-float32 (${{ matrix.platform }}, ${{ matrix.model_name }})
needs: gather-models-gpu
strategy:
matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
fail-fast: false
with:
runner: linux.g5.4xlarge.nvidia.gpu
gpu-arch-type: cuda
gpu-arch-version: "12.1"
script: |
echo "::group::Print machine info"
nvidia-smi
echo "::endgroup::"

echo "::group::Install newer objcopy that supports --set-section-alignment"
yum install -y devtoolset-10-binutils
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
echo "::endgroup::"

echo "::group::Install required packages"
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121
pip install -r ./requirements.txt
pip list
python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
echo "::endgroup::"

echo "::group::Download checkpoint"
export REPO_NAME=${{ matrix.repo_name }}
bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
echo "::endgroup::"

echo "::group::Convert checkpoint"
bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
echo "::endgroup::"

echo "::group::Run inference"
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-float32"
echo "::endgroup::"

test-gpu-aoti-float16:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
name: test-gpu-aoti-float16 (${{ matrix.platform }}, ${{ matrix.model_name }})
needs: gather-models-gpu
strategy:
matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
fail-fast: false
with:
runner: linux.g5.4xlarge.nvidia.gpu
gpu-arch-type: cuda
gpu-arch-version: "12.1"
script: |
echo "::group::Print machine info"
nvidia-smi
echo "::endgroup::"

echo "::group::Install newer objcopy that supports --set-section-alignment"
yum install -y devtoolset-10-binutils
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
echo "::endgroup::"

echo "::group::Install required packages"
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121
pip install -r ./requirements.txt
pip list
python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
echo "::endgroup::"

echo "::group::Download checkpoint"
export REPO_NAME=${{ matrix.repo_name }}
bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
echo "::endgroup::"

echo "::group::Convert checkpoint"
bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
echo "::endgroup::"

echo "::group::Run inference"
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-float16"
echo "::endgroup::"

test-gpu-eval-sanity-check:
@@ -685,7 +767,7 @@ jobs:

echo "Running compiled"
python3 torchchat.py generate --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile

echo "******************************************"
echo "******* Emb: channel-wise quantized ******"
echo "******************************************"