init

metascroy · metascroy · commit 1a8e3b553574 · 2024-04-26T10:47:26.000-07:00
diff --git a/.ci/scripts/validate.sh b/.ci/scripts/validate.sh
@@ -25,6 +25,7 @@ function generate_compiled_model_output() {
     local MODEL_DIR="${CHECKPOINT_PATH%/*}"
     local MODEL_NAME=$(basename "$CHECKPOINT_PATH" | sed 's/\.[^.]*$//')
 
+
     if [[ $CHECKPOINT_PATH != *"stories"* && $TARGET_DEVICE == "cuda" ]]; then
         DTYPES="bfloat16"
         EXCLUDE_INT8_QUANT=true
@@ -109,15 +110,18 @@ function generate_compiled_model_output() {
 function generate_aoti_model_output() {
     local CHECKPOINT_PATH="$1"
     local TARGET_DEVICE="${2:-cpu}"
+    local DTYPES="${3:-default}"  # optional dtype list; unset or empty means "default"
     local MODEL_DIR="${CHECKPOINT_PATH%/*}"
     local MODEL_NAME=$(basename "$CHECKPOINT_PATH" | sed 's/\.[^.]*$//')
 
-    if [[ $CHECKPOINT_PATH != *"stories"* && $TARGET_DEVICE == "cuda" ]]; then
-        DTYPES="bfloat16"
-        EXCLUDE_INT8_QUANT=true
-    else
-        DTYPES="float32 bfloat16 float16"
-        EXCLUDE_INT8_QUANT=false
+    # An explicitly requested dtype list wins; "default" picks the per-target list below.
+    if [[ $CHECKPOINT_PATH != *"stories"* && $TARGET_DEVICE == "cuda" ]]; then
+        [[ "$DTYPES" != "default" ]] || DTYPES="bfloat16"
+        EXCLUDE_INT8_QUANT=true
+    else
+        [[ "$DTYPES" != "default" ]] || DTYPES="float32 bfloat16 float16"
+        EXCLUDE_INT8_QUANT=false
     fi
 
     for DTYPE in $DTYPES; do
@@ -295,7 +299,7 @@ function run_compile() {
 }
 
 function run_aoti() {
-    generate_aoti_model_output "$CHECKPOINT_PATH" "$TARGET_DEVICE" || exit 1
+    generate_aoti_model_output "$CHECKPOINT_PATH" "$TARGET_DEVICE" "${DTYPES:-default}" || exit 1  # tolerate DTYPES being unset or empty
 }
 
 function run_executorch() {
@@ -327,6 +331,7 @@ if [ "$#" -gt 2 ]; then
                 run_compile || exit 1
                 ;;
             "aoti")
+                DTYPES="${4:-default}"  # dtype list from the 4th script argument, "default" when absent or empty. NOTE(review): confirm $4 is not also dispatched as a mode by the surrounding argument handling
                 run_aoti || exit 1
                 ;;
             "executorch")
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
@@ -183,7 +183,7 @@ jobs:
         bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "compile"
         echo "::endgroup::"
 
-  test-gpu-aoti:
+  test-gpu-aoti-bfloat16:
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     name: test-gpu-aoti (${{ matrix.platform }}, ${{ matrix.model_name }})
     needs: gather-models-gpu
@@ -216,7 +216,79 @@ jobs:
         echo "::endgroup::"
 
         echo "::group::Run inference"
-        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti"
+        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti" "bfloat16"
+        echo "::endgroup::"
+
+  test-gpu-aoti-float32:  # AOTI validation on CUDA with the dtype list fixed to float32
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    name: test-gpu-aoti-float32 (${{ matrix.platform }}, ${{ matrix.model_name }})
+    needs: gather-models-gpu
+    strategy:
+      matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
+      fail-fast: false
+    with:
+      runner: linux.g5.4xlarge.nvidia.gpu
+      gpu-arch-type: cuda
+      gpu-arch-version: "12.1"
+      script: |
+        echo "::group::Print machine info"
+        nvidia-smi
+        echo "::endgroup::"
+
+        echo "::group::Install required packages"
+        pip install --pre torch  --index-url https://download.pytorch.org/whl/nightly/cu121
+        pip install -r ./requirements.txt
+        pip list
+        python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
+        echo "::endgroup::"
+
+        echo "::group::Download checkpoint"
+        export REPO_NAME=${{ matrix.repo_name }}
+        bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
+        echo "::endgroup::"
+
+        echo "::group::Convert checkpoint"
+        bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
+        echo "::endgroup::"
+
+        echo "::group::Run inference"
+        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti" "float32"
+        echo "::endgroup::"
+
+  test-gpu-aoti-float16:  # AOTI validation on CUDA with the dtype list fixed to float16
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    name: test-gpu-aoti-float16 (${{ matrix.platform }}, ${{ matrix.model_name }})
+    needs: gather-models-gpu
+    strategy:
+      matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
+      fail-fast: false
+    with:
+      runner: linux.g5.4xlarge.nvidia.gpu
+      gpu-arch-type: cuda
+      gpu-arch-version: "12.1"
+      script: |
+        echo "::group::Print machine info"
+        nvidia-smi
+        echo "::endgroup::"
+
+        echo "::group::Install required packages"
+        pip install --pre torch  --index-url https://download.pytorch.org/whl/nightly/cu121
+        pip install -r ./requirements.txt
+        pip list
+        python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
+        echo "::endgroup::"
+
+        echo "::group::Download checkpoint"
+        export REPO_NAME=${{ matrix.repo_name }}
+        bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
+        echo "::endgroup::"
+
+        echo "::group::Convert checkpoint"
+        bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
+        echo "::endgroup::"
+
+        echo "::group::Run inference"
+        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti" "float16"
         echo "::endgroup::"
 
   test-gpu-eval-sanity-check:
@@ -749,7 +821,7 @@ jobs:
 
           echo "Running compiled"
           python3 torchchat.py generate --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile
-          
+
           echo "******************************************"
           echo "******* Emb: channel-wise quantized ******"
           echo "******************************************"