|
59 | 59 | pushd ${TORCHCHAT_ROOT}
|
60 | 60 | bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
|
61 | 61 | bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "compile"
|
| 62 | + bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval" |
62 | 63 | test-cpu-aoti:
|
63 | 64 | name: test-cpu-aoti (${{ matrix.platform }}, ${{ matrix.model_name }})
|
64 | 65 | needs: gather-models-cpu
|
|
93 | 94 | pushd ${TORCHCHAT_ROOT}
|
94 | 95 | bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
|
95 | 96 | bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "aoti"
|
| 97 | +
|
| 98 | + test-cpu-eval: |
| 99 | + name: test-cpu-eval (${{ matrix.platform }}, ${{ matrix.model_name }}) |
| 100 | + needs: gather-models-cpu |
| 101 | + strategy: |
| 102 | + matrix: ${{ fromJSON(needs.gather-models-cpu.outputs.models) }} |
| 103 | + fail-fast: false |
| 104 | + runs-on: ${{ matrix.runner }} |
| 105 | + env: |
| 106 | + TORCHCHAT_ROOT: ${{ github.workspace }} |
| 107 | + REPO_NAME: ${{ matrix.repo_name }} |
| 108 | + steps: |
| 109 | + - name: Checkout repo |
| 110 | + uses: actions/checkout@v3 |
| 111 | + - name: Setup Python |
| 112 | + uses: actions/setup-python@v4 |
| 113 | + with: |
| 114 | + python-version: '3.11' |
| 115 | + - name: Print machine info |
| 116 | + run: | |
| 117 | + echo "$(uname -a)" |
| 118 | + - name: Install dependencies |
| 119 | + run: | |
| 120 | + pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu |
| 121 | + pip install -r requirements.txt |
| 122 | + pip list |
| 123 | + python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")' |
| 124 | + - name: Download checkpoints |
| 125 | + run: | |
| 126 | + bash ${TORCHCHAT_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}" |
| 127 | + - name: Run validation |
| 128 | + run: | |
| 129 | + python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")' |
| 130 | + pushd ${TORCHCHAT_ROOT} |
| 131 | + bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME} |
| 132 | + bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval" |
| 133 | +
|
96 | 134 | gather-models-gpu:
|
97 | 135 | runs-on: ubuntu-22.04
|
98 | 136 | outputs:
|
@@ -144,6 +182,7 @@ jobs:
|
144 | 182 | echo "::group::Run inference"
|
145 | 183 | bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "compile"
|
146 | 184 | echo "::endgroup::"
|
| 185 | +
|
147 | 186 | test-gpu-aoti:
|
148 | 187 | uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
|
149 | 188 | name: test-gpu-aoti (${{ matrix.platform }}, ${{ matrix.model_name }})
|
@@ -179,6 +218,43 @@ jobs:
|
179 | 218 | echo "::group::Run inference"
|
180 | 219 | bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti"
|
181 | 220 | echo "::endgroup::"
|
| 221 | +
|
| 222 | + test-gpu-eval: |
| 223 | + uses: pytorch/test-infra/.github/workflows/linux_job.yml@main |
| 224 | + name: test-gpu-eval (${{ matrix.platform }}, ${{ matrix.model_name }}) |
| 225 | + needs: gather-models-gpu |
| 226 | + strategy: |
| 227 | + matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }} |
| 228 | + fail-fast: false |
| 229 | + with: |
| 230 | + runner: linux.g5.4xlarge.nvidia.gpu |
| 231 | + gpu-arch-type: cuda |
| 232 | + gpu-arch-version: "12.1" |
| 233 | + script: | |
| 234 | + echo "::group::Print machine info" |
| 235 | + nvidia-smi |
| 236 | + echo "::endgroup::" |
| 237 | +
|
| 238 | + echo "::group::Install required packages" |
| 239 | + pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121 |
| 240 | + pip install -r ./requirements.txt |
| 241 | + pip list |
| 242 | + python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")' |
| 243 | + echo "::endgroup::" |
| 244 | +
|
| 245 | + echo "::group::Download checkpoint" |
| 246 | + export REPO_NAME=${{ matrix.repo_name }} |
| 247 | + bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} "${{ matrix.resources }}" |
| 248 | + echo "::endgroup::" |
| 249 | +
|
| 250 | + echo "::group::Convert checkpoint" |
| 251 | + bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME} |
| 252 | + echo "::endgroup::" |
| 253 | +
|
| 254 | + echo "::group::Run eval" |
| 255 | + bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "eval" |
| 256 | + echo "::endgroup::" |
| 257 | +
|
182 | 258 | test-tinystories-executorch:
|
183 | 259 | strategy:
|
184 | 260 | matrix:
|
|
0 commit comments