@@ -183,7 +183,7 @@ jobs:
183
183
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "compile"
184
184
echo "::endgroup::"
185
185
186
- test-gpu-aoti :
186
+ test-gpu-aoti-bfloat16 :
187
187
uses : pytorch/test-infra/.github/workflows/linux_job.yml@main
188
188
name : test-gpu-aoti (${{ matrix.platform }}, ${{ matrix.model_name }})
189
189
needs : gather-models-gpu
@@ -216,7 +216,79 @@ jobs:
216
216
echo "::endgroup::"
217
217
218
218
echo "::group::Run inference"
219
- bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti"
219
+ bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti" "bfloat16"
220
+ echo "::endgroup::"
221
+
222
# GPU job: for each model in the gather-models-gpu matrix, download and convert
# the checkpoint, then run .ci/scripts/validate.sh with the arguments
# "cuda" "aoti" "float32".
test-gpu-aoti-float32:
  uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
  # The dtype is part of the display name so this job's check runs can be told
  # apart from the other test-gpu-aoti-* jobs in the UI and in status checks.
  name: test-gpu-aoti-float32 (${{ matrix.platform }}, ${{ matrix.model_name }})
  needs: gather-models-gpu
  strategy:
    # The matrix is produced at runtime by the gather-models-gpu job.
    matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
    # Keep the remaining matrix entries running when one of them fails.
    fail-fast: false
  with:
    runner: linux.g5.4xlarge.nvidia.gpu
    gpu-arch-type: cuda
    # Matches the cu121 nightly wheel index used in the script below.
    gpu-arch-version: "12.1"
    script: |
      echo "::group::Print machine info"
      nvidia-smi
      echo "::endgroup::"

      echo "::group::Install required packages"
      pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121
      pip install -r ./requirements.txt
      pip list
      python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
      echo "::endgroup::"

      echo "::group::Download checkpoint"
      export REPO_NAME=${{ matrix.repo_name }}
      bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
      echo "::endgroup::"

      echo "::group::Convert checkpoint"
      bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
      echo "::endgroup::"

      echo "::group::Run inference"
      bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti" "float32"
      echo "::endgroup::"
257
+
258
# GPU job: for each model in the gather-models-gpu matrix, download and convert
# the checkpoint, then run .ci/scripts/validate.sh with the arguments
# "cuda" "aoti" "float16".
test-gpu-aoti-float16:
  uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
  # The dtype is part of the display name so this job's check runs can be told
  # apart from the other test-gpu-aoti-* jobs in the UI and in status checks.
  name: test-gpu-aoti-float16 (${{ matrix.platform }}, ${{ matrix.model_name }})
  needs: gather-models-gpu
  strategy:
    # The matrix is produced at runtime by the gather-models-gpu job.
    matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
    # Keep the remaining matrix entries running when one of them fails.
    fail-fast: false
  with:
    runner: linux.g5.4xlarge.nvidia.gpu
    gpu-arch-type: cuda
    # Matches the cu121 nightly wheel index used in the script below.
    gpu-arch-version: "12.1"
    script: |
      echo "::group::Print machine info"
      nvidia-smi
      echo "::endgroup::"

      echo "::group::Install required packages"
      pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121
      pip install -r ./requirements.txt
      pip list
      python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
      echo "::endgroup::"

      echo "::group::Download checkpoint"
      export REPO_NAME=${{ matrix.repo_name }}
      bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
      echo "::endgroup::"

      echo "::group::Convert checkpoint"
      bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
      echo "::endgroup::"

      echo "::group::Run inference"
      bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti" "float16"
      echo "::endgroup::"
221
293
222
294
test-gpu-eval-sanity-check :
@@ -749,7 +821,7 @@ jobs:
749
821
750
822
echo "Running compiled"
751
823
python3 torchchat.py generate --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile
752
-
824
+
753
825
echo "******************************************"
754
826
echo "******* Emb: channel-wise quantized ******"
755
827
echo "******************************************"
0 commit comments