@@ -183,9 +183,9 @@ jobs:
183
183
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "compile"
184
184
echo "::endgroup::"
185
185
186
- test-gpu-aoti :
186
+ test-gpu-aoti-bfloat16 :
187
187
uses : pytorch/test-infra/.github/workflows/linux_job.yml@main
188
- name : test-gpu-aoti (${{ matrix.platform }}, ${{ matrix.model_name }})
188
+ name : test-gpu-aoti-bfloat16 (${{ matrix.platform }}, ${{ matrix.model_name }})
189
189
needs : gather-models-gpu
190
190
strategy :
191
191
matrix : ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
@@ -222,7 +222,89 @@ jobs:
222
222
echo "::endgroup::"
223
223
224
224
echo "::group::Run inference"
225
- bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti"
225
+ bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-bfloat16"
226
+ echo "::endgroup::"
227
+
228
# Job: test-gpu-aoti-float32
# Runs the model validation script in "aoti-float32" mode on a CUDA runner, once
# per entry of the matrix produced by the gather-models-gpu job.
# The work is delegated to the reusable workflow
# pytorch/test-infra/.github/workflows/linux_job.yml@main; everything under
# `with` is an input to that workflow and `script` is the shell it executes.
# NOTE(review): "aoti-float32" presumably selects an AOT Inductor run with
# float32 weights; the mode is interpreted by .ci/scripts/validate.sh, which is
# not visible here — confirm there.
# NOTE(review): this job is identical to test-gpu-aoti-float16 except for the
# job name and the last argument passed to validate.sh.
+ test-gpu-aoti-float32 :
229
+ uses : pytorch/test-infra/.github/workflows/linux_job.yml@main
230
+ name : test-gpu-aoti-float32 (${{ matrix.platform }}, ${{ matrix.model_name }})
# Must wait for gather-models-gpu because the matrix below is read from its outputs.
231
+ needs : gather-models-gpu
232
+ strategy :
# The matrix is the JSON emitted as the `models` output of gather-models-gpu.
# This job reads matrix.platform, matrix.model_name, matrix.repo_name and
# matrix.resources from each entry.
233
+ matrix : ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
# fail-fast is disabled so one failing model does not cancel the other matrix entries.
234
+ fail-fast : false
235
+ with :
236
+ runner : linux.g5.4xlarge.nvidia.gpu
237
+ gpu-arch-type : cuda
# NOTE(review): the quoted value starts with a space (" 12.1"). This may be an
# artifact of how this diff was captured; confirm the workflow file has "12.1".
238
+ gpu-arch-version : " 12.1"
239
+ script : |
# Step 1: log the GPU/driver state of the runner.
240
+ echo "::group::Print machine info"
241
+ nvidia-smi
242
+ echo "::endgroup::"
243
+
# Step 2: install devtoolset-10 binutils and put it first on PATH so the newer
# objcopy is the one picked up by later commands.
244
+ echo "::group::Install newer objcopy that supports --set-section-alignment"
245
+ yum install -y devtoolset-10-binutils
246
+ export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
247
+ echo "::endgroup::"
248
+
# Step 3: install a pre-release torch from the cu121 nightly index, then the
# project requirements, and print the resulting versions.
249
+ echo "::group::Install required packages"
250
+ pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121
251
+ pip install -r ./requirements.txt
252
+ pip list
253
+ python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
254
+ echo "::endgroup::"
255
+
# Step 4: fetch the checkpoint for this matrix entry. REPO_NAME is exported
# because the later steps reuse it.
# NOTE(review): ${{ matrix.resources }} is passed unquoted, presumably so that
# several resources expand into separate arguments — verify against
# wget_checkpoint.sh.
256
+ echo "::group::Download checkpoint"
257
+ export REPO_NAME=${{ matrix.repo_name }}
258
+ bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
259
+ echo "::endgroup::"
260
+
# Step 5: run the checkpoint conversion script for the same repo.
261
+ echo "::group::Convert checkpoint"
262
+ bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
263
+ echo "::endgroup::"
264
+
# Step 6: validate ./checkpoints/<REPO_NAME>/model.pth on "cuda" in "aoti-float32" mode.
265
+ echo "::group::Run inference"
266
+ bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-float32"
267
+ echo "::endgroup::"
268
+
269
# Job: test-gpu-aoti-float16
# Runs the model validation script in "aoti-float16" mode on a CUDA runner, once
# per entry of the matrix produced by the gather-models-gpu job.
# The work is delegated to the reusable workflow
# pytorch/test-infra/.github/workflows/linux_job.yml@main; everything under
# `with` is an input to that workflow and `script` is the shell it executes.
# NOTE(review): "aoti-float16" presumably selects an AOT Inductor run with
# float16 weights; the mode is interpreted by .ci/scripts/validate.sh, which is
# not visible here — confirm there.
# NOTE(review): this job is identical to test-gpu-aoti-float32 except for the
# job name and the last argument passed to validate.sh.
+ test-gpu-aoti-float16 :
270
+ uses : pytorch/test-infra/.github/workflows/linux_job.yml@main
271
+ name : test-gpu-aoti-float16 (${{ matrix.platform }}, ${{ matrix.model_name }})
# Must wait for gather-models-gpu because the matrix below is read from its outputs.
272
+ needs : gather-models-gpu
273
+ strategy :
# The matrix is the JSON emitted as the `models` output of gather-models-gpu.
# This job reads matrix.platform, matrix.model_name, matrix.repo_name and
# matrix.resources from each entry.
274
+ matrix : ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
# fail-fast is disabled so one failing model does not cancel the other matrix entries.
275
+ fail-fast : false
276
+ with :
277
+ runner : linux.g5.4xlarge.nvidia.gpu
278
+ gpu-arch-type : cuda
# NOTE(review): the quoted value starts with a space (" 12.1"). This may be an
# artifact of how this diff was captured; confirm the workflow file has "12.1".
279
+ gpu-arch-version : " 12.1"
280
+ script : |
# Step 1: log the GPU/driver state of the runner.
281
+ echo "::group::Print machine info"
282
+ nvidia-smi
283
+ echo "::endgroup::"
284
+
# Step 2: install devtoolset-10 binutils and put it first on PATH so the newer
# objcopy is the one picked up by later commands.
285
+ echo "::group::Install newer objcopy that supports --set-section-alignment"
286
+ yum install -y devtoolset-10-binutils
287
+ export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
288
+ echo "::endgroup::"
289
+
# Step 3: install a pre-release torch from the cu121 nightly index, then the
# project requirements, and print the resulting versions.
290
+ echo "::group::Install required packages"
291
+ pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121
292
+ pip install -r ./requirements.txt
293
+ pip list
294
+ python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
295
+ echo "::endgroup::"
296
+
# Step 4: fetch the checkpoint for this matrix entry. REPO_NAME is exported
# because the later steps reuse it.
# NOTE(review): ${{ matrix.resources }} is passed unquoted, presumably so that
# several resources expand into separate arguments — verify against
# wget_checkpoint.sh.
297
+ echo "::group::Download checkpoint"
298
+ export REPO_NAME=${{ matrix.repo_name }}
299
+ bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
300
+ echo "::endgroup::"
301
+
# Step 5: run the checkpoint conversion script for the same repo.
302
+ echo "::group::Convert checkpoint"
303
+ bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
304
+ echo "::endgroup::"
305
+
# Step 6: validate ./checkpoints/<REPO_NAME>/model.pth on "cuda" in "aoti-float16" mode.
306
+ echo "::group::Run inference"
307
+ bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-float16"
226
308
echo "::endgroup::"
227
309
228
310
test-gpu-eval-sanity-check :
@@ -685,7 +767,7 @@ jobs:
685
767
686
768
echo "Running compiled"
687
769
python3 torchchat.py generate --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile
688
-
770
+
689
771
echo "******************************************"
690
772
echo "******* Emb: channel-wise quantized ******"
691
773
echo "******************************************"
0 commit comments