File tree: 2 files changed, +44 −0 lines changed
lines changed Original file line number Diff line number Diff line change @@ -27,6 +27,10 @@ while [[ $# -gt 0 ]]; do
27
27
MODE=" $2 " # portable or xnnpack+custom or xnnpack+custom+qe
28
28
shift 2
29
29
;;
30
+ -pt2e_quantize)
31
+ PT2E_QUANTIZE=" $2 "
32
+ shift 2
33
+ ;;
30
34
-upload)
31
35
UPLOAD_DIR=" $2 "
32
36
shift 2
@@ -234,6 +238,10 @@ if [[ "${COREML}" == "ON" ]]; then
234
238
fi
235
239
# When the QNN backend is selected, extend the export_llama arguments.
# NOTE(review): the scraped diff had stray spaces inside the quoted
# comparison operands (e.g. [[ " ${QNN} " == " ON" ]]), which would never
# match; reconstructed here without them.
if [[ "${QNN}" == "ON" ]]; then
  EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
  echo "PT2E_QUANTIZE is ${PT2E_QUANTIZE}"
  # 16a16w PT2E quantization needs a tokenizer plus calibration settings;
  # other pt2e_quantize values (e.g. qnn_8a8w) fall through with no extras.
  if [[ "${PT2E_QUANTIZE}" == "qnn_16a16w" ]]; then
    EXPORT_ARGS+=" --tokenizer_path tokenizer.model --pt2e_quantize qnn_16a16w --calibration_tasks wikitext --calibration_limit 1 --calibration_seq_length 128 --calibration_data Once "
  fi
fi
238
246
# Add dynamically linked library location
239
247
$PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}
Original file line number Diff line number Diff line change @@ -441,3 +441,39 @@ jobs:
441
441
442
442
cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
443
443
echo "::endgroup::"
444
+
445
+
446
+ test-llama-runner-qnn-linux :
447
+ name : test-llama-runner-qnn-linux
448
+ uses : pytorch/test-infra/.github/workflows/linux_job.yml@main
449
+ strategy :
450
+ matrix :
451
+ dtype : [fp32]
452
+ pt2e_quantize : [qnn_16a16w, qnn_8a8w]
453
+ mode : [qnn]
454
+ fail-fast : false
455
+ with :
456
+ runner : linux.2xlarge
457
+ docker-image : executorch-ubuntu-22.04-qnn-sdk
458
+ submodules : ' true'
459
+ ref : ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
460
+ timeout : 900
461
+ script : |
462
+ # The generic Linux job chooses to use base env, not the one setup by the image
463
+ CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
464
+ conda activate "${CONDA_ENV}"
465
+
466
+ BUILD_TOOL="cmake"
467
+ DTYPE=${{ matrix.dtype }}
468
+ MODE=${{ matrix.mode }}
469
+ PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
470
+
471
+ PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
472
+ PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
473
+
474
+ # Setup executorch
475
+ PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
476
+ # Install requirements for export_llama
477
+ PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
478
+ # Test llama2
479
+ PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
You can’t perform that action at this time.
0 commit comments