Skip to content

Commit 089087b

Browse files
authored
Add qnn 16a16w quantization test (#7039)
Add qnn 16a16w quantization test (#7039) Summary: Pull Request resolved: #7039 Differential Revision: D66390212
1 parent fbcc9a1 commit 089087b

File tree

2 files changed

+44
-0
lines changed

.ci/scripts/test_llama.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ while [[ $# -gt 0 ]]; do
       MODE="$2" # portable or xnnpack+custom or xnnpack+custom+qe
       shift 2
       ;;
+    -pt2e_quantize)
+      PT2E_QUANTIZE="$2"
+      shift 2
+      ;;
     -upload)
       UPLOAD_DIR="$2"
       shift 2
@@ -234,6 +238,10 @@ if [[ "${COREML}" == "ON" ]]; then
 fi
 if [[ "${QNN}" == "ON" ]]; then
     EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
+    echo "PT2E_QUANTIZE is ${PT2E_QUANTIZE}"
+    if [[ "${PT2E_QUANTIZE}" == "qnn_16a16w" ]]; then
+        EXPORT_ARGS+=" --tokenizer_path tokenizer.model --pt2e_quantize qnn_16a16w --calibration_tasks wikitext --calibration_limit 1 --calibration_seq_length 128 --calibration_data Once "
+    fi
 fi
 # Add dynamically linked library location
 $PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}

.github/workflows/trunk.yml

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,3 +441,39 @@ jobs:

       cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
       echo "::endgroup::"
+
+  test-llama-runner-qnn-linux:
+    name: test-llama-runner-qnn-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      matrix:
+        dtype: [fp32]
+        pt2e_quantize: [qnn_16a16w, qnn_8a8w]
+        mode: [qnn]
+      fail-fast: false
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-qnn-sdk
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 900
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        BUILD_TOOL="cmake"
+        DTYPE=${{ matrix.dtype }}
+        MODE=${{ matrix.mode }}
+        PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+        PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+
+        # Setup executorch
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
+        # Install requirements for export_llama
+        PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
+        # Test llama2
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"

0 commit comments

Comments
 (0)