Commit 6bf844c

Qualcomm AI Engine Direct - Meta CI for Mobilebert and W2L
1 parent 745be4e · commit 6bf844c

9 files changed (60 additions, 23 deletions)

.ci/scripts/test_model.sh

Lines changed: 8 additions & 1 deletion
@@ -154,6 +154,7 @@ test_model_with_qnn() {
   export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/
   export PYTHONPATH=$EXECUTORCH_ROOT/..
 
+  EXTRA_FLAGS=""
   if [[ "${MODEL_NAME}" == "dl3" ]]; then
     EXPORT_SCRIPT=deeplab_v3
   elif [[ "${MODEL_NAME}" == "mv3" ]]; then
@@ -166,6 +167,12 @@ test_model_with_qnn() {
     EXPORT_SCRIPT=inception_v3
   elif [[ "${MODEL_NAME}" == "vit" ]]; then
     EXPORT_SCRIPT=torchvision_vit
+  elif [[ "${MODEL_NAME}" == "mb" ]]; then
+    EXPORT_SCRIPT=mobilebert_fine_tune
+    EXTRA_FLAGS="--num_epochs 1"
+    pip install scikit-learn
+  elif [[ "${MODEL_NAME}" == "w2l" ]]; then
+    EXPORT_SCRIPT=wav2letter
   elif [[ "${MODEL_NAME}" == "edsr" ]]; then
     EXPORT_SCRIPT=edsr
     # Additional deps for edsr
@@ -179,7 +186,7 @@ test_model_with_qnn() {
   # TODO(guangyang): Make QNN chipset matches the target device
   QNN_CHIPSET=SM8450
 
-  "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --compile_only
+  "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --compile_only $EXTRA_FLAGS
   EXPORTED_MODEL=$(find "./${EXPORT_SCRIPT}" -type f -name "${MODEL_NAME}*.pte" -print -quit)
 }
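
A minimal sketch of what the new "mb" branch boils down to when CI dispatches it, assuming CMAKE_OUTPUT_DIR resolves to "cmake-out" (an assumed value; the SM8450 chipset is pinned in the script above):

# Hypothetical reconstruction of the dispatch for MODEL_NAME=mb;
# "cmake-out" stands in for CMAKE_OUTPUT_DIR.
import subprocess
import sys

subprocess.run(
    [
        sys.executable, "-m", "examples.qualcomm.scripts.mobilebert_fine_tune",
        "-b", "cmake-out",      # assumed CMAKE_OUTPUT_DIR
        "-m", "SM8450",         # QNN_CHIPSET pinned in the script
        "--compile_only",
        "--num_epochs", "1",    # the new EXTRA_FLAGS value for mb
    ],
    check=True,
)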

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
@@ -311,7 +311,7 @@ jobs:
     strategy:
       matrix:
         dtype: [fp32]
-        model: [dl3, mv3, mv2, ic4, ic3, vit]
+        model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l]
       fail-fast: false
     with:
       runner: linux.2xlarge

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 8 additions & 10 deletions
@@ -74,7 +74,7 @@
 from executorch.examples.models.mobilenet_v3 import MV3Model
 from executorch.examples.models.torchvision_vit.model import TorchVisionViTModel
 
-# from executorch.examples.models.wav2letter import Wav2LetterModel
+from executorch.examples.models.wav2letter import Wav2LetterModel
 from executorch.exir import to_edge
 from executorch.exir.backend.backend_api import disable_validation
 from executorch.exir.passes import PassManager
@@ -903,8 +903,7 @@ def test_qnn_backend_example_models(self):
             MV3Model(),
             MobileBertModelExample(),
             TorchVisionViTModel(),
-            # Encountered undefined symbol in mainline. Reopen once resolved.
-            # Wav2LetterModel(),
+            Wav2LetterModel(),
         ]
         expected_partitions = [
             1,
@@ -1956,12 +1955,11 @@ def test_qnn_backend_example_models(self):
                 QCOM_ANNOTATION: (),
                 QCOM_QUANT_DTYPE: QuantDtype.use_8a8w,
             },
-            # Encountered undefined symbol in mainline. Reopen once resolved.
-            # {
-            #     QCOM_MODULE: Wav2LetterModel(),
-            #     QCOM_ANNOTATION: (),
-            #     QCOM_QUANT_DTYPE: QuantDtype.use_8a8w,
-            # },
+            {
+                QCOM_MODULE: Wav2LetterModel(),
+                QCOM_ANNOTATION: (),
+                QCOM_QUANT_DTYPE: QuantDtype.use_8a8w,
+            },
         ]
         expected_partitions = [
             1,
@@ -1974,7 +1972,7 @@ def test_qnn_backend_example_models(self):
             # For MobileBertModelExample
             # 1,
             1,
-            # 1, For Wav2LetterModel
+            1,
         ]
         # TODO: Due to trigger maximum recursion depth exceeded, need to check it.
         disable_validation()
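
Each re-enabled entry feeds the same harness: instantiate the module, quantize with the given dtype, lower to QNN, and compare the delegated partition count against expectations. A paraphrased sketch of the Wav2Letter entry (string keys stand in for the QCOM_* constants and QuantDtype enum used by the real test):

# Paraphrased sketch only; the test itself uses QCOM_MODULE /
# QCOM_ANNOTATION / QCOM_QUANT_DTYPE and QuantDtype.use_8a8w.
from executorch.examples.models.wav2letter import Wav2LetterModel

entry = {
    "module": Wav2LetterModel(),  # module under test
    "annotation": (),             # no custom quantization annotations
    "quant_dtype": "use_8a8w",    # 8-bit activations and weights
}
expected_partitions = 1  # whole graph should land in one QNN partition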

examples/qualcomm/oss_scripts/llama/llama.py

Lines changed: 3 additions & 1 deletion
@@ -851,6 +851,7 @@ def post_process():
         )
         post_process()
     else:
+        performance_output_path = "outputs/inference_speed.txt"
         runner_cmd = " ".join(
             [
                 f"cd {workspace} &&",
@@ -859,6 +860,7 @@ def post_process():
                 f"--model_path {pte_filename}.pte",
                 f"--seq_len {seq_len}",
                 "--output_path outputs/outputs.txt",
+                f"--performance_output_path {performance_output_path}",
                 f"--kv_updator {'SmartMask' if args.kv_updator == smart_mask_updator else 'ShiftPointer'}",
                 runner_args,
             ]
@@ -882,7 +884,7 @@ def post_process():
     adb.pull(output_path=args.artifact, callback=post_process)
     if args.ip and args.port != -1:
         inference_speed = 0
-        with open(f"{args.artifact}/outputs/inference_speed.txt", "r") as f:
+        with open(f"{args.artifact}/{performance_output_path}", "r") as f:
             inference_speed = float(f.read())
 
         pte_size = os.path.getsize(pte_path)
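
The host side now reads the speed figure through the same variable it passed to the runner, instead of a second hard-coded path. Reduced to a sketch ("./llama_artifacts" is an assumed --artifact value; the file is written on device and pulled back via adb):

# Minimal read-back sketch mirroring the hunk above.
performance_output_path = "outputs/inference_speed.txt"
artifact = "./llama_artifacts"  # assumed args.artifact

with open(f"{artifact}/{performance_output_path}", "r") as f:
    inference_speed = float(f.read())  # tokens per second
print(f"device inference speed: {inference_speed:.2f} tok/s")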

examples/qualcomm/oss_scripts/llama/qnn_llama_runner.cpp

Lines changed: 5 additions & 0 deletions
@@ -30,6 +30,10 @@ DEFINE_string(
     output_path,
     "outputs.txt",
     "Executorch inference data output path.");
+DEFINE_string(
+    performance_output_path,
+    "inference_speed.txt",
+    "Records inference speed. For CI purpose.");
 DEFINE_string(tokenizer_path, "tokenizer.bin", "Tokenizer stuff.");
 DEFINE_string(prompt, "The answer to the ultimate question is", "Prompt.");
 DEFINE_string(
@@ -63,6 +67,7 @@ int main(int argc, char** argv) {
   example::Runner runner(
       {FLAGS_model_path},
       FLAGS_tokenizer_path.c_str(),
+      FLAGS_performance_output_path.c_str(),
       FLAGS_logits_scale,
       FLAGS_logits_offset,
       FLAGS_temperature,
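
Pieced together from this hunk and the llama.py change above, the command line the driver assembles for the on-device binary looks roughly like this (model, tokenizer, and seq_len values are placeholders):

# Hypothetical command string mirroring llama.py's runner_cmd join.
runner_cmd = " ".join(
    [
        "./qnn_llama_runner",
        "--model_path llama_qnn.pte",        # assumed pte name
        "--tokenizer_path tokenizer.bin",
        "--seq_len 128",
        "--output_path outputs/outputs.txt",
        "--performance_output_path outputs/inference_speed.txt",  # new flag
        "--kv_updator SmartMask",
    ]
)
print(runner_cmd)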

examples/qualcomm/oss_scripts/llama/runner/runner.cpp

Lines changed: 11 additions & 4 deletions
@@ -34,13 +34,16 @@ namespace example {
 
 namespace {
 static constexpr auto kTopp = 0.9f;
-void printReport(const Runner::Stats& stats);
+void printReport(
+    const Runner::Stats& stats,
+    const std::string& performance_output_path);
 std::string statsToJsonString(const Runner::Stats& stats);
 } // namespace
 
 Runner::Runner(
     const std::vector<std::string>& models_path,
     const std::string& tokenizer_path,
+    const std::string& performance_output_path,
     const float logits_scale,
     const int32_t logits_offset,
     const float temperature,
@@ -49,6 +52,7 @@ Runner::Runner(
     : n_bos_(1),
       n_eos_(1),
       tokenizer_path_(tokenizer_path),
+      performance_output_path_(performance_output_path),
       logits_scale_(logits_scale),
       logits_offset_(logits_offset),
       temperature_(temperature),
@@ -452,7 +456,7 @@ Error Runner::generate(
 
   stats_.num_prompt_tokens = num_prompt_tokens;
   stats_.num_generated_tokens = pos - num_prompt_tokens;
-  printReport(stats_);
+  printReport(stats_, performance_output_path_);
   if (stats_callback) {
     stats_callback(stats_);
   }
@@ -461,7 +465,9 @@ Error Runner::generate(
 }
 
 namespace {
-void printReport(const Runner::Stats& stats) {
+void printReport(
+    const Runner::Stats& stats,
+    const std::string& performance_output_path) {
   printf("PyTorchObserver %s\n", statsToJsonString(stats).c_str());
 
   ET_LOG(
@@ -522,7 +528,8 @@ void printReport(const Runner::Stats& stats) {
 
   // For now, we just print the total inference time for CI, can save more info
   // in future if needed.
-  std::ofstream outfile("outputs/inference_speed.txt");
+
+  std::ofstream outfile(performance_output_path.c_str());
   if (outfile.is_open()) {
     double num_tok = (stats.num_generated_tokens) /
         (double)(stats.inference_end_ms - stats.inference_start_ms) *
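
The figure the runner writes out is plain tokens-per-second arithmetic over the stats struct. A worked sketch with made-up numbers (the hunk is truncated at the trailing "*"; the factor of 1000 converting milliseconds to seconds is an assumption):

# Illustrative values only; mirrors num_tok in printReport.
num_generated_tokens = 128
inference_start_ms = 2_000
inference_end_ms = 6_000

num_tok = num_generated_tokens / (inference_end_ms - inference_start_ms) * 1000
print(f"{num_tok:.2f} tokens/sec")  # 32.00 tokens/sec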

examples/qualcomm/oss_scripts/llama/runner/runner.h

Lines changed: 2 additions & 0 deletions
@@ -29,6 +29,7 @@ class Runner {
   explicit Runner(
       const std::vector<std::string>& models_path,
       const std::string& tokenizer_path,
+      const std::string& performance_output_path_,
      const float logits_scale,
       const int32_t logits_offset,
       const float temperature,
@@ -98,6 +99,7 @@ class Runner {
   const int32_t n_eos_;
   std::vector<std::shared_ptr<executorch::extension::Module>> modules_;
   std::string tokenizer_path_;
+  std::string performance_output_path_;
   float logits_scale_;
   int32_t logits_offset_;
   float temperature_;

examples/qualcomm/scripts/mobilebert_fine_tune.py

Lines changed: 8 additions & 1 deletion
@@ -169,7 +169,7 @@ def get_fine_tuned_mobilebert(artifacts_dir, pretrained_weight, batch_size):
     dataset_train = TensorDataset(input_ids_train, attention_masks_train, labels_train)
     dataset_val = TensorDataset(input_ids_val, attention_masks_val, labels_val)
 
-    epochs = 5
+    epochs = args.num_epochs
     dataloader_train = DataLoader(
         dataset_train,
         sampler=RandomSampler(dataset_train),
@@ -366,6 +366,13 @@ def calibrator(gm):
         type=str,
     )
 
+    parser.add_argument(
+        "--num_epochs",
+        help="If no pretrained weights are provided, set number of epochs to train the model",
+        default=5,
+        type=int,
+    )
+
     parser.add_argument(
         "-F",
         "--use_fp16",

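The flag simply replaces the hard-coded epoch count on the fine-tune path; a condensed sketch of the interplay:

# Condensed sketch: argparse supplies the epoch count, defaulting to the
# old hard-coded 5; CI passes 1 to keep the fine-tune step cheap.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--num_epochs", default=5, type=int)
args = parser.parse_args(["--num_epochs", "1"])  # what CI effectively passes

epochs = args.num_epochs  # previously: epochs = 5
print(epochs)  # 1
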
examples/qualcomm/scripts/wav2letter.py

Lines changed: 14 additions & 5 deletions
@@ -5,6 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import json
+import logging
 import os
 import sys
 from multiprocessing.connection import Client
@@ -111,7 +112,12 @@ def main(args):
     # target labels " abcdefghijklmnopqrstuvwxyz'*"
     instance.vocab_size = 29
     model = instance.get_eager_model().eval()
-    model.load_state_dict(torch.load(args.pretrained_weight, weights_only=True))
+    if args.pretrained_weight:
+        model.load_state_dict(torch.load(args.pretrained_weight, weights_only=True))
+    else:
+        logging.warning(
+            "It is strongly recommended to provide pretrained weights or else accuracy will be bad. This option is here mainly for CI purpose to ensure compile is successful."
+        )
 
     # convert conv1d to conv2d in nn.Module level will only introduce 2 permute
     # nodes around input & output, which is more quantization friendly.
@@ -128,9 +134,12 @@ def main(args):
 
     # retrieve dataset, will take some time to download
     data_num = 100
-    inputs, targets, input_list = get_dataset(
-        data_size=data_num, artifact_dir=args.artifact
-    )
+    if args.compile_only:
+        inputs = [(torch.rand(1, 1, 454560, 1),)]
+    else:
+        inputs, targets, input_list = get_dataset(
+            data_size=data_num, artifact_dir=args.artifact
+        )
     pte_filename = "w2l_qnn"
     build_executorch_binary(
         model,
@@ -212,7 +221,7 @@ def main(args):
         ),
         default=None,
         type=str,
-        required=True,
+        required=False,
     )
 
     args = parser.parse_args()
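
Since --compile_only never executes the model, a single random tensor with the right layout is enough to trace and lower it, which is why the dataset download can be skipped. A sketch of that shortcut (the 4D shape is taken from the hunk above and matches the conv2d-converted model's input of batch, channel, samples, 1):

import torch

# Dummy input for compile-only export; it is traced but never run.
compile_only = True  # stands in for args.compile_only
if compile_only:
    inputs = [(torch.rand(1, 1, 454560, 1),)]

print(inputs[0][0].shape)  # torch.Size([1, 1, 454560, 1])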
