Commit 3c5118a
Merge remote-tracking branch 'origin/main' into mtk-6
2 parents: b06fd66 + aa852cc

501 files changed: +7553 −6038 lines

.ci/scripts/test_llama.sh
Lines changed: 1 addition & 1 deletion

@@ -171,7 +171,7 @@ else
 fi

 # Check dtype.
-EXPORTED_MODEL_NAME="llama2"
+EXPORTED_MODEL_NAME="tinyllama_${MODE}_${DTYPE}"
 if [[ "${DTYPE}" == "fp16" ]]; then
   EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}_h"
 elif [[ "${DTYPE}" == "bf16" ]]; then

.ci/scripts/test_model.sh
Lines changed: 25 additions & 8 deletions

@@ -155,30 +155,24 @@ test_model_with_qnn() {

   if [[ "${MODEL_NAME}" == "dl3" ]]; then
     EXPORT_SCRIPT=deeplab_v3
-    EXPORTED_MODEL_NAME=dlv3_qnn.pte
   elif [[ "${MODEL_NAME}" == "mv3" ]]; then
     EXPORT_SCRIPT=mobilenet_v3
-    EXPORTED_MODEL_NAME=mv3_qnn.pte
   elif [[ "${MODEL_NAME}" == "mv2" ]]; then
     EXPORT_SCRIPT=mobilenet_v2
-    EXPORTED_MODEL_NAME=mv2_qnn.pte
   elif [[ "${MODEL_NAME}" == "ic4" ]]; then
     EXPORT_SCRIPT=inception_v4
-    EXPORTED_MODEL_NAME=ic4_qnn.pte
   elif [[ "${MODEL_NAME}" == "ic3" ]]; then
     EXPORT_SCRIPT=inception_v3
-    EXPORTED_MODEL_NAME=ic3_qnn.pte
   elif [[ "${MODEL_NAME}" == "vit" ]]; then
     EXPORT_SCRIPT=torchvision_vit
-    EXPORTED_MODEL_NAME=vit_qnn.pte
   fi

   # Use SM8450 for S22, SM8550 for S23, and SM8560 for S24
   # TODO(guangyang): Make QNN chipset matches the target device
   QNN_CHIPSET=SM8450

   "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --compile_only
-  EXPORTED_MODEL=./${EXPORT_SCRIPT}/${EXPORTED_MODEL_NAME}
+  EXPORTED_MODEL=$(find "./${EXPORT_SCRIPT}" -type f -name "${MODEL_NAME}*.pte" -print -quit)
 }

 test_model_with_coreml() {
@@ -187,7 +181,24 @@ test_model_with_coreml() {
     exit 1
   fi

-  "${PYTHON_EXECUTABLE}" -m examples.apple.coreml.scripts.export --model_name="${MODEL_NAME}"
+  DTYPE=float16
+
+  "${PYTHON_EXECUTABLE}" -m examples.apple.coreml.scripts.export --model_name="${MODEL_NAME}" --compute_precision "${DTYPE}"
+  EXPORTED_MODEL=$(find "." -type f -name "${MODEL_NAME}*.pte" -print -quit)
+  # TODO:
+  if [ -n "$EXPORTED_MODEL" ]; then
+    EXPORTED_MODEL_WITH_DTYPE="${EXPORTED_MODEL%.pte}_${DTYPE}.pte"
+    mv "$EXPORTED_MODEL" "$EXPORTED_MODEL_WITH_DTYPE"
+    EXPORTED_MODEL="$EXPORTED_MODEL_WITH_DTYPE"
+    echo "Renamed file path: $EXPORTED_MODEL"
+  else
+    echo "No .pte file found"
+    exit 1
+  fi
+}
+
+test_model_with_mps() {
+  "${PYTHON_EXECUTABLE}" -m examples.apple.mps.scripts.mps_example --model_name="${MODEL_NAME}" --use_fp16
   EXPORTED_MODEL=$(find "." -type f -name "${MODEL_NAME}*.pte" -print -quit)
 }
@@ -206,6 +217,12 @@ elif [[ "${BACKEND}" == "coreml" ]]; then
   if [[ $? -eq 0 ]]; then
     prepare_artifacts_upload
   fi
+elif [[ "${BACKEND}" == "mps" ]]; then
+  echo "Testing ${MODEL_NAME} with mps..."
+  test_model_with_mps
+  if [[ $? -eq 0 ]]; then
+    prepare_artifacts_upload
+  fi
 elif [[ "${BACKEND}" == "xnnpack" ]]; then
   echo "Testing ${MODEL_NAME} with xnnpack..."
   WITH_QUANTIZATION=true
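Instead of hard-coding one artifact name per backend, the script now picks up the first .pte file whose name starts with the model name. A rough Python equivalent of the `find ... -print -quit` calls (a sketch for illustration only; `find_first_pte` is not part of the commit):

from pathlib import Path
from typing import Optional

def find_first_pte(root: str, model_name: str) -> Optional[str]:
    """Return the first .pte file under `root` whose name starts with `model_name`."""
    for path in Path(root).rglob(f"{model_name}*.pte"):
        if path.is_file():
            return str(path)
    return None

# e.g. find_first_pte("./deeplab_v3", "dl3") might return "deeplab_v3/dl3_qnn.pte"
# (hypothetical layout); None means no exported artifact was found.
print(find_first_pte(".", "mv3"))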

.github/scripts/extract_benchmark_results.py
Lines changed: 151 additions & 16 deletions

@@ -14,7 +14,7 @@
 from argparse import Action, ArgumentParser, Namespace
 from io import BytesIO
 from logging import info, warning
-from typing import Any, List, Optional
+from typing import Any, Dict, List, Optional
 from urllib import error, request

@@ -24,6 +24,15 @@
 BENCHMARK_RESULTS_FILENAME = "benchmark_results.json"
 ARTIFACTS_FILENAME_REGEX = re.compile(r"(android|ios)-artifacts-(?P<job_id>\d+).json")

+# iOS-related regexes and variables
+IOS_TEST_SPEC_REGEX = re.compile(
+    r"Test Case\s+'-\[(?P<test_class>\w+)\s+(?P<test_name>\w+)\]'\s+measured\s+\[(?P<metric>.+)\]\s+average:\s+(?P<value>[\d\.]+),"
+)
+IOS_TEST_NAME_REGEX = re.compile(
+    r"test_(?P<method>forward|load|generate)_(?P<model_name>\w+)_pte.*iOS_(?P<ios_ver>\w+)_iPhone(?P<iphone_ver>\w+)"
+)
+IOS_MODEL_NAME_REGEX = re.compile(r"(?P<model>[^_]+)_(?P<backend>\w+)_(?P<dtype>\w+)")
+

 class ValidateArtifacts(Action):
     def __call__(
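These three patterns drive the iOS parsing added below. For a sense of what IOS_TEST_SPEC_REGEX matches, a small self-contained check (the console line is made up for illustration, not a real xcresult record):

import re

IOS_TEST_SPEC_REGEX = re.compile(
    r"Test Case\s+'-\[(?P<test_class>\w+)\s+(?P<test_name>\w+)\]'\s+measured\s+\[(?P<metric>.+)\]\s+average:\s+(?P<value>[\d\.]+),"
)

# A made-up line in the shape the regex expects
line = (
    "Test Case '-[Benchmark test_load_llama2_pte_iOS_17_2_1_iPhone15_4]' "
    "measured [Clock Monotonic Time, s] average: 0.231, "
    "relative standard deviation: 1.2%"
)

m = IOS_TEST_SPEC_REGEX.search(line)
assert m is not None
print(m.group("test_name"))  # test_load_llama2_pte_iOS_17_2_1_iPhone15_4
print(m.group("metric"))     # Clock Monotonic Time, s
print(m.group("value"))      # 0.231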
@@ -135,6 +144,130 @@ def extract_android_benchmark_results(
     return []


+def initialize_ios_metadata(test_name: str) -> Dict[str, any]:
+    """
+    Extract the benchmark metadata from the test name, for example:
+        test_forward_llama2_pte_iOS_17_2_1_iPhone15_4
+        test_load_resnet50_xnnpack_q8_pte_iOS_17_2_1_iPhone15_4
+    """
+    m = IOS_TEST_NAME_REGEX.match(test_name)
+    if not m:
+        return {}
+
+    method = m.group("method")
+    model_name = m.group("model_name")
+    ios_ver = m.group("ios_ver").replace("_", ".")
+    iphone_ver = m.group("iphone_ver").replace("_", ".")
+
+    # NB: This looks brittle, but unless we can return iOS benchmark results in JSON
+    # format by the test, the mapping is needed to match with Android test
+    if method == "load":
+        metric = "model_load_time(ms)"
+    elif method == "forward":
+        metric = (
+            "generate_time(ms)"
+            if "llama" in model_name
+            else "avg_inference_latency(ms)"
+        )
+    elif method == "generate":
+        metric = "token_per_sec"
+
+    backend = ""
+    quantization = "unknown"
+
+    m = IOS_MODEL_NAME_REGEX.match(model_name)
+    if m:
+        backend = m.group("backend")
+        quantization = m.group("dtype")
+        model_name = m.group("model")
+
+    return {
+        "benchmarkModel": {
+            "backend": backend,
+            "quantization": quantization,
+            "name": model_name,
+        },
+        "deviceInfo": {
+            "arch": f"iPhone {iphone_ver}",
+            "device": f"iPhone {iphone_ver}",
+            "os": f"iOS {ios_ver}",
+            "availMem": 0,
+            "totalMem": 0,
+        },
+        "metric": metric,
+        # These fields will be populated later by extract_ios_metric
+        "actualValue": 0,
+        "targetValue": 0,
+    }
+
+
+def extract_ios_metric(
+    benchmark_result: Dict[str, Any],
+    test_name: str,
+    metric_name: str,
+    metric_value: float,
+) -> Dict[str, Any]:
+    """
+    Map the metric name from iOS xcresult to the benchmark result
+    """
+    if metric_name == "Clock Monotonic Time, s":
+        # The benchmark value is in ms
+        benchmark_result["actualValue"] = metric_value * 1000
+    elif metric_name == "Tokens Per Second, t/s":
+        benchmark_result["actualValue"] = metric_value
+
+    return benchmark_result
+
+
+def extract_ios_benchmark_results(
+    job_name: str, artifact_type: str, artifact_s3_url: str
+) -> List:
+    """
+    The benchmark results from iOS are currently from xcresult, which could either
+    be parsed from CUSTOMER_ARTIFACT or get from the test spec output. The latter
+    is probably easier to process
+    """
+    if artifact_type != "TESTSPEC_OUTPUT":
+        return []
+
+    try:
+        benchmark_results = []
+
+        with request.urlopen(artifact_s3_url) as data:
+            current_test_name = ""
+            current_record = {}
+
+            for line in data.read().decode("utf8").splitlines():
+                s = IOS_TEST_SPEC_REGEX.search(line)
+                if not s:
+                    continue
+
+                test_class = s.group("test_class")
+                test_name = s.group("test_name")
+                metric_name = s.group("metric")
+                metric_value = float(s.group("value"))
+
+                if test_name != current_test_name:
+                    if current_record:
+                        # Save the benchmark result in the same format used by Android
+                        benchmark_results.append(current_record.copy())
+
+                    current_test_name = test_name
+                    current_record = initialize_ios_metadata(current_test_name)
+
+                current_record = extract_ios_metric(
+                    current_record, test_name, metric_name, metric_value
+                )
+
+            benchmark_results.append(current_record.copy())
+
+        return benchmark_results
+
+    except error.HTTPError:
+        warning(f"Fail to {artifact_type} {artifact_s3_url}")
+        return []
+
+
 def extract_job_id(artifacts_filename: str) -> int:
     """
     Extract the job id from the artifacts filename
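A hypothetical usage sketch of the two helpers above (assuming the module is importable as below; the expected values follow from the regexes and the metric mapping):

# Assumes the module can be imported like this (path hypothetical):
from extract_benchmark_results import extract_ios_metric, initialize_ios_metadata

# A test name in the shape IOS_TEST_NAME_REGEX expects
record = initialize_ios_metadata("test_load_resnet50_xnnpack_q8_pte_iOS_17_2_1_iPhone15_4")
# Per the code above:
#   record["benchmarkModel"] == {"backend": "xnnpack", "quantization": "q8", "name": "resnet50"}
#   record["deviceInfo"]["os"] == "iOS 17.2.1"
#   record["metric"] == "model_load_time(ms)"

# "Clock Monotonic Time" arrives in seconds and is stored in milliseconds:
record = extract_ios_metric(
    record,
    test_name="test_load_resnet50_xnnpack_q8_pte_iOS_17_2_1_iPhone15_4",
    metric_name="Clock Monotonic Time, s",
    metric_value=0.231,
)
print(record["actualValue"])  # ~231.0 (ms)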
@@ -222,23 +355,25 @@ def main() -> None:
             benchmark_results = extract_android_benchmark_results(
                 job_name, artifact_type, artifact_s3_url
             )
-            if benchmark_results:
-                benchmark_results = transform(
-                    app_type,
-                    benchmark_results,
-                    args.repo,
-                    args.head_branch,
-                    args.workflow_name,
-                    args.workflow_run_id,
-                    args.workflow_run_attempt,
-                    job_name,
-                    extract_job_id(args.artifacts),
-                )
-                all_benchmark_results.extend(benchmark_results)

         if app_type == "IOS_APP":
-            # TODO (huydhn): Implement the logic for iOS next
-            pass
+            benchmark_results = extract_ios_benchmark_results(
+                job_name, artifact_type, artifact_s3_url
+            )
+
+        if benchmark_results:
+            benchmark_results = transform(
+                app_type,
+                benchmark_results,
+                args.repo,
+                args.head_branch,
+                args.workflow_name,
+                args.workflow_run_id,
+                args.workflow_run_attempt,
+                job_name,
+                extract_job_id(args.artifacts),
+            )
+            all_benchmark_results.extend(benchmark_results)

     if all_benchmark_results:
         output_file = os.path.basename(args.artifacts)

.github/workflows/android-perf.yml
Lines changed: 2 additions & 1 deletion

@@ -135,7 +135,7 @@ jobs:
         delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
       fail-fast: false
     with:
-      runner: linux.2xlarge
+      runner: linux.4xlarge
      docker-image: executorch-ubuntu-22.04-clang12-android
      submodules: 'true'
      timeout: 60
@@ -205,6 +205,7 @@ jobs:

   # Let's see how expensive this job is, we might want to tone it down by running it periodically
   benchmark-on-device:
+    if: always()
     permissions:
       id-token: write
       contents: read

.github/workflows/apple-perf.yml
Lines changed: 79 additions & 1 deletion

@@ -76,7 +76,7 @@ jobs:
           # on-demand and periodic benchmarking.
           CRON_DEFAULT_MODELS: "stories110M,mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l"
           CRON_DEFAULT_DEVICES: "apple_iphone_15"
-          CRON_DEFAULT_DELEGATES: "xnnpack,coreml"
+          CRON_DEFAULT_DELEGATES: "xnnpack,coreml,mps"
         run: |
           set -ex
           MODELS="${{ inputs.models }}"
@@ -169,6 +169,8 @@ jobs:
             DELEGATE_CONFIG="xnnpack+custom+qe"
           elif [[ ${{ matrix.delegate }} == "coreml" ]]; then
             DELEGATE_CONFIG="coreml"
+          elif [[ ${{ matrix.delegate }} == "mps" ]]; then
+            DELEGATE_CONFIG="mps"
           fi
           PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
             bash .ci/scripts/test_llama.sh "${{ matrix.model }}" "${BUILD_MODE}" "${DTYPE}" "${DELEGATE_CONFIG}" "${ARTIFACTS_DIR_NAME}"
@@ -277,6 +279,7 @@ jobs:
           path: ${{ runner.temp }}/artifacts/

   benchmark-on-device:
+    if: always()
     needs:
       - set-parameters
       - upload-benchmark-app
@@ -306,3 +309,78 @@ jobs:
       ios-xctestrun-zip: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/Benchmark.xctestrun.zip
       test-spec: ${{ inputs.test_spec || 'https://ossci-ios.s3.amazonaws.com/executorch/default-ios-device-farm-appium-test-spec.yml' }}
       extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip
+
+  upload-benchmark-results:
+    needs:
+      - benchmark-on-device
+    if: always()
+    runs-on: linux.2xlarge
+    environment: upload-benchmark-results
+    permissions:
+      id-token: write
+      contents: read
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          submodules: false
+
+      - name: Authenticate with AWS
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
+          # The max duration enforced by the server side
+          role-duration-seconds: 18000
+          aws-region: us-east-1
+
+      - name: Setup conda
+        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
+        with:
+          python-version: '3.10'
+
+      - name: Download the list of artifacts from S3
+        env:
+          ARTIFACTS_S3_DIR: s3://gha-artifacts/device_farm/${{ github.run_id }}/${{ github.run_attempt }}/artifacts/
+        shell: bash
+        run: |
+          set -eux
+          ${CONDA_RUN} python -mpip install awscli==1.32.18
+
+          mkdir -p artifacts
+          pushd artifacts
+          ${CONDA_RUN} aws s3 sync "${ARTIFACTS_S3_DIR}" .
+          popd
+
+          ls -lah artifacts
+
+      - name: Extract the benchmark results JSON
+        shell: bash
+        run: |
+          set -eux
+
+          mkdir -p benchmark-results
+
+          for ARTIFACTS_BY_JOB in artifacts/*.json; do
+            [ -f "${ARTIFACTS_BY_JOB}" ] || break
+            echo "${ARTIFACTS_BY_JOB}"
+            ${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \
+              --artifacts "${ARTIFACTS_BY_JOB}" \
+              --output-dir benchmark-results \
+              --repo ${{ github.repository }} \
+              --head-branch ${{ github.head_ref || github.ref_name }} \
+              --workflow-name "${{ github.workflow }}" \
+              --workflow-run-id ${{ github.run_id }} \
+              --workflow-run-attempt ${{ github.run_attempt }}
+          done
+
+          ls -lah benchmark-results
+
+          for BENCHMARK_RESULTS in benchmark-results/*.json; do
+            cat "${BENCHMARK_RESULTS}"
+            echo
+          done
+
+      - name: Upload the benchmark results
+        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
+        with:
+          benchmark-results-dir: 'benchmark-results'
+          dry-run: false
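For local debugging, the "Extract the benchmark results JSON" step above boils down to something like the following Python sketch (the paths and the --repo/--head-branch/--workflow-* values are placeholders; the CLI flags are the ones the workflow passes):

import pathlib
import subprocess

output_dir = pathlib.Path("benchmark-results")
output_dir.mkdir(exist_ok=True)

# One JSON file per device-farm job, as synced from S3 into ./artifacts
for artifacts_by_job in sorted(pathlib.Path("artifacts").glob("*.json")):
    subprocess.run(
        [
            "python", ".github/scripts/extract_benchmark_results.py",
            "--artifacts", str(artifacts_by_job),
            "--output-dir", str(output_dir),
            "--repo", "pytorch/executorch",  # placeholder
            "--head-branch", "main",         # placeholder
            "--workflow-name", "apple-perf", # placeholder
            "--workflow-run-id", "1",        # placeholder
            "--workflow-run-attempt", "1",   # placeholder
        ],
        check=True,
    )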
