
Commit 68462e9

Merge remote-tracking branch 'origin/main' into android-ciflow
2 parents: 690c773 + d519b4d

173 files changed: +4500, -608 lines

.ci/docker/ci_commit_pins/pytorch.txt

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-e4cd76cf8283c8ddbf95674b020fbfcff467cb4b
+00e3eea170ce5db8ea9c62ce5e48f13886cd6d20

.ci/scripts/build-qnn-sdk.sh

Lines changed: 1 addition & 0 deletions
@@ -6,6 +6,7 @@
 # LICENSE file in the root directory of this source tree.
 
 set -eux
+set -o xtrace
 
 build_qnn_backend() {
   echo "Start building qnn backend."

.ci/scripts/test.sh

Lines changed: 20 additions & 2 deletions
@@ -156,9 +156,27 @@ test_model_with_qnn() {
   export PYTHONPATH=$EXECUTORCH_ROOT/..
 
   if [[ "${MODEL_NAME}" == "dl3" ]]; then
-    "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.deeplab_v3 -b ${CMAKE_OUTPUT_DIR} -m SM8550 --compile_only --download
-    EXPORTED_MODEL=./deeplab_v3/dlv3_qnn.pte
+    EXPORT_SCRIPT=deeplab_v3
+    EXPORTED_MODEL_NAME=dlv3_qnn.pte
+  elif [[ "${MODEL_NAME}" == "mv3" ]]; then
+    EXPORT_SCRIPT=mobilenet_v3
+    EXPORTED_MODEL_NAME=mv3_qnn.pte
+  elif [[ "${MODEL_NAME}" == "mv2" ]]; then
+    EXPORT_SCRIPT=mobilenet_v2
+    EXPORTED_MODEL_NAME=mv2_qnn.pte
+  elif [[ "${MODEL_NAME}" == "ic4" ]]; then
+    EXPORT_SCRIPT=inception_v4
+    EXPORTED_MODEL_NAME=ic4_qnn.pte
+  elif [[ "${MODEL_NAME}" == "ic3" ]]; then
+    EXPORT_SCRIPT=inception_v3
+    EXPORTED_MODEL_NAME=ic3_qnn.pte
+  elif [[ "${MODEL_NAME}" == "vit" ]]; then
+    EXPORT_SCRIPT=torchvision_vit
+    EXPORTED_MODEL_NAME=vit_qnn.pte
   fi
+
+  "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m SM8550 --compile_only
+  EXPORTED_MODEL=./${EXPORT_SCRIPT}/${EXPORTED_MODEL_NAME}
 }
 
 if [[ "${BACKEND}" == "portable" ]]; then

.github/pytorch-probot.yml

Lines changed: 2 additions & 0 deletions
@@ -1,5 +1,7 @@
 # The schema is from https://github.com/pytorch/pytorch/blob/main/.github/pytorch-probot.yml
 ciflow_push_tags:
+- ciflow/android
+- ciflow/apple
 - ciflow/nightly
 - ciflow/trunk
 - ciflow/binaries

.github/workflows/android-perf.yml

Lines changed: 10 additions & 9 deletions
@@ -84,9 +84,9 @@ jobs:
       # Separate default values from the workflow dispatch. To ensure defaults are accessible
       # during scheduled runs and to provide flexibility for different defaults between
       # on-demand and periodic benchmarking.
-      CRON_DEFAULT_MODELS: "stories110M"
+      CRON_DEFAULT_MODELS: "stories110M,dl3,mv3,mv2,ic4,ic3,vit"
       CRON_DEFAULT_DEVICES: "samsung_galaxy_s2x"
-      CRON_DEFAULT_DELEGATES: "xnnpack"
+      CRON_DEFAULT_DELEGATES: "xnnpack,qnn"
     run: |
       set -ex
       MODELS="${{ inputs.models }}"

@@ -162,6 +162,11 @@ jobs:
         # Test llama2
         if [[ ${{ matrix.delegate }} == "xnnpack" ]]; then
           DELEGATE_CONFIG="xnnpack+custom+qe"
+        elif [[ ${{ matrix.delegate }} == "qnn" ]]; then
+          DELEGATE_CONFIG="qnn"
+        else
+          echo "Unsupported delegate ${{ matrix.delegate }}"
+          exit 1
         fi
         PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}" "${BUILD_MODE}" "${DTYPE}" "${DELEGATE_CONFIG}" "${ARTIFACTS_DIR_NAME}"
       else

@@ -201,9 +206,6 @@ jobs:
     name: build-llm-demo
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     needs: set-parameters
-    strategy:
-      matrix:
-        tokenizer: [bpe]
     with:
       runner: linux.2xlarge
      docker-image: executorch-ubuntu-22.04-clang12-android

@@ -222,7 +224,7 @@ jobs:
 
       # TODO: This needs to be replaced with a generic loader .apk
       # Build LLM Demo for Android
-      bash build/build_android_llm_demo.sh ${{ matrix.tokenizer }} ${ARTIFACTS_DIR_NAME}
+      bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}
 
   # Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
   upload-android-apps:

@@ -278,9 +280,8 @@ jobs:
       # Unlike models there are limited numbers of build flavor for apps, and the model controls whether it should build with bpe/tiktoken tokenizer.
       # It's okay to build all possible apps with all possible flavors in job "build-llm-demo". However, in this job, once a model is given, there is only
       # one app+flavor that could load and run the model.
-      # TODO: Hard code llm_demo_bpe for now in this job.
-      android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_bpe/app-debug.apk
-      android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_bpe/app-debug-androidTest.apk
+      android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo/app-debug.apk
+      android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo/app-debug-androidTest.apk
       # NB: Need to set the default spec here so that it works for periodic too
       test-spec: ${{ inputs.test_spec || 'https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml' }}
       # Uploaded to S3 from the previous job

.github/workflows/android.yml

Lines changed: 3 additions & 13 deletions
@@ -26,9 +26,6 @@ jobs:
   build-llm-demo:
     name: build-llm-demo
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
-    strategy:
-      matrix:
-        tokenizer: [bpe, tiktoken]
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-clang12-android

@@ -46,7 +43,7 @@ jobs:
       export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded
 
       # Build LLM Demo for Android
-      bash build/build_android_llm_demo.sh ${{ matrix.tokenizer }} ${ARTIFACTS_DIR_NAME}
+      bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}
 
   # Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
   upload-artifacts:

@@ -157,13 +154,6 @@ jobs:
       id-token: write
       contents: read
     uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
-    strategy:
-      matrix:
-        # https://github.com/pytorch/executorch/blob/main/examples/demo-apps/android/LlamaDemo/README.md#alternative-2-build-from-local-machine
-        # mentions that tiktoken is only for Llama3. So, we can export it later in another archive
-        # like https://ossci-assets.s3.amazonaws.com/executorch-android-llama2-7b-0717.zip when this is
-        # updated to run Llama3
-        tokenizer: [bpe]
     with:
       device-type: android
       runner: linux.2xlarge

@@ -173,8 +163,8 @@ jobs:
       # This is the custom Android device pool that only includes Samsung Galaxy S2x
       device-pool-arn: arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa
       # Uploaded to S3 from the previous job, the name of the app comes from the project itself
-      android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_${{ matrix.tokenizer }}/app-debug.apk
-      android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_${{ matrix.tokenizer }}/app-debug-androidTest.apk
+      android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo/app-debug.apk
+      android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo/app-debug-androidTest.apk
       test-spec: https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml
       # Among the input, this is the biggest file, so it is cached on AWS to make the test faster. Note that the file is deleted by AWS after 30
       # days and the job will automatically re-upload the file when that happens.

.github/workflows/apple.yml

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@ on:
       - build/build_apple_frameworks.sh
       - build/create_frameworks.sh
       - build/test_ios_ci.sh
-      - examples/demo-apps/**
+      - examples/demo-apps/apple/**
       - extension/apple/**
       - extension/module/**
   workflow_dispatch:

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
@@ -305,7 +305,7 @@ jobs:
     strategy:
       matrix:
         dtype: [fp32]
-        model: [dl3]
+        model: [dl3, mv3, mv2, ic4, ic3, vit]
       fail-fast: false
     with:
       runner: linux.2xlarge

CMakeLists.txt

Lines changed: 7 additions & 0 deletions
@@ -637,6 +637,13 @@ if(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/runner_util)
 endif()
 
+if(EXECUTORCH_BUILD_PTHREADPOOL
+   AND EXECUTORCH_BUILD_CPUINFO
+   AND CMAKE_CXX_STANDARD GREATER_EQUAL 14
+)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/threadpool)
+endif()
+
 if(EXECUTORCH_BUILD_PYBIND)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/pybind11)

backends/arm/arm_partitioner.py

Lines changed: 2 additions & 0 deletions
@@ -45,6 +45,8 @@ def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
             exir_ops.edge.aten.hardtanh.default,
             exir_ops.edge.aten.convolution.default,
             exir_ops.edge.aten.div.Tensor,
+            exir_ops.edge.aten.exp.default,
+            exir_ops.edge.aten.log.default,
             exir_ops.edge.aten.split_with_sizes_copy.default,
             exir_ops.edge.aten.full.default,
             exir_ops.edge.aten.mul.Tensor,
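Note: with exp and log in the supported-ops list, graphs containing these edge ops can now be delegated to the Arm/TOSA backend instead of falling back to portable kernels. A minimal sketch of a module that exercises both ops follows; torch.exp and torch.log lower to exir_ops.edge.aten.exp.default and exir_ops.edge.aten.log.default in the edge dialect, but the export/partition calls are omitted since that API is not part of this diff:

import torch

# Toy module using both newly supported ops (a softplus-like activation).
class ExpLogModule(torch.nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return torch.log(1.0 + torch.exp(x))

print(ExpLogModule()(torch.randn(2, 3)))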

backends/arm/operators/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -14,9 +14,11 @@
     op_conv2d,
     op_dequant,
     op_div,
+    op_exp,
     op_full,
     op_get_item,
     op_hardtanh,
+    op_log,
     op_mean_dim,
     op_mm,
     op_mul,

backends/arm/operators/op_conv2d.py

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@ def adjust_pad_if_needed(self, input, weight, stride, pad, dilation):
 
         if mod_remainder > pad:
             raise RuntimeError(
-                f"ignoring input element is not currently supported, got a large stride {stride}"
+                "This case should be handled by the SizeAdjustConv2d pass, is it enabled?"
             )
         return pad - mod_remainder
 

backends/arm/operators/op_exp.py

Lines changed: 81 additions & 0 deletions
@@ -0,0 +1,81 @@
+# Copyright 2024 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from typing import List
+
+import numpy as np
+
+import serializer.tosa_serializer as ts
+from executorch.backends.arm.operators.node_visitor import (
+    NodeVisitor,
+    register_node_visitor,
+)
+from executorch.backends.arm.tosa_mapping import TosaArg
+
+from executorch.backends.arm.tosa_quant_utils import (
+    dequantize_value,
+    get_quant_node_args,
+    QuantArgs,
+    quantize_value,
+)
+from serializer.tosa_serializer import TosaOp
+from torch.fx import Node
+
+
+@register_node_visitor
+class ExpVisitor(NodeVisitor):
+    target = "aten.exp.default"
+
+    def __init__(self, *args):
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: Node,
+        tosa_graph: ts.TosaSerializer,
+        inputs: List[TosaArg],
+        output: TosaArg,
+        is_quant_node: bool,
+    ) -> None:
+
+        assert len(node.all_input_nodes) == 1
+        assert len(node.users) == 1
+
+        if is_quant_node:
+            # Assume quantized input is 8 bit.
+
+            # Create attribute for 8 bit table lookup.
+            input_node = node.all_input_nodes[0]
+            in_quantargs = get_quant_node_args(input_node)
+            output_node = list(node.users)[0]
+            out_quantargs = get_quant_node_args(output_node)
+
+            table = exp_table_8bit(in_quantargs, out_quantargs)
+            table_attr = ts.TosaSerializerAttribute()
+            table_attr.TableAttribute(table)
+
+            tosa_graph.addOperator(
+                TosaOp.Op().TABLE, [inputs[0].name], [output.name], table_attr
+            )
+        else:
+            tosa_graph.addOperator(TosaOp.Op().EXP, [inputs[0].name], [output.name])
+
+
+def exp_table_8bit(in_quantargs: QuantArgs, out_quantargs: QuantArgs):
+    """
+    Returns a table mapping 256 entries to exp([qmin,qmax])
+    """
+
+    def exp(x):
+        # Convert quantized input to floating point exp input space.
+        v = dequantize_value(x, in_quantargs)
+        # Compute exp.
+        v = np.exp(v)
+        # Convert exp output back to quantized space.
+        return quantize_value(v, out_quantargs)
+
+    return [
+        exp(x)
+        for x in np.linspace(in_quantargs.qmin, in_quantargs.qmax, 256, dtype=np.int8)
+    ]
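Note: on the quantized path, exp is not computed at runtime at all; the visitor emits a TOSA TABLE op whose 256 int8 entries bake dequantize, exp, and requantize into a single lookup. Below is a self-contained sketch of that table construction, assuming affine quantization (x_fp = (x_q - zero_point) * scale); the scale and zero-point values are illustrative stand-ins for what get_quant_node_args would return, not values from this diff:

import numpy as np

# Hypothetical quantization parameters for the input and output tensors.
IN_SCALE, IN_ZP = 0.05, 0
OUT_SCALE, OUT_ZP = 1.2, -128
QMIN, QMAX = -128, 127

def exp_entry(x_q: int) -> int:
    v = (x_q - IN_ZP) * IN_SCALE        # dequantize to float
    v = float(np.exp(v))                # the actual op
    q = round(v / OUT_SCALE) + OUT_ZP   # requantize
    return int(np.clip(q, QMIN, QMAX))  # clamp to the int8 range

# 256 entries covering the whole quantized input range, as in exp_table_8bit.
table = [exp_entry(int(x)) for x in np.linspace(QMIN, QMAX, 256, dtype=np.int8)]
print(table[0], table[-1])  # smallest and largest quantized exp outputs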

backends/arm/operators/op_log.py

Lines changed: 81 additions & 0 deletions
@@ -0,0 +1,81 @@
+# Copyright 2024 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from typing import List
+
+import numpy as np
+
+import serializer.tosa_serializer as ts
+from executorch.backends.arm.operators.node_visitor import (
+    NodeVisitor,
+    register_node_visitor,
+)
+from executorch.backends.arm.tosa_mapping import TosaArg
+
+from executorch.backends.arm.tosa_quant_utils import (
+    dequantize_value,
+    get_quant_node_args,
+    QuantArgs,
+    quantize_value,
+)
+from serializer.tosa_serializer import TosaOp
+from torch.fx import Node
+
+
+@register_node_visitor
+class LogVisitor(NodeVisitor):
+    target = "aten.log.default"
+
+    def __init__(self, *args):
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: Node,
+        tosa_graph: ts.TosaSerializer,
+        inputs: List[TosaArg],
+        output: TosaArg,
+        is_quant_node: bool,
+    ) -> None:
+
+        assert len(node.all_input_nodes) == 1
+        assert len(node.users) == 1
+
+        if is_quant_node:
+            # Assume quantized input is 8 bit.
+
+            # Create attribute for 8 bit table lookup.
+            input_node = node.all_input_nodes[0]
+            in_quantargs = get_quant_node_args(input_node)
+            output_node = list(node.users)[0]
+            out_quantargs = get_quant_node_args(output_node)
+
+            table = log_table_8bit(in_quantargs, out_quantargs)
+            table_attr = ts.TosaSerializerAttribute()
+            table_attr.TableAttribute(table)
+
+            tosa_graph.addOperator(
+                TosaOp.Op().TABLE, [inputs[0].name], [output.name], table_attr
+            )
+        else:
+            tosa_graph.addOperator(TosaOp.Op().LOG, [inputs[0].name], [output.name])
+
+
+def log_table_8bit(in_quantargs: QuantArgs, out_quantargs: QuantArgs):
+    """
+    Returns a table mapping 256 entries to log([qmin,qmax])
+    """
+
+    def log(x):
+        # Convert quantized input to floating point log input space.
+        v = dequantize_value(x, in_quantargs)
+        # Compute log.
+        v = np.log(v)
+        # Convert log output back to quantized space.
+        return quantize_value(v, out_quantargs)
+
+    return [
+        log(x)
+        for x in np.linspace(in_quantargs.qmin, in_quantargs.qmax, 256, dtype=np.int8)
+    ]

backends/arm/passes/annotate_channels_last_dim_order_pass.py

Lines changed: 3 additions & 1 deletion
@@ -46,7 +46,9 @@ def call(self, graph_module: torch.fx.GraphModule):
         NHWC_Order = (0, 2, 3, 1)
         HWCM_Order = (2, 3, 0, 1)
         for node in graph_module.graph.nodes:
-            if isinstance(node.meta["val"], tuple):
+            if isinstance(
+                node.meta["val"], (tuple, torch.fx.immutable_collections.immutable_list)
+            ):
                node_data = node.meta["val"][0].data
             else:
                 node_data = node.meta["val"].data
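Note: node.meta["val"] holds a node's (fake) output value(s); for multi-output ops it is a sequence, and FX often stores that sequence as torch.fx.immutable_collections.immutable_list rather than a plain tuple, which is the case this fix adds. A small sketch that surfaces such a node (immutable_list subclasses list, so the isinstance check below catches it; this assumes make_fx populates "val" metadata, as current PyTorch does):

import torch
from torch.fx.experimental.proxy_tensor import make_fx

def f(x):
    # split_with_sizes has multiple outputs, so its node's meta["val"]
    # is a sequence with one tensor per output.
    a, b = torch.split(x, [2, 2], dim=0)
    return a + b

gm = make_fx(f)(torch.randn(4, 3))
for node in gm.graph.nodes:
    val = node.meta.get("val")
    if isinstance(val, (tuple, list)):
        print(node.name, type(val).__name__, [v.shape for v in val])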
