Skip to content

Commit bb1dd12

Browse files
authored
Merge branch 'pytorch:main' into search-graph
2 parents 50d56a5 + 97a4600 commit bb1dd12

File tree

67 files changed

+1445
-423
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+1445
-423
lines changed

.ci/docker/ci_commit_pins/pytorch.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
bd5482c7c3e1197e10c46ff739027f917d9c1fcc
1+
c8a648d4dffb9f0133ff4a2ea0e660b42105d3ad

.ci/docker/common/install_clang.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ install_ubuntu() {
1313
apt-get install -y --no-install-recommends clang-"$CLANG_VERSION"
1414
apt-get install -y --no-install-recommends llvm-"$CLANG_VERSION"
1515
# Also require LLD linker from llvm and libomp to build PyTorch from source
16-
apt-get install -y lld "libomp-${CLANG_VERSION}-dev"
16+
apt-get install -y lld "libomp-${CLANG_VERSION}-dev" "libc++-${CLANG_VERSION}-dev"
1717

1818
# Use update-alternatives to make this version the default
1919
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-"$CLANG_VERSION" 50

.ci/docker/requirements-ci.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
mpmath==1.3.0
2-
numpy==1.22.0; python_version == '3.10'
2+
numpy==1.21.3; python_version == '3.10'
33
numpy==1.23.2; python_version == '3.11'
44
numpy; python_version >= '3.12'
55
PyYAML==6.0.1

.ci/scripts/setup-linux.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ else
1919
fi
2020

2121
# As Linux job is running inside a Docker container, all of its dependencies
22-
# have already been installed
23-
install_executorch
22+
# have already been installed, so we use PyTorch build from source here instead
23+
# of nightly. This allows CI to test against latest commits from PyTorch
24+
install_executorch "use-pt-pinned-commit"
2425
build_executorch_runner "${BUILD_TOOL}"

.ci/scripts/setup-qnn-deps.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,9 @@ install_qnn() {
3131
}
3232

3333
setup_libc++() {
34+
clang_version=$1
3435
sudo apt-get update
35-
pkgs_to_check=('libc++-dev')
36+
pkgs_to_check=("libc++-${clang_version}-dev")
3637
j=0
3738
while [ $j -lt ${#pkgs_to_check[*]} ]; do
3839
install_status=$(verify_pkg_installed ${pkgs_to_check[$j]})
@@ -47,5 +48,6 @@ setup_libc++() {
4748
done
4849
}
4950

50-
setup_libc++
51+
# This needs to match with the clang version from the Docker image
52+
setup_libc++ 12
5153
install_qnn

.ci/scripts/utils.sh

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,11 @@ install_executorch() {
2020
which pip
2121
# Install executorch, this assumes that Executorch is checked out in the
2222
# current directory.
23-
# TODO(T199538337): clean up install scripts to use install_requirements.sh
24-
./install_requirements.sh --pybind xnnpack
23+
if [[ "${1:-}" == "use-pt-pinned-commit" ]]; then
24+
./install_requirements.sh --pybind xnnpack --use-pt-pinned-commit
25+
else
26+
./install_requirements.sh --pybind xnnpack
27+
fi
2528
# Just print out the list of packages for debugging
2629
pip list
2730
}

.github/pull_request_template.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
### Summary
2+
[PLEASE REMOVE] See [CONTRIBUTING.md's Pull Requests](https://github.com/pytorch/executorch/blob/main/CONTRIBUTING.md#pull-requests) for ExecuTorch PR guidelines.
3+
4+
[PLEASE REMOVE] If this PR closes an issue, please add a `Fixes #<issue-id>` line.
5+
6+
[PLEASE REMOVE] If this PR introduces a fix or feature that should be in the upcoming release notes, please add a "Release notes: <area>" label. For a list of available release notes labels, check out [CONTRIBUTING.md's Pull Requests](https://github.com/pytorch/executorch/blob/main/CONTRIBUTING.md#pull-requests).
7+
8+
### Test plan
9+
[PLEASE REMOVE] How did you test this PR? Please write down any manual commands you used and note down tests that you have written if applicable.

.github/workflows/_unittest.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ jobs:
3737
CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \
3838
.ci/scripts/setup-linux.sh cmake
3939
40+
# Install llama3_2_vision dependencies.
41+
PYTHON_EXECUTABLE=python ./examples/models/llama3_2_vision/install_requirements.sh
42+
4043
# Run pytest with coverage
4144
pytest -n auto --cov=./ --cov-report=xml
4245
# Run gtest
@@ -67,6 +70,10 @@ jobs:
6770
${CONDA_RUN} --no-capture-output \
6871
.ci/scripts/setup-macos.sh cmake
6972
73+
# Install llama3_2_vision dependencies.
74+
PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
75+
./examples/models/llama3_2_vision/install_requirements.sh
76+
7077
# Run pytest with coverage
7178
${CONDA_RUN} pytest -n auto --cov=./ --cov-report=xml
7279
# Run gtest

.github/workflows/trunk.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ jobs:
137137
docker-image: executorch-ubuntu-22.04-arm-sdk
138138
submodules: 'true'
139139
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
140+
timeout: 90
140141
script: |
141142
# The generic Linux job chooses to use base env, not the one setup by the image
142143
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
@@ -162,6 +163,7 @@ jobs:
162163
docker-image: executorch-ubuntu-22.04-arm-sdk
163164
submodules: 'true'
164165
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
166+
timeout: 90
165167
script: |
166168
# The generic Linux job chooses to use base env, not the one setup by the image
167169
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")

CONTRIBUTING.md

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -242,14 +242,27 @@ for basics.
242242
- Give the PR a clear and thorough description. Don't just describe what the PR
243243
does: the diff will do that. Explain *why* you are making this change, in a
244244
way that will make sense to someone years from now.
245-
- Add the line `Test Plan:` (with that spelling, capitalization, and trailing
246-
colon character), followed by lines containing repeatable instructions for
245+
- Explain how you have tested your changes by including repeatable instructions for
247246
testing the PR.
248247
- If you added tests, this can be as simple as the command you used to run the
249248
tests.
250249
- If you tested the PR manually, include the steps and the outputs. Help a
251250
future editor understand how to test the code that you're modifying
252251
today.
252+
- If your PR contains or is representative of a feature/bug fix that should be
253+
called out in the release notes, please add a label for "Release notes: \<area\>",
254+
where \<area\> describes which part of ExecuTorch the change pertains to, e.g.
255+
"Release notes: runtime". Here are all of the categories:
256+
- `Release notes: runtime`: changes related to the core runtime which loads the program methods, initializes delegates, and runs the lowered graph.
257+
- `Release notes: exir`: changes to any internal representations, such as any edge-related dialects. Also any changes to passes that may modify the exir, such as memory planning.
258+
- `Release notes: quantization`: changes to quantization.
259+
- `Release notes: ops & kernels`: changes to the opset and any new / changed kernel implementations.
260+
- `Release notes: api`: changes to public-facing APIs (any interfaces, pybinded runtime methods, etc.).
261+
- `Release notes: backends`: changes to any of the backend delegates.
262+
- `Release notes: build`: changes related to the build system, including major dependency upgrades, notable build flags, optimizations, etc.
263+
- `Release notes: devtools`: changes to any of ExecuTorch's developer tools, for example the debugger & profiler.
264+
- `Release notes: examples`: changes to any code under `examples/`.
265+
- `Release notes: misc`: anything notable that doesn't belong in the above categories.
253266
- See https://github.com/pytorch/executorch/pull/3612 for an example PR that
254267
follows this advice.
255268
1. Before asking for a review, ensure that all [CI (continuous integration)

backends/apple/coreml/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ To quantize a Program in a Core ML favored way, the client may utilize **CoreMLQ
6565
import torch
6666
import executorch.exir
6767

68-
from torch._export import capture_pre_autograd_graph
68+
from torch.export import export_for_training
6969
from torch.ao.quantization.quantize_pt2e import (
7070
convert_pt2e,
7171
prepare_pt2e,
@@ -93,7 +93,7 @@ class Model(torch.nn.Module):
9393
source_model = Model()
9494
example_inputs = (torch.randn((1, 3, 256, 256)), )
9595

96-
pre_autograd_aten_dialect = capture_pre_autograd_graph(model, example_inputs)
96+
pre_autograd_aten_dialect = export_for_training(model, example_inputs).module()
9797

9898
quantization_config = LinearQuantizerConfig.from_dict(
9999
{

backends/cadence/aot/functions.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,10 @@
154154
kernels:
155155
- arg_meta: null
156156
kernel_name: impl::reference::quantized_layer_norm_out
157+
- func: cadence::quantized_layer_norm.per_tensor_out(Tensor input, float in_scale, int in_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor(a!)
158+
kernels:
159+
- arg_meta: null
160+
kernel_name: impl::reference::quantized_layer_norm_per_tensor_out
157161

158162
- func: cadence::quantized_linear.out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, Tensor weight_zero_point, Tensor out_multiplier, Tensor out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
159163
kernels:

backends/cadence/aot/functions_hifi.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,10 @@
125125
kernels:
126126
- arg_meta: null
127127
kernel_name: cadence::impl::HiFi::quantized_layer_norm_out
128+
- func: cadence::quantized_layer_norm.per_tensor_out(Tensor input, float in_scale, int in_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor(a!)
129+
kernels:
130+
- arg_meta: null
131+
kernel_name: cadence::impl::HiFi::quantized_layer_norm_per_tensor_out
128132

129133
- func: cadence::quantized_linear.out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, Tensor weight_zero_point, Tensor out_multiplier, Tensor out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
130134
kernels:

backends/cadence/aot/ops_registrations.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,12 @@
3636
lib.define(
3737
"quantized_layer_norm.out(Tensor X, Tensor X_scale, Tensor X_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor (a!)"
3838
)
39+
# Register the per_tensor overloads of quantized_layer_norm. Unlike the
# quantized_layer_norm.out schema, which declares X_scale and X_zero_point as
# Tensors, these schemas declare them as scalars (float X_scale, int X_zero_point).
# First the functional variant, which returns a new Tensor Y.
lib.define(
40+
"quantized_layer_norm.per_tensor(Tensor X, float X_scale, int X_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point) -> (Tensor Y)"
41+
)
42+
# Then the out variant, which takes a keyword-only mutable `out` tensor.
lib.define(
43+
"quantized_layer_norm.per_tensor_out(Tensor X, float X_scale, int X_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor (a!)"
44+
)
3945

4046
lib.define(
4147
"quantized_linear(Tensor src, Tensor weight, Tensor bias, int src_zero_point, Tensor weight_zero_point, Tensor out_multiplier, Tensor out_shift, int out_zero_point, Tensor? offset) -> (Tensor Z)"
@@ -180,6 +186,21 @@ def quantized_layer_norm_meta(
180186
return input.new_empty(input.size(), dtype=input.dtype)
181187

182188

189+
# Fake (meta) implementation registered for cadence::quantized_layer_norm.per_tensor.
# It only produces an output placeholder: a new empty tensor with the same size
# and dtype as `input`. None of the other arguments are read.
@register_fake("cadence::quantized_layer_norm.per_tensor")
190+
def quantized_layer_norm_per_tensor_meta(
191+
input: torch.Tensor,
192+
X_scale: float,
193+
X_zero_point: int,
194+
# NOTE(review): the registered op schema declares `int[] normalized_shape`,
# so this `int` annotation looks too narrow (a list of ints is expected) - confirm.
normalized_shape: int,
195+
weight: torch.Tensor,
196+
bias: torch.Tensor,
197+
eps: float,
198+
output_scale: float,
199+
output_zero_point: int,
200+
) -> torch.Tensor:
201+
# Output mirrors the input's shape and dtype; contents are not initialized here.
return input.new_empty(input.size(), dtype=input.dtype)
202+
203+
183204
@register_fake("cadence::quantized_relu")
184205
def quantized_relu_meta(
185206
X: torch.Tensor,

backends/cadence/hifi/operators/quantized_layer_norm.cpp

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ namespace native {
2727
// Compute quantized layer_norm. The current implementation assumes that the
2828
// input is per-tensor quantized.
2929
template <typename T>
30-
void quantized_layer_norm_(
30+
void quantized_layer_norm_per_tensor_(
3131
const Tensor& input,
3232
float input_scale,
3333
int64_t input_zero_point,
@@ -107,7 +107,7 @@ void quantized_layer_norm_(
107107
int64_t input_zero_point = in_zero_point.const_data_ptr<int64_t>()[0];
108108

109109
// Call other overload
110-
quantized_layer_norm_<T>(
110+
quantized_layer_norm_per_tensor_<T>(
111111
input,
112112
input_scale,
113113
input_zero_point,
@@ -120,7 +120,7 @@ void quantized_layer_norm_(
120120
}
121121

122122
void quantized_layer_norm_out(
123-
KernelRuntimeContext& ctx,
123+
__ET_UNUSED KernelRuntimeContext& ctx,
124124
const Tensor& input,
125125
const Tensor& in_scale,
126126
const Tensor& in_zero_point,
@@ -157,6 +157,44 @@ void quantized_layer_norm_out(
157157
#undef typed_quantized_layer_norm
158158
}
159159

160+
// Out-variant entry point for the per_tensor overload of quantized layer norm.
// The input scale and zero point arrive as scalars (double / int64_t) and are
// forwarded, together with weight, bias, eps, the output quantization
// parameters and `out`, to quantized_layer_norm_per_tensor_<ctype>, where ctype
// is selected from the input tensor's dtype.
// `ctx` and `normalized_shape` are marked __ET_UNUSED and are not read here.
// NOTE(review): `in_scale` is a double here, while the templated helper in this
// file declares `float input_scale`, so the value is narrowed at the call -
// confirm this is intended.
void quantized_layer_norm_per_tensor_out(
161+
__ET_UNUSED KernelRuntimeContext& ctx,
162+
const Tensor& input,
163+
double in_scale,
164+
int64_t in_zero_point,
165+
__ET_UNUSED const IntArrayRef normalized_shape,
166+
const Tensor& weight,
167+
const Tensor& bias,
168+
double eps,
169+
double output_scale,
170+
int64_t output_zero_point,
171+
Tensor& out) {
172+
// Helper macro: emits one `case ScalarType::<dtype>` that instantiates the
// templated helper for the matching C type and then breaks out of the switch.
#define typed_quantized_layer_norm(ctype, dtype) \
173+
case ScalarType::dtype: { \
174+
quantized_layer_norm_per_tensor_<ctype>( \
175+
input, \
176+
in_scale, \
177+
in_zero_point, \
178+
weight, \
179+
bias, \
180+
eps, \
181+
output_scale, \
182+
output_zero_point, \
183+
out); \
184+
break; \
185+
}
186+
187+
// Dispatch on the dtype of the input tensor.
ScalarType dtype = input.scalar_type();
188+
switch (dtype) {
189+
// Presumably expands typed_quantized_layer_norm once per supported
// (ctype, dtype) pair - the macro is defined outside this file; verify.
ET_FORALL_CADENCE_QUANTIZED_TYPES(typed_quantized_layer_norm)
190+
default:
191+
// NOTE(review): the reference kernel reports an unhandled dtype with
// ET_CHECK_MSG, whereas this path uses ET_DCHECK_MSG. Confirm whether an
// unhandled dtype should also fail when debug checks are disabled.
ET_DCHECK_MSG(
192+
false, "Unhandled dtype %s", torch::executor::toString(dtype));
193+
}
194+
195+
// Keep the helper macro local to this function.
#undef typed_quantized_layer_norm
196+
}
197+
160198
}; // namespace native
161199
}; // namespace HiFi
162200
}; // namespace impl

backends/cadence/reference/operators/quantized_layer_norm.cpp

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,11 @@
1111

1212
#include <cmath>
1313

14-
using executorch::aten::Tensor;
15-
using executorch::runtime::getLeadingDims;
16-
using executorch::runtime::KernelRuntimeContext;
14+
using ::executorch::aten::IntArrayRef;
15+
using ::executorch::aten::ScalarType;
16+
using ::executorch::aten::Tensor;
17+
using ::executorch::runtime::getLeadingDims;
18+
using ::executorch::runtime::KernelRuntimeContext;
1719

1820
namespace impl {
1921
namespace reference {
@@ -22,7 +24,7 @@ namespace native {
2224
// Compute quantized layer_norm. The current implementation assumes that the
2325
// input is per-tensor quantized.
2426
template <typename T>
25-
void quantized_layer_norm_(
27+
void quantized_layer_norm_per_tensor_(
2628
const Tensor& input,
2729
double input_scale,
2830
int64_t input_zero_point,
@@ -98,7 +100,7 @@ void quantized_layer_norm_(
98100
int64_t input_zero_point = in_zero_point.const_data_ptr<int64_t>()[0];
99101

100102
// Call other overload
101-
quantized_layer_norm_<T>(
103+
quantized_layer_norm_per_tensor_<T>(
102104
input,
103105
input_scale,
104106
input_zero_point,
@@ -111,11 +113,11 @@ void quantized_layer_norm_(
111113
}
112114

113115
void quantized_layer_norm_out(
114-
KernelRuntimeContext& ctx,
116+
__ET_UNUSED KernelRuntimeContext& ctx,
115117
const Tensor& input,
116118
const Tensor& in_scale,
117119
const Tensor& in_zero_point,
118-
const executorch::aten::IntArrayRef normalized_shape,
120+
__ET_UNUSED const executorch::aten::IntArrayRef normalized_shape,
119121
const Tensor& weight,
120122
const Tensor& bias,
121123
double eps,
@@ -152,6 +154,48 @@ void quantized_layer_norm_out(
152154
}
153155
}
154156

157+
// Out-variant entry point for the per_tensor overload of quantized layer norm
// (reference implementation). The input scale and zero point arrive as scalars
// (double / int64_t) and are forwarded, together with weight, bias, eps, the
// output quantization parameters and `out`, to
// quantized_layer_norm_per_tensor_<T>, with T chosen from the input dtype.
// `ctx` and `normalized_shape` are marked __ET_UNUSED and are not read here.
void quantized_layer_norm_per_tensor_out(
158+
__ET_UNUSED KernelRuntimeContext& ctx,
159+
const Tensor& input,
160+
double in_scale,
161+
int64_t in_zero_point,
162+
__ET_UNUSED const executorch::aten::IntArrayRef normalized_shape,
163+
const Tensor& weight,
164+
const Tensor& bias,
165+
double eps,
166+
double output_scale,
167+
int64_t output_zero_point,
168+
Tensor& out) {
169+
if (input.scalar_type() == executorch::aten::ScalarType::Byte) {
170+
// Byte input: instantiate the helper for uint8_t.
quantized_layer_norm_per_tensor_<uint8_t>(
171+
input,
172+
in_scale,
173+
in_zero_point,
174+
weight,
175+
bias,
176+
eps,
177+
output_scale,
178+
output_zero_point,
179+
out);
180+
} else if (input.scalar_type() == executorch::aten::ScalarType::Char) {
181+
// Char input: instantiate the helper for int8_t.
quantized_layer_norm_per_tensor_<int8_t>(
182+
input,
183+
in_scale,
184+
in_zero_point,
185+
weight,
186+
bias,
187+
eps,
188+
output_scale,
189+
output_zero_point,
190+
out);
191+
} else {
192+
// Only Byte and Char are handled. Any other dtype trips this check, which
// prints the dtype's enum value as a signed 8-bit integer (%hhd).
ET_CHECK_MSG(
193+
false,
194+
"Unhandled input dtype %hhd",
195+
static_cast<int8_t>(input.scalar_type()));
196+
}
197+
}
198+
155199
}; // namespace native
156200
}; // namespace reference
157201
}; // namespace impl

backends/qualcomm/runtime/backends/QnnFunctionInterface.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ class QnnInterface {
7070
DEFINE_SHIM_FUNCTION_INTERFACE(log_set_log_level, logSetLogLevel);
7171
// --------- QnnProfile ---------
7272
DEFINE_SHIM_FUNCTION_INTERFACE(profile_create, profileCreate);
73+
DEFINE_SHIM_FUNCTION_INTERFACE(profile_set_config, profileSetConfig);
7374
DEFINE_SHIM_FUNCTION_INTERFACE(profile_get_events, profileGetEvents);
7475
DEFINE_SHIM_FUNCTION_INTERFACE(profile_get_sub_events, profileGetSubEvents);
7576
DEFINE_SHIM_FUNCTION_INTERFACE(profile_get_event_data, profileGetEventData);

0 commit comments

Comments
 (0)