Skip to content

Commit 650869c

Browse files
committed
Update base for Update on "[ET-VK] Introduce vTensorPtr to prevent reference invalidation and remove get_val() API"
## Context Currently when writing operators developers will save a reference to a `vTensor` retrieved from a `ComputeGraph`'s list of `values_` like so: ``` vTensor& vten = graph.get_val(vref).toTensor(); ``` However, this is dangerous since if any values are added once the reference has been stored, `values_` which is a `std::vector` may have been resized and therefore have its contents moved, meaning the reference is now invalid. To protect against this, this changeset introduces the `vTensorPtr` class which is a wrapper around a `vTensor*`. When constructed, it will increment a counter in the `ComputeGraph` instance, and when destroyed it will decrement the counter. `ComputeGraph` cannot add any values while the counter is not zero. Since `Value` can be converted to other non-trivial types, this changeset also removes the `get_val` function entirely to guard against unsafe behaviour. Differential Revision: [D55984187](https://our.internmc.facebook.com/intern/diff/D55984187/) [ghstack-poisoned]
2 parents 39dc8e6 + 62a4dd3 commit 650869c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+809
-327
lines changed

CMakeLists.txt

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,8 @@ option(EXECUTORCH_BUILD_COREML "Build the Core ML backend" OFF)
144144

145145
option(EXECUTORCH_BUILD_CUSTOM "Build the custom kernels" OFF)
146146

147+
option(EXECUTORCH_BUILD_CUSTOM_OPS_AOT "Build the custom ops lib for AOT" OFF)
148+
147149
option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "Build the Data Loader extension"
148150
OFF)
149151

@@ -185,12 +187,19 @@ cmake_dependent_option(
185187
cmake_dependent_option(EXECUTORCH_BUILD_CPUINFO "Build cpuinfo library." ON
186188
"NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF)
187189

190+
if(EXECUTORCH_BUILD_CUSTOM_OPS_AOT)
191+
set(EXECUTORCH_BUILD_CUSTOM ON)
192+
endif()
193+
188194
if(EXECUTORCH_BUILD_CUSTOM)
189195
set(EXECUTORCH_BUILD_OPTIMIZED ON)
190196
endif()
191197

192198
if(EXECUTORCH_BUILD_CPUINFO)
193199
# --- cpuinfo
200+
set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG
201+
${CMAKE_POSITION_INDEPENDENT_CODE})
202+
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
194203
set(CPUINFO_SOURCE_DIR "backends/xnnpack/third-party/cpuinfo")
195204
set(CPUINFO_BUILD_TOOLS
196205
OFF
@@ -212,10 +221,15 @@ if(EXECUTORCH_BUILD_CPUINFO)
212221
CACHE STRING "")
213222
set(CLOG_SOURCE_DIR "${CPUINFO_SOURCE_DIR}/deps/clog")
214223
add_subdirectory("${CPUINFO_SOURCE_DIR}")
224+
set(CMAKE_POSITION_INDEPENDENT_CODE
225+
${ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG})
215226
endif()
216227

217228
if(EXECUTORCH_BUILD_PTHREADPOOL)
218229
# --- pthreadpool
230+
set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG
231+
${CMAKE_POSITION_INDEPENDENT_CODE})
232+
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
219233
set(PTHREADPOOL_SOURCE_DIR "backends/xnnpack/third-party/pthreadpool")
220234
set(PTHREADPOOL_BUILD_TESTS
221235
OFF
@@ -235,6 +249,8 @@ if(EXECUTORCH_BUILD_PTHREADPOOL)
235249
CACHE STRING "")
236250
endif()
237251
add_subdirectory("${PTHREADPOOL_SOURCE_DIR}")
252+
set(CMAKE_POSITION_INDEPENDENT_CODE
253+
${ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG})
238254
endif()
239255

240256
if(NOT PYTHON_EXECUTABLE)
@@ -546,6 +562,9 @@ if(EXECUTORCH_BUILD_PYBIND)
546562
list(APPEND _dep_libs custom_ops)
547563
endif()
548564

565+
if(EXECUTORCH_BUILD_CUSTOM_OPS_AOT)
566+
list(APPEND _dep_libs custom_ops_aot_lib)
567+
endif()
549568
# compile options for pybind
550569

551570
set(_pybind_compile_options -Wno-deprecated-declarations -fPIC -frtti

backends/qualcomm/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ target_link_libraries(qnn_executorch_backend
253253
qnn_executorch_header
254254
qnn_schema
255255
qnn_manager
256-
executorch
256+
executorch_no_prim_ops
257257
qcir_utils
258258
)
259259
target_link_libraries(utils

backends/xnnpack/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ add_library(xnnpack_backend STATIC ${_xnnpack_backend__srcs})
8181
target_link_libraries(xnnpack_backend
8282
PRIVATE
8383
${xnnpack_third_party}
84-
executorch
84+
executorch_no_prim_ops
8585
xnnpack_schema)
8686

8787
target_include_directories(xnnpack_backend

docs/source/build-run-qualcomm-ai-engine-direct-backend.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,10 @@ Python APIs on x64 are required to compile models to Qualcomm AI Engine Direct b
115115

116116
```bash
117117
cd $EXECUTORCH_ROOT
118+
# Workaround for fbs files in exir/_serialize
119+
cp schema/program.fbs exir/_serialize/program.fbs
120+
cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
121+
118122
mkdir build_x86_64
119123
cd build_x86_64
120124
cmake .. -DEXECUTORCH_BUILD_QNN=ON -DQNN_SDK_ROOT=${QNN_SDK_ROOT}
@@ -138,8 +142,8 @@ mkdir build_android
138142
cd build_android
139143
# build executorch & qnn_executorch_backend
140144
cmake .. \
141-
-DBUCK2=buck2 \
142145
-DCMAKE_INSTALL_PREFIX=$PWD \
146+
-DEXECUTORCH_BUILD_SDK=ON \
143147
-DEXECUTORCH_BUILD_QNN=ON \
144148
-DQNN_SDK_ROOT=$QNN_SDK_ROOT \
145149
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
@@ -220,6 +224,7 @@ So, we can run `qnn_executor_runner` like
220224
```bash
221225
adb push ./deeplab_v3/dlv3_qnn.pte ${DEVICE_DIR}
222226
adb push ${EXECUTORCH_ROOT}/build_android/examples/qualcomm/qnn_executor_runner ${DEVICE_DIR}
227+
adb push ${EXECUTORCH_ROOT}/build_android/lib/libqnn_executorch_backend.so ${DEVICE_DIR}
223228
adb shell "cd ${DEVICE_DIR} \
224229
&& export LD_LIBRARY_PATH=${DEVICE_DIR} \
225230
&& export ADSP_LIBRARY_PATH=${DEVICE_DIR} \

docs/source/build-run-xtensa.md

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ Step 2. Make sure you have completed the ExecuTorch setup tutorials linked to at
6464
The working tree is:
6565

6666
```
67-
examples/xtensa/
67+
examples/cadence/
6868
├── aot
6969
├── kernels
7070
├── ops
@@ -75,7 +75,7 @@ examples/xtensa/
7575

7676
***AoT (Ahead-of-Time) Components***:
7777

78-
The AoT folder contains all of the python scripts and functions needed to export the model to an ExecuTorch `.pte` file. In our case, [export_example.py](https://github.com/pytorch/executorch/blob/main/examples/xtensa/aot/export_example.py) is an API that takes a model (nn.Module) and representative inputs and runs it through the quantizer (from [quantizer.py](https://github.com/pytorch/executorch/blob/main/examples/xtensa/aot/quantizer.py)). Then a few compiler passes, also defined in [quantizer.py](https://github.com/pytorch/executorch/blob/main/examples/xtensa/aot/quantizer.py), will replace operators with custom ones that are supported and optimized on the chip. Any operator needed to compute things should be defined in [meta_registrations.py](https://github.com/pytorch/executorch/blob/main/examples/xtensa/aot/meta_registrations.py) and have corresponding implemetations in the other folders.
78+
The AoT folder contains all of the python scripts and functions needed to export the model to an ExecuTorch `.pte` file. In our case, [export_example.py](https://github.com/pytorch/executorch/blob/main/examples/cadence/aot/export_example.py) is an API that takes a model (nn.Module) and representative inputs and runs it through the quantizer (from [quantizer.py](https://github.com/pytorch/executorch/blob/main/examples/cadence/aot/quantizer.py)). Then a few compiler passes, also defined in [quantizer.py](https://github.com/pytorch/executorch/blob/main/examples/cadence/aot/quantizer.py), will replace operators with custom ones that are supported and optimized on the chip. Any operator needed to compute things should be defined in [meta_registrations.py](https://github.com/pytorch/executorch/blob/main/examples/cadence/aot/meta_registrations.py) and have corresponding implementations in the other folders.
7979

8080
***Operators***:
8181

@@ -101,14 +101,14 @@ python3 -m examples.portable.scripts.export --model_name="add"
101101
***Quantized Operators***:
102102

103103
The other, more complex models are custom operators, including:
104-
- a quantized [linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) operation. The model is defined [here](https://github.com/pytorch/executorch/blob/main/examples/xtensa/tests/quantized_linear_example.py#L28). Linear is the backbone of most Automatic Speech Recognition (ASR) models.
105-
- a quantized [conv1d](https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html) operation. The model is defined [here](https://github.com/pytorch/executorch/blob/main/examples/xtensa/tests/quantized_conv1d_example.py#L36). Convolutions are important in wake word and many denoising models.
104+
- a quantized [linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) operation. The model is defined [here](https://github.com/pytorch/executorch/blob/main/examples/cadence/tests/quantized_linear_example.py#L28). Linear is the backbone of most Automatic Speech Recognition (ASR) models.
105+
- a quantized [conv1d](https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html) operation. The model is defined [here](https://github.com/pytorch/executorch/blob/main/examples/cadence/tests/quantized_conv1d_example.py#L36). Convolutions are important in wake word and many denoising models.
106106

107107
In both cases the generated file is called `XtensaDemoModel.pte`.
108108

109109
```bash
110110
cd executorch
111-
python3 -m examples.xtensa.tests.quantized_<linear,conv1d>_example
111+
python3 -m examples.cadence.tests.quantized_<linear,conv1d>_example
112112
```
113113

114114
***Small Model: RNNT predictor***:
@@ -118,7 +118,7 @@ The predictor is a sequence of basic ops (embedding, ReLU, linear, layer norm) a
118118

119119
```bash
120120
cd executorch
121-
python3 -m examples.xtensa.tests.rnnt_predictor_quantized_example
121+
python3 -m examples.cadence.tests.rnnt_predictor_quantized_example
122122
```
123123

124124
The generated file is called `XtensaDemoModel.pte`.
@@ -131,7 +131,7 @@ In this step, you'll be building the DSP firmware image that consists of the sam
131131
***Step 1***. Configure the environment variables needed to point to the Xtensa toolchain that you have installed in the previous step. The three environment variables that need to be set include:
132132
```bash
133133
# Directory in which the Xtensa toolchain was installed
134-
export XTENSA_TOOLCHAIN=/home/user_name/xtensa/XtDevTools/install/tools
134+
export XTENSA_TOOLCHAIN=/home/user_name/cadence/XtDevTools/install/tools
135135
# The version of the toolchain that was installed. This is essentially the name of the directory
136136
# that is present in the XTENSA_TOOLCHAIN directory from above.
137137
export TOOLCHAIN_VER=RI-2021.8-linux
@@ -151,7 +151,7 @@ cd executorch
151151
rm -rf cmake-out
152152
# prebuild and install executorch library
153153
cmake -DBUCK2=buck2 \
154-
-DCMAKE_TOOLCHAIN_FILE=<path_to_executorch>/examples/xtensa/xtensa.cmake \
154+
-DCMAKE_TOOLCHAIN_FILE=<path_to_executorch>/examples/cadence/cadence.cmake \
155155
-DCMAKE_INSTALL_PREFIX=cmake-out \
156156
-DCMAKE_BUILD_TYPE=Debug \
157157
-DPYTHON_EXECUTABLE=python3 \
@@ -165,18 +165,18 @@ cmake -DBUCK2=buck2 \
165165
-Bcmake-out .
166166

167167
cmake --build cmake-out -j8 --target install --config Debug
168-
# build xtensa runner
168+
# build cadence runner
169169
cmake -DCMAKE_BUILD_TYPE=Debug \
170-
-DCMAKE_TOOLCHAIN_FILE=<path_to_executorch>/examples/xtensa/xtensa.cmake \
170+
-DCMAKE_TOOLCHAIN_FILE=<path_to_executorch>/examples/cadence/cadence.cmake \
171171
-DCMAKE_PREFIX_PATH=<path_to_executorch>/cmake-out \
172172
-DMODEL_PATH=<path_to_program_file_generated_in_previous_step> \
173173
-DNXP_SDK_ROOT_DIR=<path_to_nxp_sdk_root> -DEXECUTORCH_BUILD_FLATC=0 \
174174
-DFLATC_EXECUTABLE="$(which flatc)" \
175175
-DNN_LIB_BASE_DIR=<path_to_nnlib_cloned_in_step_2> \
176-
-Bcmake-out/examples/xtensa \
177-
examples/xtensa
176+
-Bcmake-out/examples/cadence \
177+
examples/cadence
178178

179-
cmake --build cmake-out/examples/xtensa -j8 -t xtensa_executorch_example
179+
cmake --build cmake-out/examples/cadence -j8 -t cadence_executorch_example
180180
```
181181

182182
After having successfully run the above step, you should see two binary files in their CMake output directory.
@@ -213,6 +213,6 @@ First 20 elements of output 0
213213

214214
In this tutorial, you have learned how to export a quantized operation, build the ExecuTorch runtime and run this model on the Xtensa HiFi4 DSP chip.
215215

216-
The (quantized linear) model in this tutorial is a typical operation appearing in ASR models, and can be extended to a complete ASR model by creating the model as a new test and adding the needed operators/kernels to [operators](https://github.com/pytorch/executorch/blob/main/examples/xtensa/ops) and [kernels](https://github.com/pytorch/executorch/blob/main/examples/xtensa/kernels).
216+
The (quantized linear) model in this tutorial is a typical operation appearing in ASR models, and can be extended to a complete ASR model by creating the model as a new test and adding the needed operators/kernels to [operators](https://github.com/pytorch/executorch/blob/main/examples/cadence/ops) and [kernels](https://github.com/pytorch/executorch/blob/main/examples/cadence/kernels).
217217

218218
Other models can be created following the same structure, always assuming that operators and kernels are available.

examples/xtensa/CMakeLists.txt renamed to examples/cadence/CMakeLists.txt

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ if(NOT CMAKE_CXX_STANDARD)
1212
endif()
1313

1414
# Set the project name.
15-
project(xtensa_executorch_example)
15+
project(cadence_executorch_example)
1616

1717
# Source root directory for executorch.
1818
if(NOT EXECUTORCH_ROOT)
@@ -100,21 +100,21 @@ add_custom_command(
100100

101101
add_custom_target(gen_model_header DEPENDS ${CMAKE_BINARY_DIR}/model_pte.h)
102102

103-
add_executable(xtensa_executorch_example executor_runner.cpp)
104-
add_dependencies(xtensa_executorch_example gen_model_header)
103+
add_executable(cadence_executorch_example executor_runner.cpp)
104+
add_dependencies(cadence_executorch_example gen_model_header)
105105

106106
# lint_cmake: -linelength
107-
target_include_directories(xtensa_executorch_example PUBLIC ${ROOT_DIR}/..
107+
target_include_directories(cadence_executorch_example PUBLIC ${ROOT_DIR}/..
108108
${CMAKE_BINARY_DIR}
109109
${_common_include_directories})
110110

111-
target_link_options(xtensa_executorch_example PRIVATE
111+
target_link_options(cadence_executorch_example PRIVATE
112112
-mlsp=${NXP_SDK_ROOT_DIR}/devices/MIMXRT685S/xtensa/min-rt)
113-
target_link_libraries(xtensa_executorch_example dsp_mu_polling_libs
114-
xtensa_ops_lib extension_runner_util executorch)
113+
target_link_libraries(cadence_executorch_example dsp_mu_polling_libs
114+
cadence_ops_lib extension_runner_util executorch)
115115

116116
add_custom_command(
117-
TARGET xtensa_executorch_example
117+
TARGET cadence_executorch_example
118118
POST_BUILD
119119
COMMAND
120120
${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/utils/post_compilation.py
File renamed without changes.

examples/xtensa/aot/export_example.py renamed to examples/cadence/aot/export_example.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,20 +17,20 @@
1717

1818
from .compiler import export_to_edge
1919
from .quantizer import (
20+
CadenceBaseQuantizer,
2021
QuantFusion,
21-
ReplacePT2DequantWithXtensaDequant,
22-
ReplacePT2QuantWithXtensaQuant,
23-
XtensaBaseQuantizer,
22+
ReplacePT2DequantWithCadenceDequant,
23+
ReplacePT2QuantWithCadenceQuant,
2424
)
2525

2626

2727
FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
2828
logging.basicConfig(level=logging.INFO, format=FORMAT)
2929

3030

31-
def export_xtensa_model(model, example_inputs):
31+
def export_model(model, example_inputs):
3232
# Quantizer
33-
quantizer = XtensaBaseQuantizer()
33+
quantizer = CadenceBaseQuantizer()
3434

3535
# Export
3636
model_exp = capture_pre_autograd_graph(model, example_inputs)
@@ -42,24 +42,24 @@ def export_xtensa_model(model, example_inputs):
4242
# Convert
4343
converted_model = convert_pt2e(prepared_model)
4444

45-
# pyre-fixme[16]: Pyre doesn't get that XtensaQuantizer has a patterns attribute
45+
# pyre-fixme[16]: Pyre doesn't get that CadenceQuantizer has a patterns attribute
4646
patterns = [q.pattern for q in quantizer.quantizers]
4747
QuantFusion(patterns)(converted_model)
4848

49-
# Get edge program (note: the name will change to export_to_xtensa in future PRs)
49+
# Get edge program (note: the name will change to export_to_cadence in future PRs)
5050
edge_prog_manager = export_to_edge(converted_model, example_inputs, pt2_quant=True)
5151

5252
# Run a couple required passes for quant/dequant ops
53-
xtensa_prog_manager = edge_prog_manager.transform(
54-
[ReplacePT2QuantWithXtensaQuant(), ReplacePT2DequantWithXtensaDequant()],
53+
cadence_prog_manager = edge_prog_manager.transform(
54+
[ReplacePT2QuantWithCadenceQuant(), ReplacePT2DequantWithCadenceDequant()],
5555
check_ir_validity=False,
5656
)
5757

58-
exec_prog = xtensa_prog_manager.to_executorch()
58+
exec_prog = cadence_prog_manager.to_executorch()
5959

6060
logging.info(
6161
f"Final exported graph module:\n{exec_prog.exported_program().graph_module}"
6262
)
6363

64-
# Save the program as XtensaDemoModel.pte
65-
save_pte_program(exec_prog, "XtensaDemoModel")
64+
# Save the program as CadenceDemoModel.pte
65+
save_pte_program(exec_prog, "CadenceDemoModel")

examples/xtensa/aot/meta_registrations.py renamed to examples/cadence/aot/meta_registrations.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
from .utils import get_conv1d_output_size
1414

15-
lib = Library("xtensa", "DEF")
15+
lib = Library("cadence", "DEF")
1616

1717
lib.define(
1818
"quantize_per_tensor(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> (Tensor Z)"
@@ -56,7 +56,7 @@
5656
"quantized_conv.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)"
5757
)
5858

59-
m = Library("xtensa", "IMPL", "Meta")
59+
m = Library("cadence", "IMPL", "Meta")
6060

6161

6262
@impl(m, "quantize_per_tensor")

0 commit comments

Comments
 (0)