Skip to content

Commit 6ccb290

Browse files
authored
Switch to the new tensor API internally.
Differential Revision: D61959575 Pull Request resolved: #5111
1 parent cea5abb commit 6ccb290

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+206
-265
lines changed

.ci/scripts/build-qnn-sdk.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ set_up_aot() {
2929
-DQNN_SDK_ROOT=${QNN_SDK_ROOT} \
3030
-DEXECUTORCH_BUILD_SDK=ON \
3131
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
32+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
3233
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
3334
-DPYTHON_EXECUTABLE=python3 \
3435
-DEXECUTORCH_SEPARATE_FLATCC_HOST_PROJECT=OFF

.ci/scripts/build_llama_android.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,9 @@ install_executorch_and_backend_lib() {
2222
-DANDROID_PLATFORM=android-23 \
2323
-DCMAKE_INSTALL_PREFIX=cmake-android-out \
2424
-DCMAKE_BUILD_TYPE=Release \
25-
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
2625
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
26+
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
27+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
2728
-DEXECUTORCH_BUILD_XNNPACK=ON \
2829
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
2930
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \

.ci/scripts/test_llama.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,9 @@ cmake_install_executorch_libraries() {
107107
retry cmake \
108108
-DCMAKE_INSTALL_PREFIX=cmake-out \
109109
-DCMAKE_BUILD_TYPE=Debug \
110-
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
111110
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
111+
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
112+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
112113
-DEXECUTORCH_BUILD_KERNELS_CUSTOM="$CUSTOM" \
113114
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
114115
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \

.ci/scripts/test_llava.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@ cmake_install_executorch_libraries() {
2020
cmake \
2121
-DCMAKE_INSTALL_PREFIX=cmake-out \
2222
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
23-
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
2423
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
24+
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
25+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
2526
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
2627
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
2728
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
@@ -61,7 +62,7 @@ export_llava() {
6162
# Download a new image with different size, to test if the model can handle different image sizes
6263
prepare_image_tensor() {
6364
echo "Downloading image"
64-
curl -o basketball.jpg https://upload.wikimedia.org/wikipedia/commons/7/73/Chicago_Bulls_and_New_Jersey_Nets%2C_March_28%2C_1991.jpg
65+
curl -o basketball.jpg https://upload.wikimedia.org/wikipedia/commons/7/73/Chicago_Bulls_and_New_Jersey_Nets%2C_March_28%2C_1991.jpg
6566
$PYTHON_EXECUTABLE -m executorch.examples.models.llava.image_util --image-path basketball.jpg --output-path image.pt
6667
}
6768

backends/qualcomm/scripts/build.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ if [ "$BUILD_AARCH64" = true ]; then
8181
-DEXECUTORCH_BUILD_QNN=ON \
8282
-DEXECUTORCH_BUILD_SDK=ON \
8383
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
84+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
8485
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
8586
-DQNN_SDK_ROOT=$QNN_SDK_ROOT \
8687
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \
@@ -124,6 +125,7 @@ if [ "$BUILD_X86_64" = true ]; then
124125
-DEXECUTORCH_BUILD_QNN=ON \
125126
-DEXECUTORCH_BUILD_SDK=ON \
126127
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
128+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
127129
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
128130
-DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
129131
-S $PRJ_ROOT \

backends/vulkan/docs/android_demo.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,9 @@ binary using the Android NDK toolchain.
9494
cmake . -DCMAKE_INSTALL_PREFIX=cmake-android-out \
9595
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
9696
-DANDROID_ABI=$ANDROID_ABI \
97-
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
9897
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
98+
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
99+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
99100
-DEXECUTORCH_BUILD_VULKAN=ON \
100101
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
101102
-DPYTHON_EXECUTABLE=python \

backends/xnnpack/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,9 +105,10 @@ mkdir cmake-out
105105
cmake \
106106
-DCMAKE_INSTALL_PREFIX=cmake-out \
107107
-DCMAKE_BUILD_TYPE=Release \
108+
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
108109
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
110+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
109111
-DEXECUTORCH_BUILD_XNNPACK=ON \
110-
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
111112
-DEXECUTORCH_ENABLE_LOGGING=ON \
112113
-DPYTHON_EXECUTABLE=python \
113114
-Bcmake-out .

build/build_android_llm_demo.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ build_android_native_library() {
3838
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
3939
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
4040
-DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
41+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
4142
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
4243
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
4344
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \

docs/source/build-run-qualcomm-ai-engine-direct-backend.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ cmake .. \
136136
-DQNN_SDK_ROOT=${QNN_SDK_ROOT} \
137137
-DEXECUTORCH_BUILD_SDK=ON \
138138
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
139+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
139140
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
140141
-DPYTHON_EXECUTABLE=python3 \
141142
-DEXECUTORCH_SEPARATE_FLATCC_HOST_PROJECT=OFF
@@ -167,6 +168,7 @@ cmake .. \
167168
-DQNN_SDK_ROOT=$QNN_SDK_ROOT \
168169
-DEXECUTORCH_BUILD_SDK=ON \
169170
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
171+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
170172
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
171173
-DPYTHON_EXECUTABLE=python3 \
172174
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \

docs/source/llm/getting-started.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -201,9 +201,9 @@ Create a file called main.cpp with the following contents:
201201

202202
#include "basic_sampler.h"
203203
#include "basic_tokenizer.h"
204-
#include "managed_tensor.h"
205204

206205
#include <executorch/extension/module/module.h>
206+
#include <executorch/extension/tensor/tensor.h>
207207
#include <executorch/runtime/core/evalue.h>
208208
#include <executorch/runtime/core/exec_aten/exec_aten.h>
209209
#include <executorch/runtime/core/result.h>
@@ -244,14 +244,13 @@ std::string generate(
244244
for (auto i = 0u; i < max_output_length; i++) {
245245
// Convert the input_tokens from a vector of int64_t to EValue.
246246
// EValue is a unified data type in the ExecuTorch runtime.
247-
ManagedTensor tensor_tokens(
247+
auto inputs = from_blob(
248248
input_tokens.data(),
249249
{1, static_cast<int>(input_tokens.size())},
250250
ScalarType::Long);
251-
std::vector<EValue> inputs = {tensor_tokens.get_tensor()};
252251

253252
// Run the model. It will return a tensor of logits (log-probabilities).
254-
Result<std::vector<EValue>> logits_evalue = llm_model.forward(inputs);
253+
auto logits_evalue = llm_model.forward(inputs);
255254

256255
// Convert the output logits from EValue to std::vector, which is what
257256
// the sampler expects.
@@ -339,7 +338,6 @@ Finally, download the following files into the same directory as main.h:
339338
```
340339
curl -O https://raw.githubusercontent.com/pytorch/executorch/main/examples/llm_manual/basic_sampler.h
341340
curl -O https://raw.githubusercontent.com/pytorch/executorch/main/examples/llm_manual/basic_tokenizer.h
342-
curl -O https://raw.githubusercontent.com/pytorch/executorch/main/examples/llm_manual/managed_tensor.h
343341
```
344342

345343
To learn more, see the [Runtime APIs Tutorial](../extension-module.md).
@@ -364,6 +362,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED True)
364362
# Set options for executorch build.
365363
option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "" ON)
366364
option(EXECUTORCH_BUILD_EXTENSION_MODULE "" ON)
365+
option(EXECUTORCH_BUILD_EXTENSION_TENSOR "" ON)
367366
option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "" ON)
368367
369368
# Include the executorch subdirectory.
@@ -377,6 +376,7 @@ target_link_libraries(
377376
PRIVATE
378377
executorch
379378
extension_module_static # Provides the Module class
379+
extension_tensor # Provides the TensorPtr class
380380
optimized_native_cpu_ops_lib) # Provides baseline cross-platform kernels
381381
```
382382

@@ -386,7 +386,6 @@ At this point, the working directory should contain the following files:
386386
- main.cpp
387387
- basic_tokenizer.h
388388
- basic_sampler.h
389-
- managed_tensor.h
390389
- export_nanogpt.py
391390
- model.py
392391
- vocab.json
@@ -518,6 +517,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED True)
518517
# Set options for executorch build.
519518
option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "" ON)
520519
option(EXECUTORCH_BUILD_EXTENSION_MODULE "" ON)
520+
option(EXECUTORCH_BUILD_EXTENSION_TENSOR "" ON)
521521
option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "" ON)
522522
option(EXECUTORCH_BUILD_XNNPACK "" ON) # Build with Xnnpack backend
523523
@@ -534,6 +534,7 @@ target_link_libraries(
534534
PRIVATE
535535
executorch
536536
extension_module_static # Provides the Module class
537+
extension_tensor # Provides the TensorPtr class
537538
optimized_native_cpu_ops_lib # Provides baseline cross-platform kernels
538539
xnnpack_backend) # Provides the XNNPACK CPU acceleration backend
539540
```
@@ -548,7 +549,6 @@ At this point, the working directory should contain the following files:
548549
- main.cpp
549550
- basic_tokenizer.h
550551
- basic_sampler.h
551-
- managed_tensor.h
552552
- export_nanogpt.py
553553
- model.py
554554
- vocab.json

docs/source/tutorial-xnnpack-delegate-lowering.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,9 +149,10 @@ mkdir cmake-out
149149
cmake \
150150
-DCMAKE_INSTALL_PREFIX=cmake-out \
151151
-DCMAKE_BUILD_TYPE=Release \
152+
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
152153
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
154+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
153155
-DEXECUTORCH_BUILD_XNNPACK=ON \
154-
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
155156
-DEXECUTORCH_ENABLE_LOGGING=ON \
156157
-DPYTHON_EXECUTABLE=python \
157158
-Bcmake-out .

examples/demo-apps/android/ExecuTorchDemo/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ cmake . -DCMAKE_INSTALL_PREFIX=cmake-android-out \
7878
-DEXECUTORCH_BUILD_XNNPACK=ON \
7979
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
8080
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
81+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
8182
-Bcmake-android-out
8283

8384
cmake --build cmake-android-out -j16 --target install
@@ -119,6 +120,7 @@ cmake . -DCMAKE_INSTALL_PREFIX=cmake-android-out \
119120
-DQNN_SDK_ROOT="${QNN_SDK_ROOT}" \
120121
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
121122
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
123+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
122124
-Bcmake-android-out
123125

124126
cmake --build cmake-android-out -j16 --target install

examples/demo-apps/android/ExecuTorchDemo/setup.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
1515
-DEXECUTORCH_BUILD_XNNPACK=ON \
1616
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
1717
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
18+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
1819
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
1920
-DCMAKE_BUILD_TYPE=Release \
2021
-B"${CMAKE_OUT}"

examples/demo-apps/android/LlamaDemo/setup-with-qnn.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
1616
-DEXECUTORCH_BUILD_XNNPACK=ON \
1717
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
1818
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
19+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
1920
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
2021
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
2122
-DEXECUTORCH_BUILD_QNN=ON \

examples/demo-apps/android/LlamaDemo/setup.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
1616
-DEXECUTORCH_BUILD_XNNPACK=ON \
1717
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
1818
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
19+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
1920
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
2021
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
2122
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \

examples/llm_manual/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED True)
1313
# Set options for executorch build.
1414
option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "" ON)
1515
option(EXECUTORCH_BUILD_EXTENSION_MODULE "" ON)
16+
option(EXECUTORCH_BUILD_EXTENSION_TENSOR "" ON)
1617
option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "" ON)
1718
option(EXECUTORCH_BUILD_XNNPACK "" ON) # Build with Xnnpack backend
1819

@@ -29,6 +30,7 @@ target_link_libraries(
2930
nanogpt_runner
3031
PRIVATE executorch
3132
extension_module_static # Provides the Module class
33+
extension_tensor # Provides the TensorPtr class
3234
optimized_native_cpu_ops_lib # Provides baseline cross-platform
3335
# kernels
3436
xnnpack_backend

examples/llm_manual/main.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@
1010

1111
#include "basic_sampler.h"
1212
#include "basic_tokenizer.h"
13-
#include "managed_tensor.h"
1413

1514
#include <executorch/extension/module/module.h>
15+
#include <executorch/extension/tensor/tensor.h>
1616
#include <executorch/runtime/core/evalue.h>
1717
#include <executorch/runtime/core/exec_aten/exec_aten.h>
1818
#include <executorch/runtime/core/result.h>
@@ -42,14 +42,13 @@ std::string generate(
4242
for (auto i = 0u; i < max_output_length; i++) {
4343
// Convert the input_tokens from a vector of int64_t to EValue.
4444
// EValue is a unified data type in the ExecuTorch runtime.
45-
ManagedTensor tensor_tokens(
45+
auto inputs = from_blob(
4646
input_tokens.data(),
4747
{1, static_cast<int>(input_tokens.size())},
4848
ScalarType::Long);
49-
std::vector<EValue> inputs = {tensor_tokens.get_tensor()};
5049

5150
// Run the model. It will return a tensor of logits (log-probabilities).
52-
Result<std::vector<EValue>> logits_evalue = llm_model.forward(inputs);
51+
auto logits_evalue = llm_model.forward(inputs);
5352

5453
// Convert the output logits from EValue to std::vector, which is what
5554
// the sampler expects.

examples/llm_manual/managed_tensor.h

Lines changed: 0 additions & 44 deletions
This file was deleted.

examples/models/flamingo/cross_attention/cross_attention_mask.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,11 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9+
#include <executorch/examples/models/flamingo/cross_attention/cross_attention_mask.h>
10+
911
#include <algorithm>
1012
#include <string>
1113

12-
#include <executorch/examples/models/flamingo/cross_attention/cross_attention_mask.h>
13-
#include <executorch/extension/runner_util/managed_tensor.h>
14-
1514
namespace torch::executor {
1615

1716
// Forward declaration needed for ARM compilers.
@@ -97,7 +96,7 @@ std::vector<std::vector<int>> _get_image_attention_intervals(
9796
return vision_masks;
9897
}
9998

100-
std::vector<ManagedTensor> cross_attention_mask(
99+
std::vector<executorch::extension::TensorPtr> cross_attention_mask(
101100
const std::vector<int>& tokens,
102101
const std::vector<Tensor>& images,
103102
size_t tile_size,
@@ -121,7 +120,7 @@ std::vector<ManagedTensor> cross_attention_mask(
121120
// Create mask for each individual image based on its number of tokens,
122121
// which can vary based on number of tiles since they are not yet tile padded.
123122
// The masks are padded and concatenated together in the batch collator.
124-
std::vector<ManagedTensor> cross_attention_masks;
123+
std::vector<executorch::extension::TensorPtr> cross_attention_masks;
125124
size_t text_seq_len = tokens.size();
126125
for (size_t image_idx = 0; image_idx < image_intervals.size(); ++image_idx) {
127126
size_t n_tiles = images[image_idx].size(0);
@@ -140,7 +139,8 @@ std::vector<ManagedTensor> cross_attention_mask(
140139
size_t stride = image_seq_len;
141140
std::vector<int> mask_data(num_elements);
142141

143-
ManagedTensor mask(mask_data.data(), sizes, ScalarType::Int);
142+
auto mask = executorch::extension::from_blob(
143+
mask_data.data(), sizes, ScalarType::Int);
144144
cross_attention_masks.emplace_back(std::move(mask));
145145

146146
// Add the allocated data to the output vector.

0 commit comments

Comments
 (0)