Skip to content

Commit d8fba6e

Browse files
committed
Update base for Update on "[ET-VK] Integrate axis mapping into staging <-> image transfer shaders"
## Context Building on the previous diff, this diff integrates axis mapping into staging <-> image transfer shaders. Alternative versions of indexing utility functions are introduced to account for axis mapping. The impact of shader latency of using axis mapping on transfer shaders is examined in the next diff. Differential Revision: [D62210117](https://our.internmc.facebook.com/intern/diff/D62210117/) [ghstack-poisoned]
2 parents 7535ad3 + 9739609 commit d8fba6e

File tree

112 files changed

+1796
-759
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

112 files changed

+1796
-759
lines changed

.ci/scripts/build-qnn-sdk.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ set_up_aot() {
2929
-DQNN_SDK_ROOT=${QNN_SDK_ROOT} \
3030
-DEXECUTORCH_BUILD_SDK=ON \
3131
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
32+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
3233
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
3334
-DPYTHON_EXECUTABLE=python3 \
3435
-DEXECUTORCH_SEPARATE_FLATCC_HOST_PROJECT=OFF

.ci/scripts/build_llama_android.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,9 @@ install_executorch_and_backend_lib() {
2222
-DANDROID_PLATFORM=android-23 \
2323
-DCMAKE_INSTALL_PREFIX=cmake-android-out \
2424
-DCMAKE_BUILD_TYPE=Release \
25-
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
2625
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
26+
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
27+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
2728
-DEXECUTORCH_BUILD_XNNPACK=ON \
2829
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
2930
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \

.ci/scripts/test_llama.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,9 @@ cmake_install_executorch_libraries() {
107107
retry cmake \
108108
-DCMAKE_INSTALL_PREFIX=cmake-out \
109109
-DCMAKE_BUILD_TYPE=Debug \
110-
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
111110
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
111+
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
112+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
112113
-DEXECUTORCH_BUILD_KERNELS_CUSTOM="$CUSTOM" \
113114
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
114115
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \

.ci/scripts/test_llava.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@ cmake_install_executorch_libraries() {
2020
cmake \
2121
-DCMAKE_INSTALL_PREFIX=cmake-out \
2222
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
23-
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
2423
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
24+
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
25+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
2526
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
2627
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
2728
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
@@ -61,7 +62,7 @@ export_llava() {
6162
# Download a new image with different size, to test if the model can handle different image sizes
6263
prepare_image_tensor() {
6364
echo "Downloading image"
64-
curl -o basketball.jpg https://upload.wikimedia.org/wikipedia/commons/7/73/Chicago_Bulls_and_New_Jersey_Nets%2C_March_28%2C_1991.jpg
65+
curl -o basketball.jpg https://upload.wikimedia.org/wikipedia/commons/7/73/Chicago_Bulls_and_New_Jersey_Nets%2C_March_28%2C_1991.jpg
6566
$PYTHON_EXECUTABLE -m executorch.examples.models.llava.image_util --image-path basketball.jpg --output-path image.pt
6667
}
6768

.github/workflows/upload-android-test-specs.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ jobs:
4141
with:
4242
# Just use a small model here with a minimal amount of configuration to test the spec
4343
models: stories110M
44-
devices: samsung_galaxy_s2x
44+
devices: samsung_galaxy_s22
4545
delegates: xnnpack
4646
test_spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/android-llm-device-farm-test-spec.yml
4747

backends/qualcomm/scripts/build.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ if [ "$BUILD_AARCH64" = true ]; then
8181
-DEXECUTORCH_BUILD_QNN=ON \
8282
-DEXECUTORCH_BUILD_SDK=ON \
8383
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
84+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
8485
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
8586
-DQNN_SDK_ROOT=$QNN_SDK_ROOT \
8687
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \
@@ -124,6 +125,7 @@ if [ "$BUILD_X86_64" = true ]; then
124125
-DEXECUTORCH_BUILD_QNN=ON \
125126
-DEXECUTORCH_BUILD_SDK=ON \
126127
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
128+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
127129
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
128130
-DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
129131
-S $PRJ_ROOT \

backends/vulkan/docs/android_demo.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,9 @@ binary using the Android NDK toolchain.
9494
cmake . -DCMAKE_INSTALL_PREFIX=cmake-android-out \
9595
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
9696
-DANDROID_ABI=$ANDROID_ABI \
97-
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
9897
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
98+
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
99+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
99100
-DEXECUTORCH_BUILD_VULKAN=ON \
100101
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
101102
-DPYTHON_EXECUTABLE=python \

backends/vulkan/runtime/graph/ops/impl/utils/DimUtils.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ constexpr DimIndex kChannel4D = DimIndex::DIM_3RD_LAST;
3232
constexpr DimIndex kBatch4D = DimIndex::DIM_4TH_LAST;
3333

3434
inline DimIndex normalize_to_dim_index(const api::vTensor& v_in, int32_t dim) {
35-
return static_cast<DimIndex>(dim - v_in.dim());
35+
return dim < 0 ? static_cast<DimIndex>(dim)
36+
: static_cast<DimIndex>(dim - v_in.dim());
3637
}
3738

3839
/*

backends/xnnpack/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,9 +105,10 @@ mkdir cmake-out
105105
cmake \
106106
-DCMAKE_INSTALL_PREFIX=cmake-out \
107107
-DCMAKE_BUILD_TYPE=Release \
108+
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
108109
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
110+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
109111
-DEXECUTORCH_BUILD_XNNPACK=ON \
110-
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
111112
-DEXECUTORCH_ENABLE_LOGGING=ON \
112113
-DPYTHON_EXECUTABLE=python \
113114
-Bcmake-out .

build/build_android_llm_demo.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ build_android_native_library() {
3838
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
3939
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
4040
-DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
41+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
4142
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
4243
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
4344
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \

docs/source/build-run-qualcomm-ai-engine-direct-backend.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ cmake .. \
136136
-DQNN_SDK_ROOT=${QNN_SDK_ROOT} \
137137
-DEXECUTORCH_BUILD_SDK=ON \
138138
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
139+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
139140
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
140141
-DPYTHON_EXECUTABLE=python3 \
141142
-DEXECUTORCH_SEPARATE_FLATCC_HOST_PROJECT=OFF
@@ -167,6 +168,7 @@ cmake .. \
167168
-DQNN_SDK_ROOT=$QNN_SDK_ROOT \
168169
-DEXECUTORCH_BUILD_SDK=ON \
169170
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
171+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
170172
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
171173
-DPYTHON_EXECUTABLE=python3 \
172174
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \

docs/source/llm/getting-started.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -201,9 +201,9 @@ Create a file called main.cpp with the following contents:
201201

202202
#include "basic_sampler.h"
203203
#include "basic_tokenizer.h"
204-
#include "managed_tensor.h"
205204

206205
#include <executorch/extension/module/module.h>
206+
#include <executorch/extension/tensor/tensor.h>
207207
#include <executorch/runtime/core/evalue.h>
208208
#include <executorch/runtime/core/exec_aten/exec_aten.h>
209209
#include <executorch/runtime/core/result.h>
@@ -244,14 +244,13 @@ std::string generate(
244244
for (auto i = 0u; i < max_output_length; i++) {
245245
// Convert the input_tokens from a vector of int64_t to EValue.
246246
// EValue is a unified data type in the ExecuTorch runtime.
247-
ManagedTensor tensor_tokens(
247+
auto inputs = from_blob(
248248
input_tokens.data(),
249249
{1, static_cast<int>(input_tokens.size())},
250250
ScalarType::Long);
251-
std::vector<EValue> inputs = {tensor_tokens.get_tensor()};
252251

253252
// Run the model. It will return a tensor of logits (log-probabilities).
254-
Result<std::vector<EValue>> logits_evalue = llm_model.forward(inputs);
253+
auto logits_evalue = llm_model.forward(inputs);
255254

256255
// Convert the output logits from EValue to std::vector, which is what
257256
// the sampler expects.
@@ -339,7 +338,6 @@ Finally, download the following files into the same directory as main.h:
339338
```
340339
curl -O https://raw.githubusercontent.com/pytorch/executorch/main/examples/llm_manual/basic_sampler.h
341340
curl -O https://raw.githubusercontent.com/pytorch/executorch/main/examples/llm_manual/basic_tokenizer.h
342-
curl -O https://raw.githubusercontent.com/pytorch/executorch/main/examples/llm_manual/managed_tensor.h
343341
```
344342

345343
To learn more, see the [Runtime APIs Tutorial](../extension-module.md).
@@ -364,6 +362,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED True)
364362
# Set options for executorch build.
365363
option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "" ON)
366364
option(EXECUTORCH_BUILD_EXTENSION_MODULE "" ON)
365+
option(EXECUTORCH_BUILD_EXTENSION_TENSOR "" ON)
367366
option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "" ON)
368367
369368
# Include the executorch subdirectory.
@@ -377,6 +376,7 @@ target_link_libraries(
377376
PRIVATE
378377
executorch
379378
extension_module_static # Provides the Module class
379+
extension_tensor # Provides the TensorPtr class
380380
optimized_native_cpu_ops_lib) # Provides baseline cross-platform kernels
381381
```
382382

@@ -386,7 +386,6 @@ At this point, the working directory should contain the following files:
386386
- main.cpp
387387
- basic_tokenizer.h
388388
- basic_sampler.h
389-
- managed_tensor.h
390389
- export_nanogpt.py
391390
- model.py
392391
- vocab.json
@@ -518,6 +517,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED True)
518517
# Set options for executorch build.
519518
option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "" ON)
520519
option(EXECUTORCH_BUILD_EXTENSION_MODULE "" ON)
520+
option(EXECUTORCH_BUILD_EXTENSION_TENSOR "" ON)
521521
option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "" ON)
522522
option(EXECUTORCH_BUILD_XNNPACK "" ON) # Build with Xnnpack backend
523523
@@ -534,6 +534,7 @@ target_link_libraries(
534534
PRIVATE
535535
executorch
536536
extension_module_static # Provides the Module class
537+
extension_tensor # Provides the TensorPtr class
537538
optimized_native_cpu_ops_lib # Provides baseline cross-platform kernels
538539
xnnpack_backend) # Provides the XNNPACK CPU acceleration backend
539540
```
@@ -548,7 +549,6 @@ At this point, the working directory should contain the following files:
548549
- main.cpp
549550
- basic_tokenizer.h
550551
- basic_sampler.h
551-
- managed_tensor.h
552552
- export_nanogpt.py
553553
- model.py
554554
- vocab.json

docs/source/tutorial-xnnpack-delegate-lowering.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,9 +149,10 @@ mkdir cmake-out
149149
cmake \
150150
-DCMAKE_INSTALL_PREFIX=cmake-out \
151151
-DCMAKE_BUILD_TYPE=Release \
152+
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
152153
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
154+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
153155
-DEXECUTORCH_BUILD_XNNPACK=ON \
154-
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
155156
-DEXECUTORCH_ENABLE_LOGGING=ON \
156157
-DPYTHON_EXECUTABLE=python \
157158
-Bcmake-out .

examples/demo-apps/android/ExecuTorchDemo/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ cmake . -DCMAKE_INSTALL_PREFIX=cmake-android-out \
7878
-DEXECUTORCH_BUILD_XNNPACK=ON \
7979
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
8080
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
81+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
8182
-Bcmake-android-out
8283

8384
cmake --build cmake-android-out -j16 --target install
@@ -119,6 +120,7 @@ cmake . -DCMAKE_INSTALL_PREFIX=cmake-android-out \
119120
-DQNN_SDK_ROOT="${QNN_SDK_ROOT}" \
120121
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
121122
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
123+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
122124
-Bcmake-android-out
123125

124126
cmake --build cmake-android-out -j16 --target install

examples/demo-apps/android/ExecuTorchDemo/setup.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
1515
-DEXECUTORCH_BUILD_XNNPACK=ON \
1616
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
1717
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
18+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
1819
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
1920
-DCMAKE_BUILD_TYPE=Release \
2021
-B"${CMAKE_OUT}"

examples/demo-apps/android/LlamaDemo/android-llm-device-farm-test-spec.yml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,30 @@ phases:
7373
fi
7474
fi;
7575
76+
# Run the new generic benchmark activity https://developer.android.com/tools/adb#am
77+
- echo "Run LLM benchmark"
78+
- |
79+
adb -s $DEVICEFARM_DEVICE_UDID shell am start -W -n com.example.executorchllamademo/.LlmBenchmarkRunner \
80+
--es "model_dir" "/data/local/tmp/llama" \
81+
--es "tokenizer_path" "/data/local/tmp/llama/tokenizer.bin"
82+
7683
post_test:
7784
commands:
85+
- echo "Gather LLM benchmark results"
86+
- |
87+
BENCHMARK_RESULTS=""
88+
ATTEMPT=0
89+
MAX_ATTEMPT=10
90+
while [ -z "${BENCHMARK_RESULTS}" ] && [ $ATTEMPT -lt $MAX_ATTEMPT ]; do
91+
echo "Waiting for benchmark results..."
92+
BENCHMARK_RESULTS=$(adb -s $DEVICEFARM_DEVICE_UDID shell run-as com.example.executorchllamademo cat files/benchmark_results.json)
93+
sleep 30
94+
((ATTEMPT++))
95+
done
96+
97+
adb -s $DEVICEFARM_DEVICE_UDID shell run-as com.example.executorchllamademo ls -la files/
98+
# Trying to pull the file using adb ends up with permission error, but this works too, so why not
99+
echo "${BENCHMARK_RESULTS}" > $DEVICEFARM_LOG_DIR/benchmark_results.json
78100
79101
artifacts:
80102
# By default, Device Farm will collect your artifacts from the $DEVICEFARM_LOG_DIR directory.

examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,11 @@
1414
import android.util.Log;
1515
import android.widget.TextView;
1616
import androidx.annotation.NonNull;
17+
import com.google.gson.Gson;
18+
import java.io.File;
1719
import java.io.FileWriter;
1820
import java.io.IOException;
21+
import java.util.Arrays;
1922

2023
public class LlmBenchmarkRunner extends Activity implements ModelRunnerCallback {
2124
ModelRunner mModelRunner;
@@ -32,7 +35,12 @@ protected void onCreate(Bundle savedInstanceState) {
3235

3336
Intent intent = getIntent();
3437

35-
String modelPath = intent.getStringExtra("model_path");
38+
File modelDir = new File(intent.getStringExtra("model_dir"));
39+
File model =
40+
Arrays.stream(modelDir.listFiles())
41+
.filter(file -> file.getName().endsWith(".pte"))
42+
.findFirst()
43+
.get();
3644
String tokenizerPath = intent.getStringExtra("tokenizer_path");
3745

3846
float temperature = intent.getFloatExtra("temperature", 0.8f);
@@ -42,7 +50,7 @@ protected void onCreate(Bundle savedInstanceState) {
4250
}
4351

4452
mStatsDump = new StatsDump();
45-
mModelRunner = new ModelRunner(modelPath, tokenizerPath, temperature, this);
53+
mModelRunner = new ModelRunner(model.getPath(), tokenizerPath, temperature, this);
4654
mStatsDump.loadStart = System.currentTimeMillis();
4755
}
4856

@@ -79,11 +87,21 @@ public void onGenerationStopped() {
7987
mTextView.append(mStatsDump.toString());
8088
});
8189

90+
// TODO (huydhn): Remove txt files here once the JSON format is ready
8291
try (FileWriter writer = new FileWriter(getFilesDir() + "/benchmark_results.txt")) {
8392
writer.write(mStatsDump.toString());
8493
} catch (IOException e) {
8594
e.printStackTrace();
8695
}
96+
97+
// TODO (huydhn): Figure out on what the final JSON results looks like, we need something
98+
// with the same number of fields as https://github.com/pytorch/pytorch/pull/135042
99+
try (FileWriter writer = new FileWriter(getFilesDir() + "/benchmark_results.json")) {
100+
Gson gson = new Gson();
101+
writer.write(gson.toJson(mStatsDump));
102+
} catch (IOException e) {
103+
e.printStackTrace();
104+
}
87105
}
88106
}
89107

examples/demo-apps/android/LlamaDemo/setup-with-qnn.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
1616
-DEXECUTORCH_BUILD_XNNPACK=ON \
1717
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
1818
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
19+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
1920
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
2021
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
2122
-DEXECUTORCH_BUILD_QNN=ON \

examples/demo-apps/android/LlamaDemo/setup.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
1616
-DEXECUTORCH_BUILD_XNNPACK=ON \
1717
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
1818
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
19+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
1920
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
2021
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
2122
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \

examples/llm_manual/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED True)
1313
# Set options for executorch build.
1414
option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "" ON)
1515
option(EXECUTORCH_BUILD_EXTENSION_MODULE "" ON)
16+
option(EXECUTORCH_BUILD_EXTENSION_TENSOR "" ON)
1617
option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "" ON)
1718
option(EXECUTORCH_BUILD_XNNPACK "" ON) # Build with Xnnpack backend
1819

@@ -29,6 +30,7 @@ target_link_libraries(
2930
nanogpt_runner
3031
PRIVATE executorch
3132
extension_module_static # Provides the Module class
33+
extension_tensor # Provides the TensorPtr class
3234
optimized_native_cpu_ops_lib # Provides baseline cross-platform
3335
# kernels
3436
xnnpack_backend

examples/llm_manual/main.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@
1010

1111
#include "basic_sampler.h"
1212
#include "basic_tokenizer.h"
13-
#include "managed_tensor.h"
1413

1514
#include <executorch/extension/module/module.h>
15+
#include <executorch/extension/tensor/tensor.h>
1616
#include <executorch/runtime/core/evalue.h>
1717
#include <executorch/runtime/core/exec_aten/exec_aten.h>
1818
#include <executorch/runtime/core/result.h>
@@ -42,14 +42,13 @@ std::string generate(
4242
for (auto i = 0u; i < max_output_length; i++) {
4343
// Convert the input_tokens from a vector of int64_t to EValue.
4444
// EValue is a unified data type in the ExecuTorch runtime.
45-
ManagedTensor tensor_tokens(
45+
auto inputs = from_blob(
4646
input_tokens.data(),
4747
{1, static_cast<int>(input_tokens.size())},
4848
ScalarType::Long);
49-
std::vector<EValue> inputs = {tensor_tokens.get_tensor()};
5049

5150
// Run the model. It will return a tensor of logits (log-probabilities).
52-
Result<std::vector<EValue>> logits_evalue = llm_model.forward(inputs);
51+
auto logits_evalue = llm_model.forward(inputs);
5352

5453
// Convert the output logits from EValue to std::vector, which is what
5554
// the sampler expects.

0 commit comments

Comments
 (0)