Skip to content

Commit 6ccb290

Browse files
authored
Switch to the new tensor API internally.
Differential Revision: D61959575 Pull Request resolved: #5111
1 parent cea5abb commit 6ccb290

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+206
-265
lines changed

.ci/scripts/build-qnn-sdk.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ set_up_aot() {
2929
-DQNN_SDK_ROOT=${QNN_SDK_ROOT} \
3030
-DEXECUTORCH_BUILD_SDK=ON \
3131
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
32+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
3233
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
3334
-DPYTHON_EXECUTABLE=python3 \
3435
-DEXECUTORCH_SEPARATE_FLATCC_HOST_PROJECT=OFF

.ci/scripts/build_llama_android.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,9 @@ install_executorch_and_backend_lib() {
2222
-DANDROID_PLATFORM=android-23 \
2323
-DCMAKE_INSTALL_PREFIX=cmake-android-out \
2424
-DCMAKE_BUILD_TYPE=Release \
25-
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
2625
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
26+
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
27+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
2728
-DEXECUTORCH_BUILD_XNNPACK=ON \
2829
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
2930
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \

.ci/scripts/test_llama.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,9 @@ cmake_install_executorch_libraries() {
107107
retry cmake \
108108
-DCMAKE_INSTALL_PREFIX=cmake-out \
109109
-DCMAKE_BUILD_TYPE=Debug \
110-
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
111110
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
111+
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
112+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
112113
-DEXECUTORCH_BUILD_KERNELS_CUSTOM="$CUSTOM" \
113114
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
114115
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \

.ci/scripts/test_llava.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@ cmake_install_executorch_libraries() {
2020
cmake \
2121
-DCMAKE_INSTALL_PREFIX=cmake-out \
2222
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
23-
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
2423
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
24+
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
25+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
2526
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
2627
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
2728
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
@@ -61,7 +62,7 @@ export_llava() {
6162
# Download a new image with different size, to test if the model can handle different image sizes
6263
prepare_image_tensor() {
6364
echo "Downloading image"
64-
curl -o basketball.jpg https://upload.wikimedia.org/wikipedia/commons/7/73/Chicago_Bulls_and_New_Jersey_Nets%2C_March_28%2C_1991.jpg
65+
curl -o basketball.jpg https://upload.wikimedia.org/wikipedia/commons/7/73/Chicago_Bulls_and_New_Jersey_Nets%2C_March_28%2C_1991.jpg
6566
$PYTHON_EXECUTABLE -m executorch.examples.models.llava.image_util --image-path basketball.jpg --output-path image.pt
6667
}
6768

backends/qualcomm/scripts/build.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ if [ "$BUILD_AARCH64" = true ]; then
8181
-DEXECUTORCH_BUILD_QNN=ON \
8282
-DEXECUTORCH_BUILD_SDK=ON \
8383
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
84+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
8485
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
8586
-DQNN_SDK_ROOT=$QNN_SDK_ROOT \
8687
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \
@@ -124,6 +125,7 @@ if [ "$BUILD_X86_64" = true ]; then
124125
-DEXECUTORCH_BUILD_QNN=ON \
125126
-DEXECUTORCH_BUILD_SDK=ON \
126127
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
128+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
127129
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
128130
-DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
129131
-S $PRJ_ROOT \

backends/vulkan/docs/android_demo.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,9 @@ binary using the Android NDK toolchain.
9494
cmake . -DCMAKE_INSTALL_PREFIX=cmake-android-out \
9595
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
9696
-DANDROID_ABI=$ANDROID_ABI \
97-
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
9897
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
98+
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
99+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
99100
-DEXECUTORCH_BUILD_VULKAN=ON \
100101
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
101102
-DPYTHON_EXECUTABLE=python \

backends/xnnpack/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,9 +105,10 @@ mkdir cmake-out
105105
cmake \
106106
-DCMAKE_INSTALL_PREFIX=cmake-out \
107107
-DCMAKE_BUILD_TYPE=Release \
108+
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
108109
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
110+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
109111
-DEXECUTORCH_BUILD_XNNPACK=ON \
110-
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
111112
-DEXECUTORCH_ENABLE_LOGGING=ON \
112113
-DPYTHON_EXECUTABLE=python \
113114
-Bcmake-out .

build/build_android_llm_demo.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ build_android_native_library() {
3838
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
3939
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
4040
-DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
41+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
4142
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
4243
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
4344
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \

docs/source/build-run-qualcomm-ai-engine-direct-backend.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ cmake .. \
136136
-DQNN_SDK_ROOT=${QNN_SDK_ROOT} \
137137
-DEXECUTORCH_BUILD_SDK=ON \
138138
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
139+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
139140
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
140141
-DPYTHON_EXECUTABLE=python3 \
141142
-DEXECUTORCH_SEPARATE_FLATCC_HOST_PROJECT=OFF
@@ -167,6 +168,7 @@ cmake .. \
167168
-DQNN_SDK_ROOT=$QNN_SDK_ROOT \
168169
-DEXECUTORCH_BUILD_SDK=ON \
169170
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
171+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
170172
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
171173
-DPYTHON_EXECUTABLE=python3 \
172174
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \

docs/source/llm/getting-started.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -201,9 +201,9 @@ Create a file called main.cpp with the following contents:
201201

202202
#include "basic_sampler.h"
203203
#include "basic_tokenizer.h"
204-
#include "managed_tensor.h"
205204

206205
#include <executorch/extension/module/module.h>
206+
#include <executorch/extension/tensor/tensor.h>
207207
#include <executorch/runtime/core/evalue.h>
208208
#include <executorch/runtime/core/exec_aten/exec_aten.h>
209209
#include <executorch/runtime/core/result.h>
@@ -244,14 +244,13 @@ std::string generate(
244244
for (auto i = 0u; i < max_output_length; i++) {
245245
// Convert the input_tokens from a vector of int64_t to EValue.
246246
// EValue is a unified data type in the ExecuTorch runtime.
247-
ManagedTensor tensor_tokens(
247+
auto inputs = from_blob(
248248
input_tokens.data(),
249249
{1, static_cast<int>(input_tokens.size())},
250250
ScalarType::Long);
251-
std::vector<EValue> inputs = {tensor_tokens.get_tensor()};
252251

253252
// Run the model. It will return a tensor of logits (log-probabilities).
254-
Result<std::vector<EValue>> logits_evalue = llm_model.forward(inputs);
253+
auto logits_evalue = llm_model.forward(inputs);
255254

256255
// Convert the output logits from EValue to std::vector, which is what
257256
// the sampler expects.
@@ -339,7 +338,6 @@ Finally, download the following files into the same directory as main.h:
339338
```
340339
curl -O https://raw.githubusercontent.com/pytorch/executorch/main/examples/llm_manual/basic_sampler.h
341340
curl -O https://raw.githubusercontent.com/pytorch/executorch/main/examples/llm_manual/basic_tokenizer.h
342-
curl -O https://raw.githubusercontent.com/pytorch/executorch/main/examples/llm_manual/managed_tensor.h
343341
```
344342

345343
To learn more, see the [Runtime APIs Tutorial](../extension-module.md).
@@ -364,6 +362,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED True)
364362
# Set options for executorch build.
365363
option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "" ON)
366364
option(EXECUTORCH_BUILD_EXTENSION_MODULE "" ON)
365+
option(EXECUTORCH_BUILD_EXTENSION_TENSOR "" ON)
367366
option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "" ON)
368367
369368
# Include the executorch subdirectory.
@@ -377,6 +376,7 @@ target_link_libraries(
377376
PRIVATE
378377
executorch
379378
extension_module_static # Provides the Module class
379+
extension_tensor # Provides the TensorPtr class
380380
optimized_native_cpu_ops_lib) # Provides baseline cross-platform kernels
381381
```
382382

@@ -386,7 +386,6 @@ At this point, the working directory should contain the following files:
386386
- main.cpp
387387
- basic_tokenizer.h
388388
- basic_sampler.h
389-
- managed_tensor.h
390389
- export_nanogpt.py
391390
- model.py
392391
- vocab.json
@@ -518,6 +517,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED True)
518517
# Set options for executorch build.
519518
option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "" ON)
520519
option(EXECUTORCH_BUILD_EXTENSION_MODULE "" ON)
520+
option(EXECUTORCH_BUILD_EXTENSION_TENSOR "" ON)
521521
option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "" ON)
522522
option(EXECUTORCH_BUILD_XNNPACK "" ON) # Build with Xnnpack backend
523523
@@ -534,6 +534,7 @@ target_link_libraries(
534534
PRIVATE
535535
executorch
536536
extension_module_static # Provides the Module class
537+
extension_tensor # Provides the TensorPtr class
537538
optimized_native_cpu_ops_lib # Provides baseline cross-platform kernels
538539
xnnpack_backend) # Provides the XNNPACK CPU acceleration backend
539540
```
@@ -548,7 +549,6 @@ At this point, the working directory should contain the following files:
548549
- main.cpp
549550
- basic_tokenizer.h
550551
- basic_sampler.h
551-
- managed_tensor.h
552552
- export_nanogpt.py
553553
- model.py
554554
- vocab.json

docs/source/tutorial-xnnpack-delegate-lowering.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,9 +149,10 @@ mkdir cmake-out
149149
cmake \
150150
-DCMAKE_INSTALL_PREFIX=cmake-out \
151151
-DCMAKE_BUILD_TYPE=Release \
152+
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
152153
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
154+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
153155
-DEXECUTORCH_BUILD_XNNPACK=ON \
154-
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
155156
-DEXECUTORCH_ENABLE_LOGGING=ON \
156157
-DPYTHON_EXECUTABLE=python \
157158
-Bcmake-out .

examples/demo-apps/android/ExecuTorchDemo/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ cmake . -DCMAKE_INSTALL_PREFIX=cmake-android-out \
7878
-DEXECUTORCH_BUILD_XNNPACK=ON \
7979
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
8080
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
81+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
8182
-Bcmake-android-out
8283

8384
cmake --build cmake-android-out -j16 --target install
@@ -119,6 +120,7 @@ cmake . -DCMAKE_INSTALL_PREFIX=cmake-android-out \
119120
-DQNN_SDK_ROOT="${QNN_SDK_ROOT}" \
120121
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
121122
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
123+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
122124
-Bcmake-android-out
123125

124126
cmake --build cmake-android-out -j16 --target install

examples/demo-apps/android/ExecuTorchDemo/setup.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
1515
-DEXECUTORCH_BUILD_XNNPACK=ON \
1616
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
1717
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
18+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
1819
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
1920
-DCMAKE_BUILD_TYPE=Release \
2021
-B"${CMAKE_OUT}"

examples/demo-apps/android/LlamaDemo/setup-with-qnn.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
1616
-DEXECUTORCH_BUILD_XNNPACK=ON \
1717
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
1818
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
19+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
1920
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
2021
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
2122
-DEXECUTORCH_BUILD_QNN=ON \

examples/demo-apps/android/LlamaDemo/setup.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
1616
-DEXECUTORCH_BUILD_XNNPACK=ON \
1717
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
1818
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
19+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
1920
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
2021
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
2122
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \

examples/llm_manual/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED True)
1313
# Set options for executorch build.
1414
option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "" ON)
1515
option(EXECUTORCH_BUILD_EXTENSION_MODULE "" ON)
16+
option(EXECUTORCH_BUILD_EXTENSION_TENSOR "" ON)
1617
option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "" ON)
1718
option(EXECUTORCH_BUILD_XNNPACK "" ON) # Build with Xnnpack backend
1819

@@ -29,6 +30,7 @@ target_link_libraries(
2930
nanogpt_runner
3031
PRIVATE executorch
3132
extension_module_static # Provides the Module class
33+
extension_tensor # Provides the TensorPtr class
3234
optimized_native_cpu_ops_lib # Provides baseline cross-platform
3335
# kernels
3436
xnnpack_backend

examples/llm_manual/main.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@
1010

1111
#include "basic_sampler.h"
1212
#include "basic_tokenizer.h"
13-
#include "managed_tensor.h"
1413

1514
#include <executorch/extension/module/module.h>
15+
#include <executorch/extension/tensor/tensor.h>
1616
#include <executorch/runtime/core/evalue.h>
1717
#include <executorch/runtime/core/exec_aten/exec_aten.h>
1818
#include <executorch/runtime/core/result.h>
@@ -42,14 +42,13 @@ std::string generate(
4242
for (auto i = 0u; i < max_output_length; i++) {
4343
// Convert the input_tokens from a vector of int64_t to EValue.
4444
// EValue is a unified data type in the ExecuTorch runtime.
45-
ManagedTensor tensor_tokens(
45+
auto inputs = from_blob(
4646
input_tokens.data(),
4747
{1, static_cast<int>(input_tokens.size())},
4848
ScalarType::Long);
49-
std::vector<EValue> inputs = {tensor_tokens.get_tensor()};
5049

5150
// Run the model. It will return a tensor of logits (log-probabilities).
52-
Result<std::vector<EValue>> logits_evalue = llm_model.forward(inputs);
51+
auto logits_evalue = llm_model.forward(inputs);
5352

5453
// Convert the output logits from EValue to std::vector, which is what
5554
// the sampler expects.

examples/llm_manual/managed_tensor.h

Lines changed: 0 additions & 44 deletions
This file was deleted.

examples/models/flamingo/cross_attention/cross_attention_mask.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,11 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9+
#include <executorch/examples/models/flamingo/cross_attention/cross_attention_mask.h>
10+
911
#include <algorithm>
1012
#include <string>
1113

12-
#include <executorch/examples/models/flamingo/cross_attention/cross_attention_mask.h>
13-
#include <executorch/extension/runner_util/managed_tensor.h>
14-
1514
namespace torch::executor {
1615

1716
// Forward declaration needed for ARM compilers.
@@ -97,7 +96,7 @@ std::vector<std::vector<int>> _get_image_attention_intervals(
9796
return vision_masks;
9897
}
9998

100-
std::vector<ManagedTensor> cross_attention_mask(
99+
std::vector<executorch::extension::TensorPtr> cross_attention_mask(
101100
const std::vector<int>& tokens,
102101
const std::vector<Tensor>& images,
103102
size_t tile_size,
@@ -121,7 +120,7 @@ std::vector<ManagedTensor> cross_attention_mask(
121120
// Create mask for each individual image based on its number of tokens,
122121
// which can vary based on number of tiles since they are not yet tile padded.
123122
// The masks are padded and concatenated together in the batch collator.
124-
std::vector<ManagedTensor> cross_attention_masks;
123+
std::vector<executorch::extension::TensorPtr> cross_attention_masks;
125124
size_t text_seq_len = tokens.size();
126125
for (size_t image_idx = 0; image_idx < image_intervals.size(); ++image_idx) {
127126
size_t n_tiles = images[image_idx].size(0);
@@ -140,7 +139,8 @@ std::vector<ManagedTensor> cross_attention_mask(
140139
size_t stride = image_seq_len;
141140
std::vector<int> mask_data(num_elements);
142141

143-
ManagedTensor mask(mask_data.data(), sizes, ScalarType::Int);
142+
auto mask = executorch::extension::from_blob(
143+
mask_data.data(), sizes, ScalarType::Int);
144144
cross_attention_masks.emplace_back(std::move(mask));
145145

146146
// Add the allocated data to the output vector.

0 commit comments

Comments
 (0)