Commit b99f1d8
Update on "[ET-VK] Introduce vTensorPtr to prevent reference invalidation and remove get_val() API"
## Context

Currently, when writing operators, developers save a reference to a `vTensor` retrieved from a `ComputeGraph`'s list of `values_` like so:

```
vTensor& vten = graph.get_val(vref).toTensor();
```

However, this is dangerous: if any values are added after the reference has been stored, `values_` (a `std::vector`) may have been resized, moving its contents and leaving the reference invalid. To protect against this, this changeset introduces the `vTensorPtr` class, a wrapper around a `vTensor*`. When constructed, it increments a counter in the `ComputeGraph` instance; when destroyed, it decrements that counter. `ComputeGraph` cannot add any values while the counter is nonzero.

Since `Value` can be converted to other non-trivial types, this changeset also removes the `get_val` function entirely to guard against unsafe behaviour.

Differential Revision: [D55984187](https://our.internmc.facebook.com/intern/diff/D55984187/)

[ghstack-poisoned]
2 parents ff3213f + 650869c commit b99f1d8
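To make the mechanism concrete, here is a minimal self-contained C++ sketch of the guard pattern the commit message describes. Everything besides the names `vTensorPtr`, `ComputeGraph`, and `values_` is a simplified stand-in; the real classes (declared via macros in `ComputeGraph.h` below) carry more machinery:

```cpp
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <utility>
#include <vector>

struct vTensor { /* GPU tensor state elided */ };
struct Value { vTensor tensor; };

class ComputeGraph;

// RAII wrapper around a vTensor*. While any vTensorPtr is alive, the graph
// refuses to grow values_, so the pointer cannot be invalidated by a resize.
class vTensorPtr {
 public:
  vTensorPtr(ComputeGraph* graph, size_t idx);
  ~vTensorPtr();

  // Non-copyable so the liveness counter stays balanced. Returning by value
  // still works via C++17 guaranteed copy elision.
  vTensorPtr(const vTensorPtr&) = delete;
  vTensorPtr& operator=(const vTensorPtr&) = delete;

  vTensor* operator->() const { return ptr_; }
  vTensor& operator*() const { return *ptr_; }

 private:
  ComputeGraph* graph_;
  vTensor* ptr_;
};

class ComputeGraph {
 public:
  vTensorPtr get_tensor(size_t idx) { return vTensorPtr(this, idx); }

  size_t add_value(Value v) {
    // push_back may reallocate values_ and move its contents, which would
    // dangle every outstanding pointer -- forbid it while any are alive.
    if (live_ptrs_ != 0) {
      throw std::runtime_error("cannot add values while pointers are held");
    }
    values_.push_back(std::move(v));
    return values_.size() - 1;
  }

 private:
  friend class vTensorPtr;
  std::vector<Value> values_;
  int64_t live_ptrs_ = 0;
};

vTensorPtr::vTensorPtr(ComputeGraph* graph, size_t idx)
    : graph_(graph), ptr_(&graph->values_.at(idx).tensor) {
  ++graph_->live_ptrs_;
}

vTensorPtr::~vTensorPtr() {
  --graph_->live_ptrs_;
}
```

Under this sketch, `graph.get_tensor(idx)` hands back a scoped pointer, and any `add_value` attempted while one is alive fails loudly instead of silently dangling.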

68 files changed, +843 −354 lines changed

CMakeLists.txt

Lines changed: 19 additions & 0 deletions

```diff
@@ -144,6 +144,8 @@ option(EXECUTORCH_BUILD_COREML "Build the Core ML backend" OFF)
 
 option(EXECUTORCH_BUILD_CUSTOM "Build the custom kernels" OFF)
 
+option(EXECUTORCH_BUILD_CUSTOM_OPS_AOT "Build the custom ops lib for AOT" OFF)
+
 option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "Build the Data Loader extension"
        OFF)
 
@@ -185,12 +187,19 @@ cmake_dependent_option(
 cmake_dependent_option(EXECUTORCH_BUILD_CPUINFO "Build cpuinfo library." ON
                        "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF)
 
+if(EXECUTORCH_BUILD_CUSTOM_OPS_AOT)
+  set(EXECUTORCH_BUILD_CUSTOM ON)
+endif()
+
 if(EXECUTORCH_BUILD_CUSTOM)
   set(EXECUTORCH_BUILD_OPTIMIZED ON)
 endif()
 
 if(EXECUTORCH_BUILD_CPUINFO)
   # --- cpuinfo
+  set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG
+      ${CMAKE_POSITION_INDEPENDENT_CODE})
+  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
   set(CPUINFO_SOURCE_DIR "backends/xnnpack/third-party/cpuinfo")
   set(CPUINFO_BUILD_TOOLS
       OFF
@@ -212,10 +221,15 @@ if(EXECUTORCH_BUILD_CPUINFO)
       CACHE STRING "")
   set(CLOG_SOURCE_DIR "${CPUINFO_SOURCE_DIR}/deps/clog")
   add_subdirectory("${CPUINFO_SOURCE_DIR}")
+  set(CMAKE_POSITION_INDEPENDENT_CODE
+      ${ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG})
 endif()
 
 if(EXECUTORCH_BUILD_PTHREADPOOL)
   # --- pthreadpool
+  set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG
+      ${CMAKE_POSITION_INDEPENDENT_CODE})
+  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
   set(PTHREADPOOL_SOURCE_DIR "backends/xnnpack/third-party/pthreadpool")
   set(PTHREADPOOL_BUILD_TESTS
       OFF
@@ -235,6 +249,8 @@ if(EXECUTORCH_BUILD_PTHREADPOOL)
        CACHE STRING "")
   endif()
   add_subdirectory("${PTHREADPOOL_SOURCE_DIR}")
+  set(CMAKE_POSITION_INDEPENDENT_CODE
+      ${ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG})
 endif()
 
 if(NOT PYTHON_EXECUTABLE)
@@ -546,6 +562,9 @@ if(EXECUTORCH_BUILD_PYBIND)
     list(APPEND _dep_libs custom_ops)
   endif()
 
+  if(EXECUTORCH_BUILD_CUSTOM_OPS_AOT)
+    list(APPEND _dep_libs custom_ops_aot_lib)
+  endif()
   # compile options for pybind
 
   set(_pybind_compile_options -Wno-deprecated-declarations -fPIC -frtti
```

backends/qualcomm/CMakeLists.txt

Lines changed: 1 addition & 1 deletion

```diff
@@ -253,7 +253,7 @@ target_link_libraries(qnn_executorch_backend
   qnn_executorch_header
   qnn_schema
   qnn_manager
-  executorch
+  executorch_no_prim_ops
   qcir_utils
 )
 target_link_libraries(utils
```

backends/vulkan/runtime/graph/ComputeGraph.cpp

Lines changed: 6 additions & 6 deletions

```diff
@@ -37,6 +37,7 @@ namespace vkcompute {
 }
 
 VALUE_PTR_CLASS_IMPL(vTensorPtr, vTensor, Tensor)
+VALUE_PTR_CLASS_IMPL(TensorRefPtr, TensorRef, TensorRef)
 VALUE_PTR_CLASS_IMPL(StagingPtr, api::StorageBuffer, Staging)
 VALUE_PTR_CLASS_IMPL(IntListPtr, std::vector<int64_t>, IntList)
 VALUE_PTR_CLASS_IMPL(DoubleListPtr, std::vector<double>, DoubleList)
@@ -195,18 +196,17 @@ ValueRef ComputeGraph::add_tensor(
 }
 
 ValueRef ComputeGraph::add_tensor_like(
-    const ValueRef vref,
+    const ValueRef idx,
     const api::StorageType storage_type,
     const api::GPUMemoryLayout memory_layout) {
-  TensorRef tref = get_tref(vref);
-  return add_tensor(tref.sizes, tref.dtype, storage_type, memory_layout);
+  return add_tensor(
+      get_sizes_of(idx), get_dtype_of(idx), storage_type, memory_layout);
 }
 
 ValueRef ComputeGraph::add_tensor_like(
-    const ValueRef vref,
+    const ValueRef idx,
     const api::GPUMemoryLayout memory_layout) {
-  TensorRef tref = get_tref(vref);
-  return add_tensor(tref.sizes, tref.dtype, memory_layout);
+  return add_tensor(get_sizes_of(idx), get_dtype_of(idx), memory_layout);
 }
 
 ValueRef ComputeGraph::add_tensor(
```
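The `add_tensor_like` rewrite above shows the complementary strategy for call sites that need no long-lived handle: copy the metadata out of the value list (`get_sizes_of`, `get_dtype_of`) rather than holding a `TensorRef` reference across a call that may grow `values_`. A self-contained sketch of that idea, with deliberately simplified types that are not the real API:

```cpp
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

// Simplified stand-in for vkcompute::TensorRef.
struct TensorRef {
  std::vector<int64_t> sizes;
  int dtype;
};

class Graph {
 public:
  size_t add_tref(TensorRef t) {
    trefs_.push_back(std::move(t));  // may reallocate trefs_
    return trefs_.size() - 1;
  }

  // Return copies of the metadata so callers never keep references into
  // trefs_ across a call that might reallocate it.
  std::vector<int64_t> get_sizes_of(size_t idx) const {
    return trefs_.at(idx).sizes;
  }
  int get_dtype_of(size_t idx) const { return trefs_.at(idx).dtype; }

  // Safe by construction: both reads complete before add_tref can
  // reallocate, so there is no window for a dangling reference.
  size_t add_tref_like(size_t idx) {
    return add_tref(TensorRef{get_sizes_of(idx), get_dtype_of(idx)});
  }

 private:
  std::vector<TensorRef> trefs_;
};
```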

backends/vulkan/runtime/graph/ComputeGraph.h

Lines changed: 3 additions & 1 deletion

```diff
@@ -55,6 +55,7 @@ class ComputeGraph;
 };
 
 DECL_VALUE_PTR_CLASS(vTensorPtr, vTensor)
+DECL_VALUE_PTR_CLASS(TensorRefPtr, TensorRef)
 DECL_VALUE_PTR_CLASS(StagingPtr, api::StorageBuffer)
 DECL_VALUE_PTR_CLASS(IntListPtr, std::vector<int64_t>)
 DECL_VALUE_PTR_CLASS(DoubleListPtr, std::vector<double>)
@@ -132,6 +133,7 @@ class ComputeGraph final {
   }
 
   GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(vTensorPtr, tensor, Tensor)
+  GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(TensorRefPtr, tref, TensorRef)
   GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(StagingPtr, staging, Staging)
   GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(IntListPtr, int_list, IntList)
   GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(DoubleListPtr, double_list, DoubleList)
@@ -148,7 +150,6 @@ class ComputeGraph final {
     return values_.at(idx).is##type_name(); \
   }
 
-  GET_AND_CHECK_VAL_AS_TYPE_FNS(TensorRef, tref, TensorRef)
   GET_AND_CHECK_VAL_AS_TYPE_FNS(int64_t, int, Int)
   GET_AND_CHECK_VAL_AS_TYPE_FNS(double, double, Double)
   GET_AND_CHECK_VAL_AS_TYPE_FNS(bool, bool, Bool)
@@ -392,6 +393,7 @@ class ComputeGraph final {
   //
 
   friend class vTensorPtr;
+  friend class TensorRefPtr;
   friend class StagingPtr;
   friend class IntListPtr;
   friend class DoubleListPtr;
```

backends/vulkan/runtime/graph/ops/PrepackNode.cpp

Lines changed: 5 additions & 5 deletions

```diff
@@ -56,11 +56,11 @@ api::StorageBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) {
     return staging;
   }
 
-  TensorRef tref = graph->get_tref(tref_);
-  size_t numel = api::utils::multiply_integers(tref.sizes);
-  api::StorageBuffer staging(graph->context(), tref.dtype, numel);
-  size_t nbytes = numel * api::element_size(tref.dtype);
-  copy_ptr_to_staging(tref.data, staging, nbytes);
+  TensorRefPtr tref = graph->get_tref(tref_);
+  size_t numel = api::utils::multiply_integers(tref->sizes);
+  api::StorageBuffer staging(graph->context(), tref->dtype, numel);
+  size_t nbytes = numel * api::element_size(tref->dtype);
+  copy_ptr_to_staging(tref->data, staging, nbytes);
   return staging;
 }
 
```

backends/vulkan/runtime/graph/ops/impl/Conv2d.cpp

Lines changed: 16 additions & 11 deletions

```diff
@@ -17,6 +17,8 @@
 
 #include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h>
 
+#include <iostream>
+
 namespace vkcompute {
 
 void resize_conv2d_node(
@@ -35,8 +37,8 @@ void resize_conv2d_node(
     new_out_sizes.at(ndim - 4) = self->sizes().at(ndim - 4);
   }
 
-  TensorRef weight_ref = graph->get_tref(extra_args[0]);
-  const auto& weight_sizes = weight_ref.sizes;
+  TensorRefPtr weight_ref = graph->get_tref(extra_args[0]);
+  const auto& weight_sizes = weight_ref->sizes;
   new_out_sizes.at(ndim - 3) =
       transposed ? weight_sizes.at(ndim - 3) : weight_sizes.at(ndim - 4);
 
@@ -59,11 +61,14 @@ ValueRef prepack_biases(
     const ValueRef vref,
     const ValueRef weight,
     const bool transposed) {
-  TensorRef tref = graph.get_tref(weight);
-  const int64_t out_channels = transposed ? tref.sizes.at(1) : tref.sizes.at(0);
+  auto sizes = graph.get_sizes_of(weight);
+  const int64_t out_channels = transposed ? sizes.at(1) : sizes.at(0);
 
   ValueRef v = graph.add_tensor(
-      {out_channels}, tref.dtype, api::kTexture2D, api::kWidthPacked);
+      {out_channels},
+      graph.get_dtype_of(weight),
+      api::kTexture2D,
+      api::kWidthPacked);
   vTensorPtr t = graph.get_tensor(v);
 
   api::ShaderInfo shader = get_nchw_to_image_shader(*t);
@@ -102,7 +107,7 @@ api::ShaderInfo get_conv2d_shader(
     case Conv2dMethod::Depthwise:
       kernel_name = "conv2d_dw";
       if (!prepack_weights) {
-        const auto weight_sizes = graph.get_tref(weight).sizes;
+        const auto& weight_sizes = graph.get_tref(weight)->sizes;
         if (weight_sizes.at(2) == 3 && weight_sizes.at(3) == 3) {
           kernel_name += "_output_tile_3x3";
         }
@@ -180,12 +185,12 @@ ValueRef prepack_weights(
     ComputeGraph& graph,
     const ValueRef vref,
     const Conv2dMethod method) {
-  const auto original_sizes = graph.get_tref(vref).sizes;
-  const auto final_sizes = get_final_sizes(graph.get_tref(vref).sizes, method);
+  const auto original_sizes = graph.get_sizes_of(vref);
+  const auto final_sizes = get_final_sizes(original_sizes, method);
 
   ValueRef v = graph.add_tensor(
       final_sizes,
-      graph.get_tref(vref).dtype,
+      graph.get_dtype_of(vref),
       api::kTexture2D,
       api::kChannelsPacked);
   vTensorPtr t = graph.get_tensor(v);
@@ -239,7 +244,7 @@ Conv2dParams create_conv2d_params(
       p.kernel_size.data[1] +
           (p.kernel_size.data[1] - 1) * (p.dilation.data[1] - 1),
   });
-  const auto weight_sizes = graph.get_tref(weight).sizes;
+  const auto weight_sizes = graph.get_sizes_of(weight);
   const int32_t in_group_size =
       api::utils::safe_downcast<int32_t>(api::utils::align_up(
           transposed ? weight_sizes.at(0) : weight_sizes.at(1), INT64_C(4)));
@@ -267,7 +272,7 @@ Conv2dMethod get_conv2d_method(
     const ValueRef weight,
     const int64_t groups,
     const bool transposed) {
-  const auto weight_sizes = graph.get_tref(weight).sizes;
+  const auto weight_sizes = graph.get_sizes_of(weight);
   if (!transposed && weight_sizes.at(0) == groups && weight_sizes.at(1) == 1) {
     return Conv2dMethod::Depthwise;
   }
```

backends/vulkan/runtime/graph/ops/impl/Staging.cpp

Lines changed: 1 addition & 1 deletion

```diff
@@ -97,7 +97,7 @@ ValueRef prepack_if_tensor_ref(
 ValueRef prepack_if_tensor_ref(ComputeGraph& graph, const ValueRef v) {
   if (graph.val_is_tref(v)) {
     api::GPUMemoryLayout layout =
-        graph.suggested_memory_layout(graph.get_tref(v).sizes);
+        graph.suggested_memory_layout(graph.get_tref(v)->sizes);
     return prepack(graph, v, layout);
   } else {
     return v;
```

backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.cpp

Lines changed: 1 addition & 1 deletion

```diff
@@ -21,7 +21,7 @@ api::utils::ivec2 make_ivec2_kernel_size(
   if (kernel_size_only) {
     return make_ivec2_from_list(graph, weight);
   } else {
-    const auto weight_sizes = graph.get_tref(weight).sizes;
+    const auto weight_sizes = graph.get_tref(weight)->sizes;
     return api::utils::make_ivec2({weight_sizes.at(3), weight_sizes.at(2)});
   }
 }
```

backends/vulkan/test/op_tests/utils/codegen.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -248,8 +248,8 @@ def virtual_resize(self, ref: ValueRefList) -> str:
         assert ref.src_cpp_type == AT_TENSOR and ref.is_in
         if self.prepack_ref(ref):
             return ""
-        ret_str = f"{self.graph}{self.dot}get_val({ref.name}.value).toTensor()"
-        ret_str += f".virtual_resize({ref.src_cpp_name}.sizes().vec());\n"
+        ret_str = f"{self.graph}{self.dot}get_tensor({ref.name}.value)"
+        ret_str += f"->virtual_resize({ref.src_cpp_name}.sizes().vec());\n"
         return ret_str
 
     def copy_into_staging(self, ref: ValueRefList) -> str:
```
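For reference, this codegen change moves the emitted C++ test code off the removed `get_val` API. Spelled out with illustrative names (`graph_`, `out_ref`, and `out` are placeholders for whatever the generator interpolates), the generated line changes roughly like this:

```cpp
// Previously generated: a temporary reference via the removed get_val().
// graph_->get_val(out_ref.value).toTensor().virtual_resize(out.sizes().vec());

// Now generated: the scoped-pointer accessor with arrow access.
graph_->get_tensor(out_ref.value)->virtual_resize(out.sizes().vec());
```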

backends/xnnpack/CMakeLists.txt

Lines changed: 1 addition & 1 deletion

```diff
@@ -81,7 +81,7 @@ add_library(xnnpack_backend STATIC ${_xnnpack_backend__srcs})
 target_link_libraries(xnnpack_backend
   PRIVATE
     ${xnnpack_third_party}
-    executorch
+    executorch_no_prim_ops
     xnnpack_schema)
 
 target_include_directories(xnnpack_backend
```

docs/source/build-run-qualcomm-ai-engine-direct-backend.md

Lines changed: 6 additions & 1 deletion

````diff
@@ -115,6 +115,10 @@ Python APIs on x64 are required to compile models to Qualcomm AI Engine Direct b
 
 ```bash
 cd $EXECUTORCH_ROOT
+# Workaround for fbs files in exir/_serialize
+cp schema/program.fbs exir/_serialize/program.fbs
+cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
+
 mkdir build_x86_64
 cd build_x86_64
 cmake .. -DEXECUTORCH_BUILD_QNN=ON -DQNN_SDK_ROOT=${QNN_SDK_ROOT}
@@ -138,8 +142,8 @@ mkdir build_android
 cd build_android
 # build executorch & qnn_executorch_backend
 cmake .. \
-    -DBUCK2=buck2 \
     -DCMAKE_INSTALL_PREFIX=$PWD \
+    -DEXECUTORCH_BUILD_SDK=ON \
     -DEXECUTORCH_BUILD_QNN=ON \
     -DQNN_SDK_ROOT=$QNN_SDK_ROOT \
     -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
@@ -220,6 +224,7 @@ So, we can run `qnn_executor_runner` like
 ```bash
 adb push ./deeplab_v3/dlv3_qnn.pte ${DEVICE_DIR}
 adb push ${EXECUTORCH_ROOT}/build_android/examples/qualcomm/qnn_executor_runner ${DEVICE_DIR}
+adb push ${EXECUTORCH_ROOT}/build_android/lib/libqnn_executorch_backend.so ${DEVICE_DIR}
 adb shell "cd ${DEVICE_DIR} \
 && export LD_LIBRARY_PATH=${DEVICE_DIR} \
 && export ADSP_LIBRARY_PATH=${DEVICE_DIR} \
````

docs/source/build-run-xtensa.md

Lines changed: 14 additions & 14 deletions

````diff
@@ -64,7 +64,7 @@ Step 2. Make sure you have completed the ExecuTorch setup tutorials linked to at
 The working tree is:
 
 ```
-examples/xtensa/
+examples/cadence/
 ├── aot
 ├── kernels
 ├── ops
@@ -75,7 +75,7 @@ examples/xtensa/
 
 ***AoT (Ahead-of-Time) Components***:
 
-The AoT folder contains all of the python scripts and functions needed to export the model to an ExecuTorch `.pte` file. In our case, [export_example.py](https://github.com/pytorch/executorch/blob/main/examples/xtensa/aot/export_example.py) is an API that takes a model (nn.Module) and representative inputs and runs it through the quantizer (from [quantizer.py](https://github.com/pytorch/executorch/blob/main/examples/xtensa/aot/quantizer.py)). Then a few compiler passes, also defined in [quantizer.py](https://github.com/pytorch/executorch/blob/main/examples/xtensa/aot/quantizer.py), will replace operators with custom ones that are supported and optimized on the chip. Any operator needed to compute things should be defined in [meta_registrations.py](https://github.com/pytorch/executorch/blob/main/examples/xtensa/aot/meta_registrations.py) and have corresponding implemetations in the other folders.
+The AoT folder contains all of the python scripts and functions needed to export the model to an ExecuTorch `.pte` file. In our case, [export_example.py](https://github.com/pytorch/executorch/blob/main/examples/cadence/aot/export_example.py) is an API that takes a model (nn.Module) and representative inputs and runs it through the quantizer (from [quantizer.py](https://github.com/pytorch/executorch/blob/main/examples/cadence/aot/quantizer.py)). Then a few compiler passes, also defined in [quantizer.py](https://github.com/pytorch/executorch/blob/main/examples/cadence/aot/quantizer.py), will replace operators with custom ones that are supported and optimized on the chip. Any operator needed to compute things should be defined in [meta_registrations.py](https://github.com/pytorch/executorch/blob/main/examples/cadence/aot/meta_registrations.py) and have corresponding implemetations in the other folders.
 
 ***Operators***:
 
@@ -101,14 +101,14 @@ python3 -m examples.portable.scripts.export --model_name="add"
 ***Quantized Operators***:
 
 The other, more complex model are custom operators, including:
-- a quantized [linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) operation. The model is defined [here](https://github.com/pytorch/executorch/blob/main/examples/xtensa/tests/quantized_linear_example.py#L28). Linear is the backbone of most Automatic Speech Recognition (ASR) models.
-- a quantized [conv1d](https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html) operation. The model is defined [here](https://github.com/pytorch/executorch/blob/main/examples/xtensa/tests/quantized_conv1d_example.py#L36). Convolutions are important in wake word and many denoising models.
+- a quantized [linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) operation. The model is defined [here](https://github.com/pytorch/executorch/blob/main/examples/cadence/tests/quantized_linear_example.py#L28). Linear is the backbone of most Automatic Speech Recognition (ASR) models.
+- a quantized [conv1d](https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html) operation. The model is defined [here](https://github.com/pytorch/executorch/blob/main/examples/cadence/tests/quantized_conv1d_example.py#L36). Convolutions are important in wake word and many denoising models.
 
 In both cases the generated file is called `XtensaDemoModel.pte`.
 
 ```bash
 cd executorch
-python3 -m examples.xtensa.tests.quantized_<linear,conv1d>_example
+python3 -m examples.cadence.tests.quantized_<linear,conv1d>_example
 ```
 
 ***Small Model: RNNT predictor***:
@@ -118,7 +118,7 @@ The predictor is a sequence of basic ops (embedding, ReLU, linear, layer norm) a
 
 ```bash
 cd executorch
-python3 -m examples.xtensa.tests.rnnt_predictor_quantized_example
+python3 -m examples.cadence.tests.rnnt_predictor_quantized_example
 ```
 
 The generated file is called `XtensaDemoModel.pte`.
@@ -131,7 +131,7 @@ In this step, you'll be building the DSP firmware image that consists of the sam
 ***Step 1***. Configure the environment variables needed to point to the Xtensa toolchain that you have installed in the previous step. The three environment variables that need to be set include:
 ```bash
 # Directory in which the Xtensa toolchain was installed
-export XTENSA_TOOLCHAIN=/home/user_name/xtensa/XtDevTools/install/tools
+export XTENSA_TOOLCHAIN=/home/user_name/cadence/XtDevTools/install/tools
 # The version of the toolchain that was installed. This is essentially the name of the directory
 # that is present in the XTENSA_TOOLCHAIN directory from above.
 export TOOLCHAIN_VER=RI-2021.8-linux
@@ -151,7 +151,7 @@ cd executorch
 rm -rf cmake-out
 # prebuild and install executorch library
 cmake -DBUCK2=buck2 \
-    -DCMAKE_TOOLCHAIN_FILE=<path_to_executorch>/examples/xtensa/xtensa.cmake \
+    -DCMAKE_TOOLCHAIN_FILE=<path_to_executorch>/examples/cadence/cadence.cmake \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
     -DCMAKE_BUILD_TYPE=Debug \
     -DPYTHON_EXECUTABLE=python3 \
@@ -165,18 +165,18 @@ cmake -DBUCK2=buck2 \
     -Bcmake-out .
 
 cmake --build cmake-out -j8 --target install --config Debug
-# build xtensa runner
+# build cadence runner
 cmake -DCMAKE_BUILD_TYPE=Debug \
-    -DCMAKE_TOOLCHAIN_FILE=<path_to_executorch>/examples/xtensa/xtensa.cmake \
+    -DCMAKE_TOOLCHAIN_FILE=<path_to_executorch>/examples/cadence/cadence.cmake \
     -DCMAKE_PREFIX_PATH=<path_to_executorch>/cmake-out \
     -DMODEL_PATH=<path_to_program_file_generated_in_previous_step> \
     -DNXP_SDK_ROOT_DIR=<path_to_nxp_sdk_root> -DEXECUTORCH_BUILD_FLATC=0 \
     -DFLATC_EXECUTABLE="$(which flatc)" \
     -DNN_LIB_BASE_DIR=<path_to_nnlib_cloned_in_step_2> \
-    -Bcmake-out/examples/xtensa \
-    examples/xtensa
+    -Bcmake-out/examples/cadence \
+    examples/cadence
 
-cmake --build cmake-out/examples/xtensa -j8 -t xtensa_executorch_example
+cmake --build cmake-out/examples/cadence -j8 -t cadence_executorch_example
 ```
 
 After having succesfully run the above step you should see two binary files in their CMake output directory.
@@ -213,6 +213,6 @@ First 20 elements of output 0
 
 In this tutorial, you have learned how to export a quantized operation, build the ExecuTorch runtime and run this model on the Xtensa HiFi4 DSP chip.
 
-The (quantized linear) model in this tutorial is a typical operation appearing in ASR models, and can be extended to a complete ASR model by creating the model as a new test and adding the needed operators/kernels to [operators](https://github.com/pytorch/executorch/blob/main/examples/xtensa/ops) and [kernels](https://github.com/pytorch/executorch/blob/main/examples/xtensa/kernels).
+The (quantized linear) model in this tutorial is a typical operation appearing in ASR models, and can be extended to a complete ASR model by creating the model as a new test and adding the needed operators/kernels to [operators](https://github.com/pytorch/executorch/blob/main/examples/cadence/ops) and [kernels](https://github.com/pytorch/executorch/blob/main/examples/cadence/kernels).
 
 Other models can be created following the same structure, always assuming that operators and kernels are available.
````
