Commit 18d44cd

Update base for Update on "[Executorch][quant] Optimize per channel dequantize"
When using a quantized KV cache, the dequantization routine takes a significant amount of time. This diff vectorizes per-channel dequantization for the common case. Differential Revision: [D63338858](https://our.internmc.facebook.com/intern/diff/D63338858/) [ghstack-poisoned]
2 parents 5d9d688 + e172c5c commit 18d44cd
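
For context on the optimization named in the commit message: per-channel dequantization applies a separate scale and zero point to each channel of a quantized tensor, so the hot path is an affine transform over every element of each channel. The scalar sketch below only illustrates that access pattern; the function name and signature are hypothetical and this is not the ExecuTorch kernel touched by this diff, which vectorizes the equivalent inner loop for the common case.

#include <cstdint>

// Hypothetical scalar reference for per-channel dequantization (illustration
// only, not the ExecuTorch implementation). Each of the `channels` rows has
// its own scale and zero point; the inner loop over `elems_per_channel` is
// the part a vectorized kernel would optimize.
void dequantize_per_channel_reference(
    const int8_t* in,
    const float* scales,        // one scale per channel
    const int32_t* zero_points, // one zero point per channel
    float* out,
    int64_t channels,
    int64_t elems_per_channel) {
  for (int64_t c = 0; c < channels; ++c) {
    const float scale = scales[c];
    const int32_t zp = zero_points[c];
    const int64_t base = c * elems_per_channel;
    for (int64_t i = 0; i < elems_per_channel; ++i) {
      // Affine dequantize: scale * (x - zero_point), as in the reference kernels.
      out[base + i] = scale * (static_cast<float>(in[base + i]) - zp);
    }
  }
}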

61 files changed, +1349 -536 lines

Lines changed: 66 additions & 0 deletions
@@ -0,0 +1,66 @@
+name: Android Release Artifacts
+
+on:
+  workflow_dispatch:
+    inputs:
+      version:
+        description: Version name to be uploaded for AAR release
+        required: false
+        type: string
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build-aar:
+    name: build-aar
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-clang12-android
+      submodules: 'true'
+      ref: ${{ github.sha }}
+      timeout: 90
+      upload-artifact: android-apps
+      upload-artifact-to-s3: true
+      script: |
+        set -eux
+
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
+        export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded
+
+        # Build LLM Demo for Android
+        bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}
+
+        shasum -a 256 "${ARTIFACTS_DIR_NAME}/llm_demo/executorch.aar"
+
+  upload-release-aar:
+    name: upload-release-aar
+    needs: build-aar
+    runs-on: ubuntu-22.04
+    timeout-minutes: 10
+    permissions:
+      id-token: write
+      contents: read
+    steps:
+      - name: configure aws credentials
+        uses: aws-actions/[email protected]
+        with:
+          role-to-assume: arn:aws:iam::308535385114:role/gha_executorch_upload-frameworks-android
+          aws-region: us-east-1
+      - name: Upload AAR RC to AWS S3
+        shell: bash
+        run: |
+          wget https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/llm_demo/executorch.aar
+          shasum -a 256 executorch.aar > executorch.aar.sha256sums
+
+          pip install awscli==1.32.18
+          AWS_CMD="aws s3 cp"
+          VERSION="${{ inputs.version }}"
+          VERSION_NAME="${VERSION:-temp_snapshot}"
+          ${AWS_CMD} executorch.aar s3://ossci-android/executorch/release/${VERSION_NAME}/executorch.aar --acl public-read
+          ${AWS_CMD} executorch.aar.sha256sums s3://ossci-android/executorch/release/${VERSION_NAME}/executorch.aar.sha256sums --acl public-read

README.md

Lines changed: 3 additions & 0 deletions
@@ -24,6 +24,9 @@ Check out the [Getting Started](https://pytorch.org/executorch/stable/getting-st
 
 Check out the examples of [Llama](./examples/models/llama2/README.md), [Llava](./examples/models/llava/README.md) and [other models](./examples/README.md) running on edge devices using ExecuTorch.
 
+
+**[UPDATE - 09/25]** We have added support for running [Llama 3.2 1B/3B](./examples/models/llama2/README.md) models via ExecuTorch.
+
 ## Feedback
 
 We welcome any feedback, suggestions, and bug reports from the community to help

backends/cadence/CMakeLists.txt

Lines changed: 0 additions & 49 deletions
@@ -20,7 +20,6 @@ if(NOT EXECUTORCH_ROOT)
 endif()
 
 include(${EXECUTORCH_ROOT}/build/Utils.cmake)
-include(${EXECUTORCH_ROOT}/build/Codegen.cmake)
 
 # Let files say "include <executorch/path/to/header.h>".
 set(_common_include_directories ${EXECUTORCH_ROOT}/..)
@@ -30,54 +29,6 @@ if(EXECUTORCH_NNLIB_OPT)
   set(TARGET_DIR hifi)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/third-party/nnlib)
 endif()
-set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
-
-# Source root directory for executorch.
-if(NOT EXECUTORCH_ROOT)
-  set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
-endif()
-
-if(NOT PYTHON_EXECUTABLE)
-  resolve_python_executable()
-endif()
-
-set(_common_compile_options -Wno-deprecated-declarations -fPIC)
-
-# Find prebuilt libraries. executorch package should contain portable_ops_lib,
-# etdump, bundled_program.
-find_package(executorch CONFIG REQUIRED)
-target_link_options_shared_lib(executorch)
-target_link_options_shared_lib(portable_ops_lib)
-
-target_include_directories(executorch INTERFACE ${_common_include_directories})
-
-find_package(
-  gflags REQUIRED PATHS ${CMAKE_CURRENT_BINARY_DIR}/../../third-party
-)
-
-add_executable(cadence_runner cadence_runner/cadence_runner.cpp)
-target_compile_options(executorch INTERFACE -DET_EVENT_TRACER_ENABLED)
 
 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/operators)
 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/kernels)
-
-target_include_directories(
-  etdump INTERFACE ${CMAKE_CURRENT_BINARY_DIR}/../../sdk/include
-                   ${EXECUTORCH_ROOT}/third-party/flatcc/include
-)
-
-target_include_directories(
-  cadence_runner PUBLIC ${ROOT_DIR}/.. ${CMAKE_BINARY_DIR}
-                        ${_common_include_directories}
-)
-
-target_link_libraries(
-  cadence_runner
-  executorch
-  gflags
-  etdump
-  extension_data_loader
-  bundled_program
-  cadence_ops_lib
-  flatccrt
-)

backends/cadence/build_cadence_xtensa.sh

Lines changed: 4 additions & 5 deletions
@@ -65,20 +65,19 @@ else
     -DEXECUTORCH_BUILD_HOST_TARGETS=ON \
     -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON \
     -DEXECUTORCH_BUILD_PTHREADPOOL=OFF \
-    -DEXECUTORCH_BUILD_CADENCE=OFF \
+    -DEXECUTORCH_BUILD_CPUINFO=OFF \
+    -DEXECUTORCH_BUILD_FLATC=OFF \
+    -DEXECUTORCH_BUILD_CADENCE=ON \
     -DFLATC_EXECUTABLE="$(which flatc)" \
+    -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
     -DEXECUTORCH_ENABLE_LOGGING=ON \
     -DEXECUTORCH_ENABLE_PROGRAM_VERIFICATION=ON \
     -DEXECUTORCH_USE_DL=OFF \
     -DBUILD_EXECUTORCH_PORTABLE_OPS=ON \
     -DEXECUTORCH_BUILD_KERNELS_CUSTOM=OFF \
     -DPYTHON_EXECUTABLE=python3 \
     -DEXECUTORCH_NNLIB_OPT=ON \
-    -DEXECUTORCH_BUILD_GFLAGS=ON \
     -DHAVE_FNMATCH_H=OFF \
-    -DEXECUTORCH_ENABLE_EVENT_TRACER=OFF \
-    -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
-    -DEXECUTORCH_BUILD_CPUINFO=OFF \
     -Bcmake-out
   cmake --build cmake-out --target install --config Release -j16
 fi
Lines changed: 72 additions & 0 deletions
@@ -0,0 +1,72 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Set the minimum required version of CMake for this project.
+cmake_minimum_required(VERSION 3.10)
+
+if(NOT CMAKE_CXX_STANDARD)
+  set(CMAKE_CXX_STANDARD 17)
+endif()
+
+# Set the project name.
+project(cadence_backend)
+
+# Source root directory for executorch.
+if(NOT EXECUTORCH_ROOT)
+  set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)
+endif()
+
+include(${EXECUTORCH_ROOT}/build/Utils.cmake)
+include(${EXECUTORCH_ROOT}/build/Codegen.cmake)
+
+# Let files say "include <executorch/path/to/header.h>".
+set(_common_include_directories ${EXECUTORCH_ROOT}/..)
+set(TARGET_DIR reference)
+
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+if(NOT PYTHON_EXECUTABLE)
+  resolve_python_executable()
+endif()
+
+# Find prebuilt libraries. executorch package should contain portable_ops_lib,
+# etdump, bundled_program.
+find_package(executorch CONFIG REQUIRED)
+target_link_options_shared_lib(executorch)
+target_link_options_shared_lib(portable_ops_lib)
+
+target_include_directories(executorch INTERFACE ${_common_include_directories})
+
+find_package(
+  gflags REQUIRED PATHS ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party
+)
+
+add_executable(cadence_runner cadence_runner.cpp)
+target_compile_options(executorch INTERFACE -DET_EVENT_TRACER_ENABLED)
+
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/operators)
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/kernels)
+
+target_include_directories(
+  etdump INTERFACE ${CMAKE_CURRENT_BINARY_DIR}/../../../devtools/include
+                   ${EXECUTORCH_ROOT}/third-party/flatcc/include
+)
+
+target_include_directories(
+  cadence_runner PUBLIC ${ROOT_DIR}/../.. ${CMAKE_BINARY_DIR}
+                        ${_common_include_directories}
+)
+
+target_link_libraries(
+  cadence_runner
+  executorch
+  gflags
+  etdump
+  extension_data_loader
+  bundled_program
+  cadence_ops_lib
+  flatccrt
+)

backends/cadence/build_cadence_runner.sh renamed to backends/cadence/cadence_runner/build_cadence_runner.sh

Lines changed: 5 additions & 4 deletions
@@ -12,7 +12,7 @@ set -euo pipefail
 SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
 readonly SCRIPT_DIR
 
-readonly EXECUTORCH_ROOT="${SCRIPT_DIR}/../.."
+readonly EXECUTORCH_ROOT="${SCRIPT_DIR}/../../.."
 
 # Allow overriding the number of build jobs. Default to 9.
 export CMAKE_BUILD_PARALLEL_LEVEL="${CMAKE_BUILD_PARALLEL_LEVEL:-9}"
@@ -32,8 +32,9 @@ main() {
     -DEXECUTORCH_BUILD_PTHREADPOOL=OFF \
     -DEXECUTORCH_BUILD_CPUINFO=OFF \
     -DEXECUTORCH_ENABLE_LOGGING=ON \
-    -Bcmake-out .
-  cmake --build cmake-out --target install --config Release
+    -DEXECUTORCH_NNLIB_OPT=OFF \
+    -Bcmake-out
+  cmake --build cmake-out --target install --config Release -j16
 
   local example_dir=backends/cadence
   local build_dir="cmake-out/${example_dir}"
@@ -43,7 +44,7 @@ main() {
     -DCMAKE_BUILD_TYPE=Release \
    -B"${build_dir}" \
     "${example_dir}"
-  cmake --build "${build_dir}" --config Release
+  cmake --build "${build_dir}" --config Release -j16
 
   local runner="${PWD}/${build_dir}/cadence_runner"
   if [[ ! -f "${runner}" ]]; then

backends/cadence/hifi/kernels/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
@@ -10,6 +10,8 @@ add_library(
   kernels.cpp
   ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/matmul_asym8uxasym8u_asym8u.cpp
 )
+# Let files say "include <executorch/path/to/header.h>".
+set(_common_include_directories ${EXECUTORCH_ROOT}/..)
 
 target_include_directories(
   cadence_kernels
@@ -19,6 +21,7 @@ target_include_directories(
   ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/include/nnlib
   ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/include
   ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/algo/ndsp/hifi4/include/
+  ${_common_include_directories}
 )
 
 target_link_libraries(cadence_kernels PRIVATE xa_nnlib)

backends/cadence/hifi/operators/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -28,6 +28,7 @@ set(_aten_ops__srcs
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/matmul_ops_util.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/reduce_util.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/repeat_util.cpp"
+  "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/slice_util.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_add.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_bmm.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_cat.cpp"

backends/cadence/reference/kernels/CMakeLists.txt

Lines changed: 6 additions & 1 deletion
@@ -7,4 +7,9 @@
 # lint_cmake: -linelength
 add_library(cadence_kernels kernels.cpp)
 
-target_include_directories(cadence_kernels PUBLIC .)
+# Let files say "include <executorch/path/to/header.h>".
+set(_common_include_directories ${EXECUTORCH_ROOT}/..)
+
+target_include_directories(cadence_kernels PUBLIC .
+                           ${_common_include_directories}
+)

backends/cadence/reference/kernels/kernels.cpp

Lines changed: 7 additions & 10 deletions
@@ -9,6 +9,7 @@
 #include <math.h>
 #include <algorithm>
 #include <cstring>
+#include <limits>
 #include <numeric>
 
 namespace impl {
@@ -17,8 +18,7 @@ namespace kernels {
 
 // Quantize a fp32 value to an int8_t/uint8_t value
 template <typename T>
-__attribute__((always_inline)) T
-quantize(const float x, float scale, int32_t zero_point) {
+T quantize(const float x, float scale, int32_t zero_point) {
   constexpr float min_val = std::numeric_limits<T>::min();
   constexpr float max_val = std::numeric_limits<T>::max();
   float tmp = roundf(x * scale + zero_point);
@@ -40,8 +40,7 @@ void quantize(
 
 // Dequantize an int8_t/uint8_t value to an fp32 value
 template <typename T>
-__attribute__((always_inline)) float
-dequantize(const T x, float scale, int32_t zero_point) {
+float dequantize(const T x, float scale, int32_t zero_point) {
   return scale * (x - zero_point);
 }
 
@@ -60,9 +59,8 @@ void dequantize(
 
 // explicit template instantiation
 
-#define typed_quantize_val(dtype)                          \
-  template __attribute__((always_inline)) dtype quantize(  \
-      const float x, float inv_scale, int32_t zero_point);
+#define typed_quantize_val(dtype) \
+  template dtype quantize(const float x, float inv_scale, int32_t zero_point);
 typed_quantize_val(int8_t);
 typed_quantize_val(uint8_t);
 typed_quantize_val(int16_t);
@@ -82,9 +80,8 @@ typed_quantize_vec(int16_t);
 typed_quantize_vec(int32_t);
 #undef typed_quantize_vec
 
-#define typed_dequantize_val(dtype)                          \
-  template __attribute__((always_inline)) float dequantize(  \
-      const dtype x, float scale, int32_t zero_point);
+#define typed_dequantize_val(dtype) \
+  template float dequantize(const dtype x, float scale, int32_t zero_point);
 typed_dequantize_val(int8_t);
 typed_dequantize_val(uint8_t);
 typed_dequantize_val(int16_t);
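
The quantize/dequantize pair above follows the usual affine mapping: quantize computes round(x * inv_scale + zero_point) clamped to the target type's range, and dequantize computes scale * (x - zero_point). A minimal standalone round-trip sketch of those formulas, written here purely for illustration (it reuses the formulas, not the ExecuTorch headers):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <limits>

// Standalone illustration of the affine quantize/dequantize formulas seen in
// kernels.cpp above; not the ExecuTorch implementation itself.
template <typename T>
T quantize(float x, float inv_scale, int32_t zero_point) {
  constexpr float min_val = std::numeric_limits<T>::min();
  constexpr float max_val = std::numeric_limits<T>::max();
  float tmp = std::round(x * inv_scale + zero_point);
  return static_cast<T>(std::max(min_val, std::min(max_val, tmp)));
}

template <typename T>
float dequantize(T x, float scale, int32_t zero_point) {
  return scale * (static_cast<float>(x) - zero_point);
}

int main() {
  const float scale = 0.05f;     // dequantization scale
  const int32_t zero_point = 10; // shared zero point
  const float x = 1.23f;
  // The quantize helper takes the inverse scale, matching the kernel's
  // `inv_scale` parameter name.
  const int8_t q = quantize<int8_t>(x, 1.0f / scale, zero_point);
  const float back = dequantize<int8_t>(q, scale, zero_point);
  std::printf("q=%d, dequantized=%.4f\n", q, back); // prints q=35, dequantized=1.2500
  return 0;
}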

backends/cadence/reference/operators/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -32,6 +32,7 @@ set(_aten_ops__srcs
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/matmul_ops_util.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/reduce_util.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/repeat_util.cpp"
+  "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/slice_util.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/pattern/unary_ufunc_realhb_to_floath.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_bmm.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_cat.cpp"

backends/cadence/runtime/executor.py

Lines changed: 3 additions & 1 deletion
@@ -106,7 +106,9 @@ def __init__(
         working_dir: str = "",
     ):
         self.working_dir = working_dir
-        self.executor_builder = "./backends/cadence/build_cadence_runner.sh"
+        self.executor_builder = (
+            "./backends/cadence/cadence_runner/build_cadence_runner.sh"
+        )
         self.execute_runner = "./cmake-out/backends/cadence/cadence_runner"
         self.bundled_program_path: str = "CadenceDemoModel.bpte"

backends/qualcomm/aot/python/TARGETS

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+load(":targets.bzl", "define_common_targets")
+
+oncall("executorch")
+
+define_common_targets()
