pytorch
diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh
Lines changed: 17 additions & 6 deletions
diff --git a/.ci/scripts/test_quantized_aot_lib.sh b/.ci/scripts/test_quantized_aot_lib.sh
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/android.yml b/.github/workflows/android.yml
Lines changed: 3 additions & 2 deletions
diff --git a/.github/workflows/doc-build.yml b/.github/workflows/doc-build.yml
Lines changed: 7 additions & 0 deletions
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
Lines changed: 1 addition & 1 deletion
diff --git a/CMakeLists.txt b/CMakeLists.txt
Lines changed: 2 additions & 11 deletions
diff --git a/backends/vulkan/runtime/api/Pipeline.cpp b/backends/vulkan/runtime/api/Pipeline.cpp
Lines changed: 2 additions & 4 deletions
diff --git a/backends/vulkan/runtime/api/Pipeline.h b/backends/vulkan/runtime/api/Pipeline.h
Lines changed: 7 additions & 2 deletions
diff --git a/build/Utils.cmake b/build/Utils.cmake
Lines changed: 0 additions & 2 deletions
diff --git a/build/build_apple_frameworks.sh b/build/build_apple_frameworks.sh
Lines changed: 2 additions & 5 deletions
diff --git a/build/executorch-config.cmake b/build/executorch-config.cmake
Lines changed: 1 addition & 1 deletion
diff --git a/examples/demo-apps/android/LlamaDemo/android-llama2-device-farm-test-spec.yml b/examples/demo-apps/android/LlamaDemo/android-llama2-device-farm-test-spec.yml
Lines changed: 76 additions & 0 deletions
diff --git a/examples/models/llama2/CMakeLists.txt b/examples/models/llama2/CMakeLists.txt
Lines changed: 22 additions & 0 deletions
@@ -12,7 +12,7 @@ source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
 MODEL_NAME=$1 # stories110M.pt
 BUILD_TOOL=$2 # buck2 or cmake
 DTYPE=$3 # fp16 or fp32
-MODE=${4:-"xnnpack"} # portable or xnnpack
+MODE=${4:-"xnnpack+custom"} # portable or xnnpack+custom or xnnpack+custom+qe
 if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
     echo "Expecting atleast 4 positional arguments"
     echo "Usage: [...]"
@@ -37,7 +37,7 @@ if [[ -z "${MODE:-}" ]]; then
   exit 1
 fi
 
-if [[ "${MODE}" =~ xnnpack.* ]]; then
+if [[ "${MODE}" =~ .*xnnpack.* ]]; then
   XNNPACK=ON
 else
   XNNPACK=OFF
@@ -49,6 +49,12 @@ else
   CUSTOM=OFF
 fi
 
+if [[ "${MODE}" =~ .*qe.* ]]; then
+  QE=ON
+else
+  QE=OFF
+fi
+
 if [[ -z "${BUCK:-}" ]]; then
   BUCK=buck2
 fi
@@ -84,7 +90,6 @@ cmake_build_llama_runner() {
         -DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
         -DEXECUTORCH_BUILD_OPTIMIZED=ON \
         -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
-        -DEXECUTORCH_BUILD_OPTIMIZED=ON \
         -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
         -Bcmake-out/${dir} \
         ${dir}
@@ -126,9 +131,15 @@ fi
 # Export model.
 EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
 echo "Exporting ${EXPORTED_MODEL_NAME}"
-EXPORT_ARGS="-c stories110M.pt -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME}"
-if [[ "${MODE}" == "xnnpack+kv+custom" ]]; then
-  EXPORT_ARGS="${EXPORT_ARGS} -kv --use_sdpa_with_kv_cache -X -qmode 8da4w -G 128"
+EXPORT_ARGS="-c stories110M.pt -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME} -kv"
+if [[ "${XNNPACK}" == "ON" ]]; then
+  EXPORT_ARGS="${EXPORT_ARGS} -X -qmode 8da4w -G 128"
+fi
+if [[ "${CUSTOM}" == "ON" ]]; then
+  EXPORT_ARGS="${EXPORT_ARGS} --use_sdpa_with_kv_cache"
+fi
+if [[ "${QE}" == "ON" ]]; then
+  EXPORT_ARGS="${EXPORT_ARGS} --embedding-quantize 8,1024"
 fi
 # Add dynamically linked library location
 $PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}
 
@@ -24,7 +24,7 @@ build_cmake_quantized_aot_lib() {
     && retry cmake -DBUCK2=buck2 \
       -DCMAKE_BUILD_TYPE=Release \
       -DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \
-      -DEXECUTORCH_BUILD_QUANTIZED=ON \
+      -DEXECUTORCH_BUILD_QUANTIZED_OPS_AOT=ON \
       -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..)
 
   cmake --build ${CMAKE_OUTPUT_DIR} -j4
 
@@ -10,7 +10,8 @@ on:
       - .ci/docker/**
       - .github/workflows/android.yml
       - install_requirements.sh
-      - examples/demo-apps/**
+      - examples/demo-apps/android/**
+      - extension/android/**
       - extension/module/**
   workflow_dispatch:
 
@@ -101,7 +102,7 @@ jobs:
       android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/app-debug.apk
       android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/app-debug-androidTest.apk
       # The test spec can be downloaded from https://ossci-assets.s3.amazonaws.com/android-llama2-device-farm-test-spec.yml
-      test-spec: arn:aws:devicefarm:us-west-2:308535385114:upload:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/414cb54d-4d83-4576-8317-93244e4dc50e
+      test-spec: arn:aws:devicefarm:us-west-2:308535385114:upload:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/abd86868-fa63-467e-a5c7-218194665a77
       # The exported llama2 model and its tokenizer, can be downloaded from https://ossci-assets.s3.amazonaws.com/executorch-android-llama2-7b.zip.
       # Among the input, this is the biggest file and uploading it to AWS beforehand makes the test run much faster
       extra-data: arn:aws:devicefarm:us-west-2:308535385114:upload:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/bd15825b-ddab-4e47-9fef-a9c8935778dd
@@ -68,6 +68,13 @@ jobs:
         make html
         cd ..
 
+        # If this is the main branch, add a noindex tag to all .html files to exclude them from Google Search indexing.
+        GITHUB_REF=${{ github.ref }}
+        echo "GitHub Ref: ${GITHUB_REF}"
+        if [[ "${{ github.ref }}" == 'refs/heads/main' ]]; then
+          find docs/_build/html/ -name "*.html" -print0 | xargs -0 sed -i '/<head>/a \ \ <meta name="robots" content="noindex">';
+        fi
+
         cp -rf docs/_build/html/* "${RUNNER_DOCS_DIR}"
 
         mv docs/_build/html "${RUNNER_ARTIFACT_DIR}"
 
@@ -90,7 +90,7 @@ jobs:
       matrix:
         dtype: [fp32]
         build-tool: [buck2, cmake]
-        mode: [portable, xnnpack+kv+custom]
+        mode: [portable, xnnpack+custom, xnnpack+custom+qe]
       fail-fast: false
     with:
       runner: linux.2xlarge
 
@@ -164,8 +164,6 @@ option(EXECUTORCH_BUILD_QNN "Build the Qualcomm backend" OFF)
 
 option(EXECUTORCH_BUILD_OPTIMIZED "Build the optimized kernels" OFF)
 
-option(EXECUTORCH_BUILD_QUANTIZED "Build the quantized kernels" OFF)
-
 option(EXECUTORCH_BUILD_SDK "Build the ExecuTorch SDK")
 
 option(EXECUTORCH_BUILD_SIZE_TEST "Build the size test" OFF)
@@ -413,9 +411,7 @@ if(EXECUTORCH_BUILD_OPTIMIZED)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/optimized)
 endif()
 
-if(EXECUTORCH_BUILD_QUANTIZED)
-  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/quantized)
-endif()
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/quantized)
 
 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/configurations)
 
@@ -445,19 +441,14 @@ cmake_dependent_option(
   EXECUTORCH_BUILD_HOST_TARGETS OFF)
 if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
   # Baseline libraries that executor_runner will link against.
-  set(_executor_runner_libs executorch gflags)
+  set(_executor_runner_libs executorch gflags quantized_ops_lib)
 
   if(EXECUTORCH_BUILD_OPTIMIZED)
     list(APPEND _executor_runner_libs optimized_native_cpu_ops_lib)
   else()
     list(APPEND _executor_runner_libs portable_ops_lib)
   endif()
 
-  # Generate lib to register quantized ops
-  if(EXECUTORCH_BUILD_QUANTIZED)
-    list(APPEND _executor_runner_libs quantized_ops_lib)
-  endif()
-
   add_executable(executor_runner ${_executor_runner__srcs})
   if(CMAKE_BUILD_TYPE STREQUAL "Release" AND NOT APPLE)
     target_link_options(executor_runner PRIVATE "LINKER:--gc-sections")
 
@@ -157,9 +157,7 @@ bool operator==(const SpecVar& lhs, const SpecVar& rhs) {
   return false;
 }
 
-SpecVarList::SpecVarList() {
-  vars.reserve(8);
-}
+SpecVarList::SpecVarList() {}
 
 SpecVarList::SpecVarList(std::initializer_list<SpecVar> init_list) {
   vars.resize(init_list.size());
@@ -176,7 +174,7 @@ std::vector<VkSpecializationMapEntry> SpecVarList::generate_map_entries()
   map_entries.resize(vars.size());
   uint32_t cur_offset = 0u;
   for (uint32_t i = 0; i < vars.size(); ++i) {
-    map_entries[i] = {
+    map_entries.at(i) = {
         i, cur_offset + vars.at(i).val_offset(), vars.at(i).val_size()};
     cur_offset += sizeof(SpecVar);
   }
 
@@ -53,12 +53,17 @@ struct SpecVar final {
 
 bool operator==(const SpecVar& lhs, const SpecVar& rhs);
 
-struct SpecVarList final {
+class SpecVarList final {
   std::vector<SpecVar> vars;
 
+ public:
   SpecVarList();
   SpecVarList(std::initializer_list<SpecVar> init_list);
 
+  inline const SpecVar& at(const size_t index) const {
+    return vars.at(index);
+  }
+
   inline const SpecVar* data() const {
     return vars.data();
   }
@@ -235,7 +240,7 @@ class ComputePipelineCache final {
       seed = utils::hash_combine(seed, std::hash<uint32_t>()(spec_vars.size()));
 
       for (int i = 0; i < spec_vars.size(); ++i) {
-        const SpecVar& spec_var = spec_vars.vars.at(i);
+        const SpecVar& spec_var = spec_vars.at(i);
         size_t new_seed = 0;
         switch (spec_var.type) {
           case SpecVar::Type::FLOAT:
 
@@ -74,8 +74,6 @@ function(executorch_print_configuration_summary)
     STATUS "  EXECUTORCH_BUILD_QNN                   : ${EXECUTORCH_BUILD_QNN}")
   message(STATUS "  EXECUTORCH_BUILD_OPTIMIZED             : "
                  "${EXECUTORCH_BUILD_OPTIMIZED}")
-  message(STATUS "  EXECUTORCH_BUILD_QUANTIZED             : "
-                 "${EXECUTORCH_BUILD_QUANTIZED}")
   message(
     STATUS "  EXECUTORCH_BUILD_SDK                   : ${EXECUTORCH_BUILD_SDK}")
   message(
 
@@ -22,7 +22,7 @@ CUSTOM=OFF
 MPS=OFF
 OPTIMIZED=OFF
 PORTABLE=OFF
-QUANTIZED=OFF
+QUANTIZED=ON
 XNNPACK=OFF
 HEADERS_PATH="include"
 EXECUTORCH_FRAMEWORK="executorch:libexecutorch.a,libexecutorch_no_prim_ops.a,libextension_apple.a,libextension_data_loader.a,libextension_module.a:$HEADERS_PATH"
@@ -51,7 +51,6 @@ usage() {
   echo "  --mps                Include this flag to build the Metal Performance Shaders backend."
   echo "  --optimized          Include this flag to build the Optimized backend."
   echo "  --portable           Include this flag to build the Portable backend."
-  echo "  --quantized          Include this flag to build the Quantized backend."
   echo "  --xnnpack            Include this flag to build the XNNPACK backend."
   echo
   echo "Example:"
@@ -74,7 +73,6 @@ for arg in "$@"; do
       --mps) MPS=ON ;;
       --optimized) OPTIMIZED=ON ;;
       --portable) PORTABLE=ON ;;
-      --quantized) QUANTIZED=ON ;;
       --xnnpack) XNNPACK=ON ;;
       *)
       if [[ -z "$SOURCE_ROOT_DIR" ]]; then
@@ -137,7 +135,6 @@ cmake_build() {
         -DEXECUTORCH_BUILD_CUSTOM=$CUSTOM \
         -DEXECUTORCH_BUILD_MPS=$MPS \
         -DEXECUTORCH_BUILD_OPTIMIZED=$OPTIMIZED \
-        -DEXECUTORCH_BUILD_QUANTIZED=$QUANTIZED \
         -DEXECUTORCH_BUILD_XNNPACK=$XNNPACK \
         ${platform_flag:+-DIOS_PLATFORM=$platform_flag}
     cmake --build . --config $MODE
@@ -181,7 +178,7 @@ append_framework_flag "$CUSTOM" "$CUSTOM_FRAMEWORK"
 append_framework_flag "$MPS" "$MPS_FRAMEWORK"
 append_framework_flag "$OPTIMIZED" "$OPTIMIZED_FRAMEWORK"
 append_framework_flag "$PORTABLE" "$PORTABLE_FRAMEWORK"
-append_framework_flag "$QUANTIZED" "$QUANTIZED_FRAMEWORK"
+append_framework_flag "ON" "$QUANTIZED_FRAMEWORK"
 append_framework_flag "$XNNPACK" "$XNNPACK_FRAMEWORK"
 
 "$SOURCE_ROOT_DIR"/build/create_frameworks.sh "${FRAMEWORK_FLAGS[@]}"
 
@@ -38,7 +38,7 @@ set(lib_list
     etdump bundled_program extension_data_loader ${FLATCCRT_LIB} mpsdelegate
     qnn_executorch_backend portable_ops_lib extension_module xnnpack_backend
     XNNPACK cpuinfo pthreadpool vulkan_backend optimized_kernels cpublas eigen_blas
-    optimized_ops_lib optimized_native_cpu_ops_lib
+    optimized_ops_lib optimized_native_cpu_ops_lib quantized_kernels quantized_ops_lib
 )
 foreach(lib ${lib_list})
     # Name of the variable which stores result of the find_library search
 
@@ -0,0 +1,76 @@
+version: 0.1
+
+android_test_host: amazon_linux_2
+
+phases:
+  install:
+    commands:
+
+  pre_test:
+    commands:
+      # Prepare the model and the tokenizer
+      - adb -s $DEVICEFARM_DEVICE_UDID shell "ls -la /sdcard/"
+      - adb -s $DEVICEFARM_DEVICE_UDID shell "mkdir -p /data/local/tmp/llama/"
+      - adb -s $DEVICEFARM_DEVICE_UDID shell "mv /sdcard/tokenizer.bin /data/local/tmp/llama/tokenizer.bin"
+      - adb -s $DEVICEFARM_DEVICE_UDID shell "mv /sdcard/xnnpack_llama2.pte /data/local/tmp/llama/xnnpack_llama2.pte"
+      - adb -s $DEVICEFARM_DEVICE_UDID shell "chmod 664 /data/local/tmp/llama/tokenizer.bin"
+      - adb -s $DEVICEFARM_DEVICE_UDID shell "chmod 664 /data/local/tmp/llama/xnnpack_llama2.pte"
+      - adb -s $DEVICEFARM_DEVICE_UDID shell "ls -la /data/local/tmp/llama/"
+
+  test:
+    commands:
+      # By default, the following ADB command is used by Device Farm to run your Instrumentation test.
+      # Please refer to Android's documentation for more options on running instrumentation tests with adb:
+      # https://developer.android.com/studio/test/command-line#run-tests-with-adb
+      - echo "Starting the Instrumentation test"
+      - |
+        adb -s $DEVICEFARM_DEVICE_UDID shell "am instrument -r -w --no-window-animation \
+        $DEVICEFARM_TEST_PACKAGE_NAME/$DEVICEFARM_TEST_PACKAGE_RUNNER 2>&1 || echo \": -1\"" |
+        tee $DEVICEFARM_LOG_DIR/instrument.log
+
+      # Parse the results
+      - |-
+        INSTRUMENT_LOG="$DEVICEFARM_LOG_DIR/instrument.log"
+
+        DID_ANY_TESTS_START=$(grep "INSTRUMENTATION_STATUS_CODE: 1" $INSTRUMENT_LOG | wc -l);
+        TESTS_PASSED=$(grep "INSTRUMENTATION_STATUS_CODE: 0" $INSTRUMENT_LOG | wc -l);
+        TESTS_ERRORED=$(grep "INSTRUMENTATION_STATUS_CODE: -1" $INSTRUMENT_LOG | wc -l);
+        TESTS_FAILED=$(grep "INSTRUMENTATION_STATUS_CODE: -2" $INSTRUMENT_LOG | wc -l);
+        TESTS_IGNORED=$(grep "INSTRUMENTATION_STATUS_CODE: -3" $INSTRUMENT_LOG | wc -l);
+        TESTS_ASSUMPTION_FAILED=$(grep "INSTRUMENTATION_STATUS_CODE: -4" $INSTRUMENT_LOG | wc -l);
+        TESTS_PROCESSES_CRASHED=$(grep "INSTRUMENTATION_RESULT: shortMsg=Process crashed." $INSTRUMENT_LOG | wc -l);
+
+      # Print the results for the CI job; every branch ending in `false` gives this command a non-zero exit status
+      - |
+        INSTRUMENT_LOG="$DEVICEFARM_LOG_DIR/instrument.log"
+
+        if [ $DID_ANY_TESTS_START -eq 0 ];
+        then
+          echo "[PyTorch] Marking the test suite as failed because no tests started!";
+          false;
+        elif [ $TESTS_FAILED -ne 0 ];
+        then
+          OBSERVED_TPS=$(grep "The observed TPS " $INSTRUMENT_LOG | tail -n 1)
+          if [ -n "${OBSERVED_TPS}" ];
+          then
+            echo "[PyTorch] ${OBSERVED_TPS}";
+          else
+            echo "[PyTorch] Marking the test suite as failed because it failed to load the model";
+            false;
+          fi
+        elif [ $TESTS_ERRORED -ne 0 ];
+        then
+          echo "[PyTorch] Marking the test suite as failed because $TESTS_ERRORED tests errored!";
+          false;
+        elif [ $TESTS_PROCESSES_CRASHED -ne 0 ];
+        then
+          echo "[PyTorch] Marking the test suite as failed because the app crashed due to OOM!";
+          false;
+        fi;
+
+  post_test:
+    commands:
+
+artifacts:
+  # By default, Device Farm will collect your artifacts from the $DEVICEFARM_LOG_DIR directory.
+  - $DEVICEFARM_LOG_DIR
@@ -44,6 +44,7 @@ set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)
 set(TORCH_ROOT ${EXECUTORCH_ROOT}/third-party/pytorch)
 
 include(${EXECUTORCH_ROOT}/build/Utils.cmake)
+include(${EXECUTORCH_ROOT}/build/Codegen.cmake)
 
 if(NOT PYTHON_EXECUTABLE)
   resolve_python_executable()
@@ -91,6 +92,7 @@ add_subdirectory(runner)
 if(EXECUTORCH_USE_TIKTOKEN)
   # find RE2 for tokenizer
   set(ABSL_ENABLE_INSTALL ON)
+  set(ABSL_PROPAGATE_CXX_STD ON)
   set(_pic_flag
     ${CMAKE_POSITION_INDEPENDENT_CODE})
   set(CMAKE_POSITION_INDEPENDENT_CODE ON)
@@ -118,6 +120,26 @@ else()
   target_link_options_shared_lib(portable_ops_lib)
 endif()
 
+# Merge the llama2 quantized ops yaml with the fallback yaml from kernels/quantized
+merge_yaml(
+  FUNCTIONS_YAML ${CMAKE_CURRENT_SOURCE_DIR}/ops/quantized.yaml
+  FALLBACK_YAML ${EXECUTORCH_ROOT}/kernels/quantized/quantized.yaml
+  OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR})
+
+gen_selected_ops("${CMAKE_CURRENT_BINARY_DIR}/merged.yaml" "" "")
+generate_bindings_for_kernels(
+    FUNCTIONS_YAML ${CMAKE_CURRENT_BINARY_DIR}/merged.yaml)
+message("Generated files ${gen_command_sources}")
+
+# quantized_merge_ops_lib: Register quantized op kernels into the runtime
+gen_operators_lib(
+  "quantized_merge_ops_lib"
+  KERNEL_LIBS quantized_kernels
+  DEPS executorch)
+target_include_directories(quantized_merge_ops_lib PUBLIC ${_common_include_directories})
+target_link_options_shared_lib(quantized_merge_ops_lib)
+list(APPEND link_libraries quantized_kernels quantized_merge_ops_lib)
+
 if(EXECUTORCH_BUILD_CUSTOM)
   target_link_options_shared_lib(custom_ops)
   list(APPEND link_libraries custom_ops)
Original file line number	Diff line number	Diff line change
`@@ -38,7 +38,7 @@ set(lib_list`
`38`	`38`	`etdump bundled_program extension_data_loader ${FLATCCRT_LIB} mpsdelegate`
`39`	`39`	`qnn_executorch_backend portable_ops_lib extension_module xnnpack_backend`
`40`	`40`	`XNNPACK cpuinfo pthreadpool vulkan_backend optimized_kernels cpublas eigen_blas`
`41`		`- optimized_ops_lib optimized_native_cpu_ops_lib`
	`41`	`+ optimized_ops_lib optimized_native_cpu_ops_lib quantized_kernels quantized_ops_lib`
`42`	`42`	`)`
`43`	`43`	`foreach(lib ${lib_list})`
`44`	`44`	`# Name of the variable which stores result of the find_library search`