Skip to content

Commit f81ed3f

Browse files
committed
Update base for Update on "qnn end to end flow"
Patch a few changes including: - support bool tensor type - support fp16 and fix the 8w8a quantization. - add two non-supported ops (slice_scatter and index_put) in common_defs.py stories model working end to end: AOT: fp16: ``` python -m examples.models.llama2.export_llama -kv --qnn -c stories110M.pt -p params.json ``` quantize: ``` python -m examples.models.llama2.export_llama -kv --qnn --pt2e_quantize -c stories110M.pt -p params.json ``` Runtime: ``` /llama_main --model_path=llama2_fp16_qnn_2.21.pte --tokenizer_path=tokenizer.bin --prompt="Once" ``` Output: ``` Once upon a time, there was a boy named Tim. Tim had a pet dog named Max. Max was a big, strong dog. They liked to play and run in the park. One day, Tim and Max went to the park to play. They saw a cat. The cat was up in a tree. Max wanted to help the cat. He tried to climb the tree, but he could not. Then, something unexpected happened. Max started to climb the tree! He was very strong. Max helped the cat come down. The cat was happy. Tim was so proud of his pet. ``` The Stories model is too small and sensitive to quantization. Differential Revision: [D56119738](https://our.internmc.facebook.com/intern/diff/D56119738/) [ghstack-poisoned]
2 parents 5025173 + 4c552d4 commit f81ed3f

File tree

171 files changed

+6225
-1337
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

171 files changed

+6225
-1337
lines changed

.ci/docker/ci_commit_pins/pytorch.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
868e5ced5df34f1aef3703654f76e03f5126b534
1+
19f50333e91e9e8b20a78517becd74bca70c7d46

.ci/scripts/test_llama.sh

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
1212
MODEL_NAME=$1 # stories110M.pt
1313
BUILD_TOOL=$2 # buck2 or cmake
1414
DTYPE=$3 # fp16 or fp32
15-
MODE=${4:-"xnnpack"} # portable or xnnpack
15+
MODE=${4:-"xnnpack+custom"} # portable or xnnpack+custom or xnnpack+custom+qe
1616
if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
1717
echo "Expecting atleast 4 positional arguments"
1818
echo "Usage: [...]"
@@ -37,7 +37,7 @@ if [[ -z "${MODE:-}" ]]; then
3737
exit 1
3838
fi
3939

40-
if [[ "${MODE}" =~ xnnpack.* ]]; then
40+
if [[ "${MODE}" =~ .*xnnpack.* ]]; then
4141
XNNPACK=ON
4242
else
4343
XNNPACK=OFF
@@ -49,6 +49,12 @@ else
4949
CUSTOM=OFF
5050
fi
5151

52+
if [[ "${MODE}" =~ .*qe.* ]]; then
53+
QE=ON
54+
else
55+
QE=OFF
56+
fi
57+
5258
if [[ -z "${BUCK:-}" ]]; then
5359
BUCK=buck2
5460
fi
@@ -69,6 +75,7 @@ cmake_install_executorch_libraries() {
6975
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
7076
-DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
7177
-DEXECUTORCH_BUILD_OPTIMIZED=ON \
78+
-DEXECUTORCH_BUILD_QUANTIZED=ON \
7279
-DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
7380
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
7481
-Bcmake-out .
@@ -84,7 +91,6 @@ cmake_build_llama_runner() {
8491
-DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
8592
-DEXECUTORCH_BUILD_OPTIMIZED=ON \
8693
-DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
87-
-DEXECUTORCH_BUILD_OPTIMIZED=ON \
8894
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
8995
-Bcmake-out/${dir} \
9096
${dir}
@@ -126,9 +132,15 @@ fi
126132
# Export model.
127133
EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
128134
echo "Exporting ${EXPORTED_MODEL_NAME}"
129-
EXPORT_ARGS="-c stories110M.pt -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME}"
130-
if [[ "${MODE}" == "xnnpack+kv+custom" ]]; then
131-
EXPORT_ARGS="${EXPORT_ARGS} -kv --use_sdpa_with_kv_cache -X -qmode 8da4w -G 128"
135+
EXPORT_ARGS="-c stories110M.pt -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME} -kv"
136+
if [[ "${XNNPACK}" == "ON" ]]; then
137+
EXPORT_ARGS="${EXPORT_ARGS} -X -qmode 8da4w -G 128"
138+
fi
139+
if [[ "${CUSTOM}" == "ON" ]]; then
140+
EXPORT_ARGS="${EXPORT_ARGS} --use_sdpa_with_kv_cache"
141+
fi
142+
if [[ "${QE}" == "ON" ]]; then
143+
EXPORT_ARGS="${EXPORT_ARGS} --embedding-quantize 8,1024"
132144
fi
133145
# Add dynamically linked library location
134146
$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}

.ci/scripts/test_quantized_aot_lib.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ build_cmake_quantized_aot_lib() {
2424
&& retry cmake -DBUCK2=buck2 \
2525
-DCMAKE_BUILD_TYPE=Release \
2626
-DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \
27-
-DEXECUTORCH_BUILD_QUANTIZED=ON \
27+
-DEXECUTORCH_BUILD_QUANTIZED_OPS_AOT=ON \
2828
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..)
2929

3030
cmake --build ${CMAKE_OUTPUT_DIR} -j4

.github/workflows/android.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ on:
1010
- .ci/docker/**
1111
- .github/workflows/android.yml
1212
- install_requirements.sh
13-
- examples/demo-apps/**
13+
- examples/demo-apps/android/**
14+
- extension/android/**
1415
- extension/module/**
1516
workflow_dispatch:
1617

@@ -101,7 +102,7 @@ jobs:
101102
android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/app-debug.apk
102103
android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/app-debug-androidTest.apk
103104
# The test spec can be downloaded from https://ossci-assets.s3.amazonaws.com/android-llama2-device-farm-test-spec.yml
104-
test-spec: arn:aws:devicefarm:us-west-2:308535385114:upload:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/414cb54d-4d83-4576-8317-93244e4dc50e
105+
test-spec: arn:aws:devicefarm:us-west-2:308535385114:upload:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/abd86868-fa63-467e-a5c7-218194665a77
105106
# The exported llama2 model and its tokenizer, can be downloaded from https://ossci-assets.s3.amazonaws.com/executorch-android-llama2-7b.zip.
106107
# Among the input, this is the biggest file and uploading it to AWS beforehand makes the test run much faster
107108
extra-data: arn:aws:devicefarm:us-west-2:308535385114:upload:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/bd15825b-ddab-4e47-9fef-a9c8935778dd

.github/workflows/doc-build.yml

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -68,23 +68,22 @@ jobs:
6868
make html
6969
cd ..
7070
71+
# If it's main branch, add noindex tag to all .html files to exclude from Google Search indexing.
72+
GITHUB_REF=${{ github.ref }}
73+
echo "GitHub Ref: ${GITHUB_REF}"
74+
if [[ "${{ github.ref }}" == 'refs/heads/main' ]]; then
75+
find docs/_build/html/ -name "*.html" -print0 | xargs -0 sed -i '/<head>/a \ \ <meta name="robots" content="noindex">';
76+
fi
77+
7178
cp -rf docs/_build/html/* "${RUNNER_DOCS_DIR}"
7279
7380
mv docs/_build/html "${RUNNER_ARTIFACT_DIR}"
7481
7582
ls -R "${RUNNER_ARTIFACT_DIR}"/*/*.html
7683
77-
# Enable preview later. Previews are available publicly
78-
#
79-
# upload-preview:
80-
# if: github.repository == 'pytorch/executorch' && github.event_name == 'push' &&
81-
# (github.ref_type == 'branch' && github.ref_name == 'main')
82-
# uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
83-
8484
upload-gh-pages:
8585
needs: build
86-
if: github.repository == 'pytorch/executorch' && github.event_name == 'push' &&
87-
((github.ref_type == 'branch' && github.ref_name == 'main') || github.ref_type == 'tag')
86+
if: github.repository == 'pytorch/executorch' && github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/release/') || startsWith(github.ref, 'refs/tags/v'))
8887
permissions:
8988
contents: write
9089
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
@@ -108,10 +107,16 @@ jobs:
108107
elif [[ "${REF_TYPE}" == tag ]]; then
109108
# Strip the leading "v" as well as the trailing patch version and "-rc" suffix.
110109
# For example: 'v0.1.2' -> '0.1' and 'v0.1.2-rc1' -> 0.1.
111-
TARGET_FOLDER=$(echo "${REF_NAME}" | sed 's/^v//i; s/-rc[0-9]*$//; s/\.[0-9]*$//')
112-
else
113-
echo "ERROR: Invalid REF_TYPE: ${REF_TYPE}. Expected 'branch' or 'tag'."
114-
exit 1
110+
case "${REF_NAME}" in
111+
*-rc*)
112+
echo "Aborting upload since this is an RC tag: ${REF_NAME}"
113+
# We don't generate -rc* documentation but for actual tag only.
114+
exit 0
115+
;;
116+
*)
117+
TARGET_FOLDER=$(echo "${REF_NAME}" | sed 's/v\([0-9]\+\)\.\([0-9]\+\)\.[0-9]\+/\1.\2/')
118+
;;
119+
esac
115120
fi
116121
echo "Target Folder: ${TARGET_FOLDER}"
117122
@@ -122,12 +127,6 @@ jobs:
122127
mv "${RUNNER_ARTIFACT_DIR}"/html/* "${TARGET_FOLDER}"
123128
git add "${TARGET_FOLDER}" || true
124129
125-
# If it's main branch, add noindex tag to all .html files to exclude from Google Search indexing.
126-
if [[ "${REF_NAME}" == 'main' ]]; then
127-
find "${TARGET_FOLDER}" -type f -name "*.html" -exec sed -i '/<head>/a <meta name="robots" content="noindex">' {} \;
128-
git add "${TARGET_FOLDER}"/**/*.html || true
129-
fi
130-
131130
git config user.name 'pytorchbot'
132131
git config user.email '[email protected]'
133132
git commit -m "Auto-generating sphinx docs" || true

.github/workflows/pull.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ jobs:
9090
matrix:
9191
dtype: [fp32]
9292
build-tool: [buck2, cmake]
93-
mode: [portable, xnnpack+kv+custom]
93+
mode: [portable, xnnpack+custom, xnnpack+custom+qe]
9494
fail-fast: false
9595
with:
9696
runner: linux.2xlarge

.gitmodules

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,3 +62,9 @@
6262
[submodule "examples/third-party/LLaVA"]
6363
path = examples/third-party/LLaVA
6464
url = https://github.com/haotian-liu/LLaVA.git
65+
[submodule "examples/models/llama2/third-party/re2"]
66+
path = examples/models/llama2/third-party/re2
67+
url = https://github.com/google/re2.git
68+
[submodule "examples/models/llama2/third-party/abseil-cpp"]
69+
path = examples/models/llama2/third-party/abseil-cpp
70+
url = https://github.com/abseil/abseil-cpp.git

backends/apple/coreml/CMakeLists.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ if(NOT EXECUTORCH_ROOT)
1313
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)
1414
endif()
1515

16+
option(COREML_BUILD_EXECUTOR_RUNNER "Build CoreML executor runner." OFF)
17+
1618
# inmemoryfs sources
1719
set(INMEMORYFS_SOURCES
1820
runtime/inmemoryfs/inmemory_filesystem.cpp
@@ -181,6 +183,14 @@ target_link_libraries(coremldelegate
181183
${SQLITE_LIBRARY}
182184
)
183185

186+
if(COREML_BUILD_EXECUTOR_RUNNER)
187+
target_link_libraries(coremldelegate
188+
PRIVATE
189+
portable_ops_lib
190+
portable_kernels
191+
)
192+
endif()
193+
184194
target_compile_options(coremldelegate PRIVATE "-fobjc-arc")
185195
target_compile_options(coremldelegate PRIVATE "-fno-exceptions")
186196

backends/apple/coreml/README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ For delegating the Program to the **Core ML** backend, the client must be respon
2828
import executorch.exir as exir
2929
import torch
3030

31+
from torch.export import export
32+
33+
from executorch.exir import to_edge
34+
3135
from executorch.exir.backend.backend_api import to_backend
3236

3337
from executorch.backends.apple.coreml.compiler import CoreMLBackend
@@ -42,7 +46,7 @@ class LowerableSubModel(torch.nn.Module):
4246
# Convert the lowerable module to Edge IR Representation
4347
to_be_lowered = LowerableSubModel()
4448
example_input = (torch.ones(1), )
45-
to_be_lowered_exir_submodule = exir.capture(to_be_lowered, example_input).to_edge()
49+
to_be_lowered_exir_submodule = to_edge(export(to_be_lowered, example_input))
4650

4751
# Lower to Core ML backend
4852
lowered_module = to_backend('CoreMLBackend', to_be_lowered_exir_submodule.exported_program, [])

backends/apple/coreml/runtime/delegate/ETCoreMLAssetManager.mm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -630,7 +630,7 @@ - (NSUInteger)_compact:(NSUInteger)sizeInBytes error:(NSError * __autoreleasing
630630
}
631631

632632
if (_estimatedSizeInBytes <= sizeInBytes) {
633-
return YES;
633+
return _estimatedSizeInBytes;
634634
}
635635

636636
std::error_code ec;

backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ __attribute__((objc_subclassing_restricted))
2828
/// The model.
2929
@property (readonly, strong, nonatomic) ETCoreMLModel* model;
3030

31+
/// If set to `YES` then output backing are ignored.
32+
@property (readwrite, atomic) BOOL ignoreOutputBackings;
33+
3134
@end
3235

3336
NS_ASSUME_NONNULL_END

backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.mm

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ - (instancetype)initWithModel:(ETCoreMLModel *)model {
2626
loggingOptions:(const executorchcoreml::ModelLoggingOptions& __unused)loggingOptions
2727
eventLogger:(const executorchcoreml::ModelEventLogger* _Nullable __unused)eventLogger
2828
error:(NSError * __autoreleasing *)error {
29+
if (self.ignoreOutputBackings) {
30+
predictionOptions.outputBackings = @{};
31+
}
2932
id<MLFeatureProvider> outputs = [self.model.mlModel predictionFromFeatures:inputs
3033
options:predictionOptions
3134
error:error];

backends/apple/coreml/runtime/delegate/ETCoreMLLogging.h

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#import <Foundation/Foundation.h>
99

10+
#import <executorch/runtime/platform/log.h>
1011
#import <os/log.h>
1112

1213
NS_ASSUME_NONNULL_BEGIN
@@ -48,7 +49,11 @@ typedef NS_ERROR_ENUM(ETCoreMLErrorDomain, ETCoreMLError) {
4849

4950
/// Record the error with `os_log_error` and fills `*errorOut` with `NSError`.
5051
#define ETCoreMLLogErrorAndSetNSError(errorOut, errorCode, formatString, ...) \
51-
os_log_error(ETCoreMLErrorUtils.loggingChannel, formatString, ##__VA_ARGS__); \
52+
if (ET_LOG_ENABLED) { \
53+
ET_LOG(Error, "%s", [NSString stringWithFormat:@formatString, ##__VA_ARGS__].UTF8String); \
54+
} else { \
55+
os_log_error(ETCoreMLErrorUtils.loggingChannel, formatString, ##__VA_ARGS__); \
56+
} \
5257
if (errorOut) { \
5358
*errorOut = \
5459
[NSError errorWithDomain:ETCoreMLErrorDomain \
@@ -58,24 +63,31 @@ typedef NS_ERROR_ENUM(ETCoreMLErrorDomain, ETCoreMLError) {
5863
}]; \
5964
}
6065

61-
/// Record the error and its underlying error with `os_log_error` and fills
62-
/// `*errorOut` with NSError.
66+
/// Record the error and its underlying error with `os_log_error` and fills `*errorOut` with `NSError`.
6367
#define ETCoreMLLogUnderlyingErrorAndSetNSError(errorOut, errorCode, underlyingNSError, formatString, ...) \
64-
os_log_error(ETCoreMLErrorUtils.loggingChannel, \
65-
formatString ", with underlying error= %@.", \
66-
##__VA_ARGS__, \
67-
(underlyingNSError).localizedDescription); \
68+
if (ET_LOG_ENABLED) { \
69+
ET_LOG(Error, "%s", [NSString stringWithFormat:@formatString, ##__VA_ARGS__].UTF8String); \
70+
} else { \
71+
os_log_error(ETCoreMLErrorUtils.loggingChannel, \
72+
formatString ", with underlying error= %@.", \
73+
##__VA_ARGS__, \
74+
(underlyingNSError).localizedDescription); \
75+
} \
6876
if (errorOut) { \
6977
*errorOut = [ETCoreMLErrorUtils errorWithCode:errorCode \
7078
underlyingError:underlyingNSError \
7179
format:@formatString, ##__VA_ARGS__]; \
7280
}
7381

74-
#define ETCoreMLLogError(error, formatString, ...) \
75-
os_log_error(ETCoreMLErrorUtils.loggingChannel, \
76-
formatString ", with error= %@.", \
77-
##__VA_ARGS__, \
78-
(error).localizedDescription);
82+
#define ETCoreMLLogError(error, formatString, ...) \
83+
if (ET_LOG_ENABLED) { \
84+
ET_LOG(Error, "%s", [NSString stringWithFormat:@formatString, ##__VA_ARGS__].UTF8String); \
85+
} else { \
86+
os_log_error(ETCoreMLErrorUtils.loggingChannel, \
87+
formatString ", with error= %@.", \
88+
##__VA_ARGS__, \
89+
(error).localizedDescription); \
90+
}
7991

8092

8193
#pragma clang diagnostic pop

backends/apple/coreml/runtime/delegate/ETCoreMLModel.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,18 @@
66
// Please refer to the license found in the LICENSE file in the root directory of the source tree.
77

88
#import <CoreML/CoreML.h>
9+
#import <vector>
910

1011
NS_ASSUME_NONNULL_BEGIN
1112

1213
@class ETCoreMLAsset;
1314

15+
namespace executorchcoreml {
16+
class MultiArray;
17+
}
18+
1419
/// Represents a ML model, the class is a thin wrapper over `MLModel` with additional properties.
20+
__attribute__((objc_subclassing_restricted))
1521
@interface ETCoreMLModel : NSObject
1622

1723
- (instancetype)init NS_UNAVAILABLE;
@@ -31,6 +37,12 @@ NS_ASSUME_NONNULL_BEGIN
3137
orderedOutputNames:(NSOrderedSet<NSString*>*)orderedOutputNames
3238
error:(NSError* __autoreleasing*)error NS_DESIGNATED_INITIALIZER;
3339

40+
- (nullable NSArray<MLMultiArray*>*)prepareInputs:(const std::vector<executorchcoreml::MultiArray>&)inputs
41+
error:(NSError* __autoreleasing*)error;
42+
43+
- (nullable NSArray<MLMultiArray*>*)prepareOutputBackings:(const std::vector<executorchcoreml::MultiArray>&)outputs
44+
error:(NSError* __autoreleasing*)error;
45+
3446
/// The underlying MLModel.
3547
@property (strong, readonly, nonatomic) MLModel* mlModel;
3648

0 commit comments

Comments
 (0)