
Commit 166674d

Update base for Update on "Add script to cut release branch"
Step 1 of the release process: create a release branch, named `release/MAJOR.MINOR`. From the executorch root, running `./scripts/release/cut-release-branch.sh` cuts a release branch off the stable branch (`viable/strict`) by default. Use `GIT_BRANCH_TO_CUT_FROM=main ./scripts/release/cut-release-branch.sh` to cut off `main` instead.

Differential Revision: [D55208762](https://our.internmc.facebook.com/intern/diff/D55208762)

[ghstack-poisoned]
2 parents 12b5324 + ec6b88a commit 166674d
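For orientation, here is a minimal Python sketch of the git operations a branch-cut script like this typically performs. The subprocess calls and the `release/0.2` branch name below are illustrative assumptions, not the actual contents of `cut-release-branch.sh`:

import os
import subprocess

def run(*args: str) -> None:
    # Echo and execute one git command, raising on failure.
    print("+", " ".join(args))
    subprocess.run(args, check=True)

# Default to the stable branch; override with GIT_BRANCH_TO_CUT_FROM=main.
base = os.environ.get("GIT_BRANCH_TO_CUT_FROM", "viable/strict")
release_branch = "release/0.2"  # hypothetical MAJOR.MINOR, for illustration

run("git", "fetch", "origin", base)
run("git", "checkout", "-b", release_branch, f"origin/{base}")
run("git", "push", "origin", release_branch)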

File tree

23 files changed: +131 −88 lines

.swift/coreml_backend/dummy.swift

Whitespace-only changes.

.swift/executorch/dummy.swift

Whitespace-only changes.

.swift/mps_backend/dummy.swift

Whitespace-only changes.

.swift/portable_backend/dummy.swift

Whitespace-only changes.

.swift/xnnpack_backend/dummy.swift

Whitespace-only changes.

Package.swift

Lines changed: 29 additions & 6 deletions
@@ -20,6 +20,8 @@ let xnnpack_sha256 = "3fd6e4e1d9687eb25e2638bb3dfbc429b736cbf47e7ed769f1dbec6225
 struct Framework {
   let name: String
   let checksum: String
+  var frameworks: [String] = []
+  var libraries: [String] = []
 
   func target() -> Target {
     .binaryTarget(
@@ -28,20 +30,43 @@ struct Framework {
       checksum: checksum
     )
   }
+
+  func dependencies() -> Target {
+    .target(
+      name: "\(name)_dependencies",
+      dependencies: [.target(name: name)],
+      path: ".swift/\(name)",
+      linkerSettings:
+        frameworks.map { .linkedFramework($0) } +
+        libraries.map { .linkedLibrary($0) }
+    )
+  }
 }
 
 let frameworks = [
   Framework(
     name: "coreml_backend",
-    checksum: coreml_sha256
+    checksum: coreml_sha256,
+    frameworks: [
+      "Accelerate",
+      "CoreML",
+    ],
+    libraries: [
+      "sqlite3",
+    ]
   ),
   Framework(
     name: "executorch",
    checksum: executorch_sha256
   ),
   Framework(
     name: "mps_backend",
-    checksum: mps_sha256
+    checksum: mps_sha256,
+    frameworks: [
+      "Metal",
+      "MetalPerformanceShaders",
+      "MetalPerformanceShadersGraph",
+    ]
   ),
   Framework(
     name: "portable_backend",
@@ -58,8 +83,6 @@ let package = Package(
   platforms: [
     .iOS(.v15),
   ],
-  products: frameworks.map { framework in
-    .library(name: framework.name, targets: [framework.name])
-  },
-  targets: frameworks.map { $0.target() }
+  products: frameworks.map { .library(name: $0.name, targets: ["\($0.name)_dependencies"]) },
+  targets: frameworks.flatMap { [$0.target(), $0.dependencies()] }
 )

backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm

Lines changed: 1 addition & 1 deletion
@@ -384,7 +384,7 @@ - (nullable NSURL *)compiledModelURLWithIdentifier:(NSString *)identifier
     auto modelAssetType = get_model_asset_type(inMemoryFS);
     ETCoreMLAsset *modelAsset = nil;
     // Write the model files.
-    if (modelAssetType == ModelAssetType::ModelPackage) {
+    if (modelAssetType == ModelAssetType::Model) {
         NSURL *modelURL = ::write_model_files(dstURL, self.fileManager, identifier, modelAssetType.value(), inMemoryFS, error);
         if (modelURL) {
             modelAsset = make_asset(modelURL,

backends/vulkan/CMakeLists.txt

Lines changed: 3 additions & 2 deletions
@@ -116,8 +116,9 @@ if(NOT CMAKE_TOOLCHAIN_FILE MATCHES ".*iOS\.cmake$")
   target_compile_options(vulkan_executor_runner PUBLIC ${VULKAN_CXX_FLAGS})
 
   add_library(vulkan_executor_runner_lib STATIC ${VULKAN_RUNNER_SRCS})
-  target_link_libraries(vulkan_executor_runner_lib ${_executor_runner_libs}
-                        vulkan_schema vulkan_backend)
+  target_link_libraries(
+    vulkan_executor_runner_lib ${_executor_runner_libs} vulkan_schema
+    vulkan_backend vulkan_api_lib ${VULKAN_STANDARD_OPS_LIBS})
   target_compile_options(vulkan_executor_runner_lib PUBLIC ${VULKAN_CXX_FLAGS})
 endif()

backends/xnnpack/xnnpack_preprocess.py

Lines changed: 8 additions & 11 deletions
@@ -4,8 +4,6 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-import copy
-
 import logging
 from dataclasses import dataclass
 from typing import Dict, final, List
@@ -86,7 +84,6 @@ def preprocess(
         edge_program: ExportedProgram,
         compile_specs: List[CompileSpec],
     ) -> PreprocessResult:
-        ep = copy.deepcopy(edge_program)
         # Need to wrap EP here because xnnpack does addmm to linear
         # transforms. This makes resulting graph not aten compliant
         # as aten.linear is not a core aten op.
@@ -97,17 +94,17 @@ def preprocess(
         # EdgeDialectVerifier, but disable it.
         # TODO (task link) to implement NullVerifier or something similar
         ep = ExportedProgram(
-            root=ep.graph_module,
-            graph=ep.graph,
-            graph_signature=ep.graph_signature,
-            state_dict=ep.state_dict,
-            range_constraints=ep.range_constraints,
-            module_call_graph=copy.deepcopy(ep.module_call_graph),
-            example_inputs=ep.example_inputs,
+            root=edge_program.graph_module,
+            graph=edge_program.graph,
+            graph_signature=edge_program.graph_signature,
+            state_dict=edge_program.state_dict,
+            range_constraints=edge_program.range_constraints,
+            module_call_graph=edge_program.module_call_graph,
+            example_inputs=edge_program.example_inputs,
             verifier=EXIREdgeDialectVerifier(
                 check_edge_ops=False, enable=False, class_only=True
             ),
-            constants=ep.constants,
+            constants=edge_program.constants,
         )
 
         passes = []
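The point of this change is that re-wrapping an ExportedProgram shares the underlying graph and state instead of duplicating them. A toy Python sketch of the distinction (the Program class below is a hypothetical stand-in, purely for illustration):

import copy

class Program:
    # Hypothetical stand-in for ExportedProgram, for illustration only.
    def __init__(self, state_dict):
        self.state_dict = state_dict

original = Program({"weight": [0.0] * 1024})

# Re-wrapping (what preprocess now does): the new program aliases the
# original state_dict, so no tensors are duplicated.
rewrapped = Program(original.state_dict)
assert rewrapped.state_dict is original.state_dict

# Deep-copying (what this diff removes): everything is cloned, roughly
# doubling memory for the duration of preprocessing.
copied = copy.deepcopy(original)
assert copied.state_dict is not original.state_dict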

examples/models/llama2/builder.py

Lines changed: 2 additions & 0 deletions
@@ -68,6 +68,7 @@ def load_llama_model(
     use_sdpa_with_kv_cache: bool = False,
     weight_type: WeightType = WeightType.LLAMA,
     verbose: bool = False,
+    max_seq_len: int = 128,
 ) -> "LlamaEdgeManager":
     """
     A helper util that builds a Llama2 model. It returns a LlamaEdgeManager that
@@ -87,6 +88,7 @@ def load_llama_model(
         use_kv_cache=use_kv_cache,
         use_sdpa_with_kv_cache=use_sdpa_with_kv_cache,
         fairseq2=weight_type == WeightType.FAIRSEQ2,
+        max_seq_len=max_seq_len,
     )
     state_dict = model.state_dict()
     dtype = state_dict[next(iter(state_dict))].dtype
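A toy sketch of the pattern this hunk applies: a new keyword with a default is threaded through a builder helper, so existing callers keep the old behavior while new callers can override it. The `load_model` helper below is hypothetical, not the real `load_llama_model` signature:

def load_model(verbose: bool = False, max_seq_len: int = 128) -> dict:
    # Forward the keyword into model construction, as load_llama_model
    # now forwards max_seq_len into the model's kwargs.
    return {"verbose": verbose, "max_seq_len": max_seq_len}

assert load_model()["max_seq_len"] == 128                   # old default kept
assert load_model(max_seq_len=2048)["max_seq_len"] == 2048  # now overridable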

examples/models/llama2/custom_ops/op_sdpa.cpp

Lines changed: 8 additions & 8 deletions
@@ -702,21 +702,21 @@ Tensor& flash_attention_kernel_out(
 
 /*
   Input params
-  @params[in]: q_projected: Projected query with query weights.
+  @param[in] q_projected Projected query with query weights.
   Format [n_layers, batch size, seq_len, num heads, head dim]
-  @params[in]: k_projected: Projected query with key weights.
+  @param[in] k_projected Projected query with key weights.
   Format [n_layers, batch size, seq_len, num heads, head dim]
-  @params[in]: v_projected: Projected query with value weights.
+  @param[in] v_projected Projected query with value weights.
   Format [n_layers, batch size, seq_len, num heads, head dim]
-  @params[in]: key_cache: Cache of previous k_projected.
+  @param[in] key_cache Cache of previous k_projected.
   Format [n_layers, batch size, max_seq_len, num heads, head dim]
-  @params[in]: key_cache: Cache of previous v_projected.
+  @param[in] key_cache Cache of previous v_projected.
   Format [n_layers, batch size, max_seq_len, num heads, head dim]
   ....
-  @params[in] layer_id: which layer this call belongs to.
+  @param[in] layer_id which layer this call belongs to.
   Used to updated appropriate entry of kv cache
-  @params[in]: start_pos: sequence position
-  @params[in]: seq_len: Seq length. e.g. seq_len dim of q_projected.
+  @param[in] start_pos sequence position
+  @param[in] seq_len Seq length. e.g. seq_len dim of q_projected.
 */
 Tensor& sdpa_with_kv_cache_out(
     RuntimeContext& ctx,

examples/models/llama2/eval_llama_lib.py

Lines changed: 0 additions & 6 deletions
@@ -140,12 +140,6 @@ def build_args_parser() -> argparse.ArgumentParser:
     parser.add_argument(
         "--limit", type=int, default=5, help="number of samples to evalulate"
     )
-    parser.add_argument(
-        "--max_seq_length",
-        type=int,
-        default=100,
-        help="maximum length sequence to evaluate",
-    )
 
     return parser

examples/models/llama2/export_llama_lib.py

Lines changed: 8 additions & 0 deletions
@@ -391,6 +391,13 @@ def build_args_parser() -> argparse.ArgumentParser:
         help="Override the output filename of the saved pte model file.",
     )
 
+    parser.add_argument(
+        "--max_seq_length",
+        type=int,
+        default=128,
+        help="maximum length sequence to evaluate",
+    )
+
     parser.add_argument("-2", "--fairseq2", action="store_true")
     parser.add_argument("-v", "--verbose", action="store_true")
     parser.add_argument("-X", "--xnnpack", action="store_true")
@@ -511,6 +518,7 @@ def _prepare_for_llama_export(modelname: str, args) -> LlamaEdgeManager:
         use_sdpa_with_kv_cache=args.use_sdpa_with_kv_cache,
         weight_type=weight_type,
         verbose=args.verbose,
+        max_seq_len=args.max_seq_length,
     )
     .set_output_dir(output_dir_path)
     .set_metadata(args.metadata)
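For reference, a self-contained sketch of how the new flag behaves under argparse (a standalone reconstruction of just this argument, not the full export parser):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--max_seq_length",
    type=int,
    default=128,
    help="maximum length sequence to evaluate",
)

# Omitting the flag uses the default of 128.
assert parser.parse_args([]).max_seq_length == 128
# Passing it overrides the sequence length at export time.
assert parser.parse_args(["--max_seq_length", "256"]).max_seq_length == 256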

examples/models/llama2/model.py

Lines changed: 3 additions & 1 deletion
@@ -66,6 +66,8 @@ def __init__(self, **kwargs):
             if "use_sdpa_with_kv_cache" in kwargs
             else False
         )
+
+        self.max_seq_len = kwargs["max_seq_len"] if "max_seq_len" in kwargs else 128
         # The example is using a dummy small model with random weights for demo purpose only.
         # Follow the instruction in https://github.com/facebookresearch/llama to download the model
         device = "cpu"
@@ -112,7 +114,7 @@ def __init__(self, **kwargs):
         )
         with open(params_path, "r") as f:
             params = json.loads(f.read())
-        max_seq_len = 128
+        max_seq_len = self.max_seq_len
         max_batch_size = 1
         model_args: ModelArgs = ModelArgs(
             max_seq_len=max_seq_len,
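The conditional expression used here is the standard kwargs-with-default idiom; a minimal sketch showing it is equivalent to dict.get with a default:

def resolve_max_seq_len(**kwargs) -> int:
    # Two equivalent spellings of "use the caller's value, else 128".
    a = kwargs["max_seq_len"] if "max_seq_len" in kwargs else 128
    b = kwargs.get("max_seq_len", 128)
    assert a == b
    return a

assert resolve_max_seq_len() == 128
assert resolve_max_seq_len(max_seq_len=512) == 512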

exir/backend/backend_api.py

Lines changed: 4 additions & 2 deletions
@@ -351,8 +351,10 @@ def to_backend(
     # Fall back to deepcopy if no fake mode is found. TODO(T182910699): Remove this fallback.
     try:
         fake_edge_program = get_fake_program(edge_program)
-    except AssertionError as e:
-        logging.warning(f"No fake mode found for {edge_program.graph_module}: {e}")
+    except Exception as e:
+        logging.warning(
+            f"Error in get_fake_program for graph {edge_program.graph_module}, fallback to deepcopy: {e}"
+        )
         fake_edge_program = copy.deepcopy(edge_program)
     partitioner_result = partitioner_instance(fake_edge_program)
     tagged_exported_program = partitioner_result.tagged_exported_program
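Distilled, this hunk broadens a try/except fallback: any failure in get_fake_program, not just AssertionError, now logs a warning and falls back to an expensive deepcopy. A minimal sketch of the pattern, with a hypothetical stand-in for get_fake_program:

import copy
import logging

def fake_or_copy(program, get_fake):
    # Prefer the cheap fake program; on any failure, warn and deepcopy.
    try:
        return get_fake(program)
    except Exception as e:
        logging.warning("get_fake_program failed, falling back to deepcopy: %s", e)
        return copy.deepcopy(program)

def broken_get_fake(program):
    raise RuntimeError("no fake mode found")

assert fake_or_copy({"graph": []}, broken_get_fake) == {"graph": []}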

exir/backend/utils.py

Lines changed: 15 additions & 13 deletions
@@ -220,23 +220,25 @@ def print_delegated_graph(graph_module: torch.fx.GraphModule) -> str:
         %arg2_1 : [num_users=2] = placeholder[target=arg2_1]
         %lowered_module_0 : [num_users=1] = get_attr[target=lowered_module_0]
             backend_id: BackendWithCompilerDemo
-            lowered graph(): %arg0_1 : [num_users=1] = placeholder[target=arg0_1]
-            %arg1_1 : [num_users=1] = placeholder[target=arg1_1]
-            %arg2_1 : [num_users=1] = placeholder[target=arg2_1]
-            %aten_mm_default : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mm.default](args = (%arg0_1, %arg1_1), kwargs = {})
-            %aten_add_tensor : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.add.Tensor](args = (%aten_mm_default, %arg2_1), kwargs = {})
-            return [aten_add_tensor]
+            lowered graph():
+                %arg0_1 : [num_users=1] = placeholder[target=arg0_1]
+                %arg1_1 : [num_users=1] = placeholder[target=arg1_1]
+                %arg2_1 : [num_users=1] = placeholder[target=arg2_1]
+                %aten_mm_default : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mm.default](args = (%arg0_1, %arg1_1), kwargs = {})
+                %aten_add_tensor : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.add.Tensor](args = (%aten_mm_default, %arg2_1), kwargs = {})
+                return [aten_add_tensor]
         %executorch_call_delegate : [num_users=1] = call_function[target=torch.ops.higher_order.executorch_call_delegate](args = (%lowered_module_0, %arg0_1, %arg1_1, %arg2_1), kwargs = {})
         %getitem : [num_users=1] = call_function[target=operator.getitem](args = (%executorch_call_delegate, 0), kwargs = {})
         %aten_sub_tensor : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.sub.Tensor](args = (%getitem, %arg0_1), kwargs = {})
         %lowered_module_1 : [num_users=1] = get_attr[target=lowered_module_1]
             backend_id: BackendWithCompilerDemo
-            lowered graph(): %aten_sub_tensor : [num_users=1] = placeholder[target=aten_sub_tensor]
-            %arg1_1 : [num_users=1] = placeholder[target=arg1_1]
-            %arg2_1 : [num_users=1] = placeholder[target=arg2_1]
-            %aten_mm_default_1 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mm.default](args = (%aten_sub_tensor, %arg1_1), kwargs = {})
-            %aten_add_tensor_1 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.add.Tensor](args = (%aten_mm_default_1, %arg2_1), kwargs = {})
-            return [aten_add_tensor_1]
+            lowered graph():
+                %aten_sub_tensor : [num_users=1] = placeholder[target=aten_sub_tensor]
+                %arg1_1 : [num_users=1] = placeholder[target=arg1_1]
+                %arg2_1 : [num_users=1] = placeholder[target=arg2_1]
+                %aten_mm_default_1 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mm.default](args = (%aten_sub_tensor, %arg1_1), kwargs = {})
+                %aten_add_tensor_1 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.add.Tensor](args = (%aten_mm_default_1, %arg2_1), kwargs = {})
+                return [aten_add_tensor_1]
         %executorch_call_delegate_1 : [num_users=1] = call_function[target=torch.ops.higher_order.executorch_call_delegate](args = (%lowered_module_1, %aten_sub_tensor, %arg1_1, %arg2_1), kwargs = {})
         %getitem_1 : [num_users=1] = call_function[target=operator.getitem](args = (%executorch_call_delegate_1, 0), kwargs = {})
         return [getitem_1]
@@ -253,7 +255,7 @@ def print_delegated_graph(graph_module: torch.fx.GraphModule) -> str:
         if node.op == "get_attr" and node.name.startswith("lowered_module_"):
             lowered_module = lowered_module_dict[node.name]
             graph_format_str += f"{indent * 2}backend_id: {lowered_module.backend_id}\n"
-            graph_format_str += f"{indent * 2}lowered graph(): "
+            graph_format_str += f"{indent * 2}lowered graph():\n"
             for node_in_lowered_module in lowered_module.original_module.graph.nodes:
                 graph_format_str += (
                     f"{indent * 3}{node_in_lowered_module.format_node()}\n"

extension/android/CMakeLists.txt

Lines changed: 26 additions & 17 deletions
@@ -4,7 +4,6 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-
 cmake_minimum_required(VERSION 3.19)
 
 project(executorch_jni)
@@ -14,22 +13,32 @@ include(${EXECUTORCH_ROOT}/build/Utils.cmake)
 
 set(_common_include_directories ${EXECUTORCH_ROOT}/..)
 
-add_subdirectory(
-  ${EXECUTORCH_ROOT}/examples/third-party/fbjni
-  ${CMAKE_CURRENT_BINARY_DIR}/third-party/fbjni)
+add_subdirectory(${EXECUTORCH_ROOT}/examples/third-party/fbjni
+                 ${CMAKE_CURRENT_BINARY_DIR}/third-party/fbjni)
 
 if(CMAKE_TOOLCHAIN_ANDROID)
-  add_library(executorch_jni SHARED jni/jni_layer.cpp)
-  target_link_libraries(executorch_jni extension_data_loader
-                        extension_module xnn_executor_runner_lib fbjni)
-  if(EXECUTORCH_BUILD_QNN)
-    target_link_libraries(executorch_jni qnn_executorch_backend)
-  endif()
-  target_compile_options(executorch_jni PUBLIC ${_common_compile_options})
-
-  add_library(executorch_llama_jni SHARED jni/jni_layer_llama.cpp)
-  target_link_libraries(executorch_llama_jni fbjni llama_runner
-                        xnn_executor_runner_lib)
-  target_compile_options(executorch_llama_jni PUBLIC
-                         ${_common_compile_options})
+  add_library(executorch_jni SHARED jni/jni_layer.cpp)
+  target_link_libraries(executorch_jni extension_data_loader extension_module
+                        fbjni)
+  if(EXECUTORCH_BUILD_QNN)
+    target_link_libraries(executorch_jni qnn_executorch_backend)
+  endif()
+  if(EXECUTORCH_BUILD_XNNPACK)
+    target_link_libraries(executorch_jni xnn_executor_runner_lib)
+  endif()
+  if(EXECUTORCH_BUILD_VULKAN)
+    target_link_libraries(executorch_jni vulkan_executor_runner_lib)
+  endif()
+  target_compile_options(executorch_jni PUBLIC ${_common_compile_options})
+
+  add_library(executorch_llama_jni SHARED jni/jni_layer_llama.cpp)
+  target_link_libraries(executorch_llama_jni fbjni llama_runner)
+  if(EXECUTORCH_BUILD_XNNPACK)
+    target_link_libraries(executorch_llama_jni xnn_executor_runner_lib)
+  endif()
+  if(EXECUTORCH_BUILD_VULKAN)
+    target_link_libraries(executorch_llama_jni vulkan_executor_runner_lib)
+  endif()
+
+  target_compile_options(executorch_llama_jni PUBLIC ${_common_compile_options})
 endif()

extension/aten_util/aten_bridge.cpp

Lines changed: 2 additions & 2 deletions
@@ -124,8 +124,8 @@ c10::ScalarType execuTorchtoTorchScalarType(torch::executor::ScalarType type) {
  * assumption , a strong one, that, such memory is arena allocated whose
  * lifetime is tied to model's lifetime, we assume that memory is not leaked as
  * it is freed when arean is freed.
- * @param[in] aten_tensor: Input at::Tensor
- * @param[in/out] mutable_et: ETensor whose underlying memory now will alias to
+ * @param[in] aten_tensor Input at::Tensor
+ * @param[in/out] mutable_et ETensor whose underlying memory now will alias to
  * aten_tensor
  */
 void alias_etensor_to_attensor(

extension/aten_util/aten_bridge.h

Lines changed: 4 additions & 4 deletions
@@ -26,16 +26,16 @@ torch::executor::ScalarType torchToExecuTorchScalarType(caffe2::TypeMeta type);
 c10::ScalarType execuTorchtoTorchScalarType(torch::executor::ScalarType type);
 
 /*
- * @param[in] aten_tensor: Input at::Tensor
- * @param[in/out] mutable_et: ETensor whose underlying memory now will alias to
+ * @param[in] aten_tensor Input at::Tensor
+ * @param[in,out] mutable_et ETensor whose underlying memory now will alias to
  * aten_tensor
 */
 void alias_etensor_to_attensor(at::Tensor& at, torch::executor::Tensor& et);
 
 /*
- * @param[in] et: ETensor whose underlying memory now will alias to returned
+ * @param[in] et ETensor whose underlying memory now will alias to returned
  * output tensor
- * @param[ret] aten_tensor: output at::Tensor
+ * @param[ret] aten_tensor output at::Tensor
 * Notes:
 * It is owned by the caller of alias_attensor_to_etensor.
 * Lifetime of tensor meta must be >= to that of the returned tensor since

extension/module/module.cpp

Lines changed: 4 additions & 2 deletions
@@ -36,10 +36,12 @@ namespace torch::executor {
 
 Module::Module(
     const std::string& file_path,
-    const Module::MlockConfig mlock_config)
+    const Module::MlockConfig mlock_config,
+    std::unique_ptr<EventTracer> event_tracer)
     : file_path_(file_path),
       mlock_config_(mlock_config),
-      memory_allocator_(std::make_unique<util::MallocMemoryAllocator>()) {
+      memory_allocator_(std::make_unique<util::MallocMemoryAllocator>()),
+      event_tracer_(std::move(event_tracer)) {
   runtime_init();
 }
