
Commit 166674d

Update base for Update on "Add script to cut release branch"
Step 1 of the release process: create a release branch, named `release/MAJOR.MINOR`. From the executorch root, running `./scripts/release/cut-release-branch.sh` cuts a release branch off the stable branch (`viable/strict`) by default. Use `GIT_BRANCH_TO_CUT_FROM=main ./scripts/release/cut-release-branch.sh` to cut off `main` instead.

Differential Revision: [D55208762](https://our.internmc.facebook.com/intern/diff/D55208762)

[ghstack-poisoned]
2 parents 12b5324 + ec6b88a commit 166674d
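For orientation, here is a minimal Python sketch of the git operations a branch-cut script like this typically performs. The subprocess calls and the `release/0.2` branch name below are illustrative assumptions, not the actual contents of `cut-release-branch.sh`:

import os
import subprocess

def run(*args: str) -> None:
    # Echo and execute one git command, raising on failure.
    print("+", " ".join(args))
    subprocess.run(args, check=True)

# Default to the stable branch; override with GIT_BRANCH_TO_CUT_FROM=main.
base = os.environ.get("GIT_BRANCH_TO_CUT_FROM", "viable/strict")
release_branch = "release/0.2"  # hypothetical MAJOR.MINOR, for illustration

run("git", "fetch", "origin", base)
run("git", "checkout", "-b", release_branch, f"origin/{base}")
run("git", "push", "origin", release_branch)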

File tree

23 files changed: +131 −88 lines

.swift/coreml_backend/dummy.swift

Whitespace-only changes.

.swift/executorch/dummy.swift

Whitespace-only changes.

.swift/mps_backend/dummy.swift

Whitespace-only changes.

.swift/portable_backend/dummy.swift

Whitespace-only changes.

.swift/xnnpack_backend/dummy.swift

Whitespace-only changes.

Package.swift

Lines changed: 29 additions & 6 deletions
@@ -20,6 +20,8 @@ let xnnpack_sha256 = "3fd6e4e1d9687eb25e2638bb3dfbc429b736cbf47e7ed769f1dbec6225
 struct Framework {
   let name: String
   let checksum: String
+  var frameworks: [String] = []
+  var libraries: [String] = []
 
   func target() -> Target {
     .binaryTarget(
@@ -28,20 +30,43 @@ struct Framework {
       checksum: checksum
     )
   }
+
+  func dependencies() -> Target {
+    .target(
+      name: "\(name)_dependencies",
+      dependencies: [.target(name: name)],
+      path: ".swift/\(name)",
+      linkerSettings:
+        frameworks.map { .linkedFramework($0) } +
+        libraries.map { .linkedLibrary($0) }
+    )
+  }
 }
 
 let frameworks = [
   Framework(
     name: "coreml_backend",
-    checksum: coreml_sha256
+    checksum: coreml_sha256,
+    frameworks: [
+      "Accelerate",
+      "CoreML",
+    ],
+    libraries: [
+      "sqlite3",
+    ]
   ),
   Framework(
     name: "executorch",
    checksum: executorch_sha256
   ),
   Framework(
     name: "mps_backend",
-    checksum: mps_sha256
+    checksum: mps_sha256,
+    frameworks: [
+      "Metal",
+      "MetalPerformanceShaders",
+      "MetalPerformanceShadersGraph",
+    ]
   ),
   Framework(
     name: "portable_backend",
@@ -58,8 +83,6 @@ let package = Package(
   platforms: [
     .iOS(.v15),
   ],
-  products: frameworks.map { framework in
-    .library(name: framework.name, targets: [framework.name])
-  },
-  targets: frameworks.map { $0.target() }
+  products: frameworks.map { .library(name: $0.name, targets: ["\($0.name)_dependencies"]) },
+  targets: frameworks.flatMap { [$0.target(), $0.dependencies()] }
 )

backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm

Lines changed: 1 addition & 1 deletion
@@ -384,7 +384,7 @@ - (nullable NSURL *)compiledModelURLWithIdentifier:(NSString *)identifier
     auto modelAssetType = get_model_asset_type(inMemoryFS);
     ETCoreMLAsset *modelAsset = nil;
     // Write the model files.
-    if (modelAssetType == ModelAssetType::ModelPackage) {
+    if (modelAssetType == ModelAssetType::Model) {
         NSURL *modelURL = ::write_model_files(dstURL, self.fileManager, identifier, modelAssetType.value(), inMemoryFS, error);
         if (modelURL) {
             modelAsset = make_asset(modelURL,

backends/vulkan/CMakeLists.txt

Lines changed: 3 additions & 2 deletions
@@ -116,8 +116,9 @@ if(NOT CMAKE_TOOLCHAIN_FILE MATCHES ".*iOS\.cmake$")
   target_compile_options(vulkan_executor_runner PUBLIC ${VULKAN_CXX_FLAGS})
 
   add_library(vulkan_executor_runner_lib STATIC ${VULKAN_RUNNER_SRCS})
-  target_link_libraries(vulkan_executor_runner_lib ${_executor_runner_libs}
-                        vulkan_schema vulkan_backend)
+  target_link_libraries(
+    vulkan_executor_runner_lib ${_executor_runner_libs} vulkan_schema
+    vulkan_backend vulkan_api_lib ${VULKAN_STANDARD_OPS_LIBS})
   target_compile_options(vulkan_executor_runner_lib PUBLIC ${VULKAN_CXX_FLAGS})
 endif()

backends/xnnpack/xnnpack_preprocess.py

Lines changed: 8 additions & 11 deletions
@@ -4,8 +4,6 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-import copy
-
 import logging
 from dataclasses import dataclass
 from typing import Dict, final, List
@@ -86,7 +84,6 @@ def preprocess(
         edge_program: ExportedProgram,
         compile_specs: List[CompileSpec],
     ) -> PreprocessResult:
-        ep = copy.deepcopy(edge_program)
         # Need to wrap EP here because xnnpack does addmm to linear
         # transforms. This makes resulting graph not aten compliant
         # as aten.linear is not a core aten op.
@@ -97,17 +94,17 @@ def preprocess(
         # EdgeDialectVerifier, but disable it.
         # TODO (task link) to implement NullVerifier or something similar
         ep = ExportedProgram(
-            root=ep.graph_module,
-            graph=ep.graph,
-            graph_signature=ep.graph_signature,
-            state_dict=ep.state_dict,
-            range_constraints=ep.range_constraints,
-            module_call_graph=copy.deepcopy(ep.module_call_graph),
-            example_inputs=ep.example_inputs,
+            root=edge_program.graph_module,
+            graph=edge_program.graph,
+            graph_signature=edge_program.graph_signature,
+            state_dict=edge_program.state_dict,
+            range_constraints=edge_program.range_constraints,
+            module_call_graph=edge_program.module_call_graph,
+            example_inputs=edge_program.example_inputs,
             verifier=EXIREdgeDialectVerifier(
                 check_edge_ops=False, enable=False, class_only=True
             ),
-            constants=ep.constants,
+            constants=edge_program.constants,
         )
 
         passes = []
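The point of this change is that re-wrapping an ExportedProgram shares the underlying graph and state instead of duplicating them. A toy Python sketch of the distinction (the Program class below is a hypothetical stand-in, purely for illustration):

import copy

class Program:
    # Hypothetical stand-in for ExportedProgram, for illustration only.
    def __init__(self, state_dict):
        self.state_dict = state_dict

original = Program({"weight": [0.0] * 1024})

# Re-wrapping (what preprocess now does): the new program aliases the
# original state_dict, so no tensors are duplicated.
rewrapped = Program(original.state_dict)
assert rewrapped.state_dict is original.state_dict

# Deep-copying (what this diff removes): everything is cloned, roughly
# doubling memory for the duration of preprocessing.
copied = copy.deepcopy(original)
assert copied.state_dict is not original.state_dict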

examples/models/llama2/builder.py

Lines changed: 2 additions & 0 deletions
@@ -68,6 +68,7 @@ def load_llama_model(
     use_sdpa_with_kv_cache: bool = False,
     weight_type: WeightType = WeightType.LLAMA,
     verbose: bool = False,
+    max_seq_len: int = 128,
 ) -> "LlamaEdgeManager":
     """
     A helper util that builds a Llama2 model. It returns a LlamaEdgeManager that
@@ -87,6 +88,7 @@ def load_llama_model(
         use_kv_cache=use_kv_cache,
         use_sdpa_with_kv_cache=use_sdpa_with_kv_cache,
         fairseq2=weight_type == WeightType.FAIRSEQ2,
+        max_seq_len=max_seq_len,
     )
     state_dict = model.state_dict()
     dtype = state_dict[next(iter(state_dict))].dtype
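A toy sketch of the pattern this hunk applies: a new keyword with a default is threaded through a builder helper, so existing callers keep the old behavior while new callers can override it. The `load_model` helper below is hypothetical, not the real `load_llama_model` signature:

def load_model(verbose: bool = False, max_seq_len: int = 128) -> dict:
    # Forward the keyword into model construction, as load_llama_model
    # now forwards max_seq_len into the model's kwargs.
    return {"verbose": verbose, "max_seq_len": max_seq_len}

assert load_model()["max_seq_len"] == 128                   # old default kept
assert load_model(max_seq_len=2048)["max_seq_len"] == 2048  # now overridable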

examples/models/llama2/custom_ops/op_sdpa.cpp

Lines changed: 8 additions & 8 deletions
@@ -702,21 +702,21 @@ Tensor& flash_attention_kernel_out(
 
 /*
   Input params
-  @params[in]: q_projected: Projected query with query weights.
+  @param[in] q_projected Projected query with query weights.
   Format [n_layers, batch size, seq_len, num heads, head dim]
-  @params[in]: k_projected: Projected query with key weights.
+  @param[in] k_projected Projected query with key weights.
   Format [n_layers, batch size, seq_len, num heads, head dim]
-  @params[in]: v_projected: Projected query with value weights.
+  @param[in] v_projected Projected query with value weights.
   Format [n_layers, batch size, seq_len, num heads, head dim]
-  @params[in]: key_cache: Cache of previous k_projected.
+  @param[in] key_cache Cache of previous k_projected.
   Format [n_layers, batch size, max_seq_len, num heads, head dim]
-  @params[in]: key_cache: Cache of previous v_projected.
+  @param[in] key_cache Cache of previous v_projected.
   Format [n_layers, batch size, max_seq_len, num heads, head dim]
   ....
-  @params[in] layer_id: which layer this call belongs to.
+  @param[in] layer_id which layer this call belongs to.
   Used to updated appropriate entry of kv cache
-  @params[in]: start_pos: sequence position
-  @params[in]: seq_len: Seq length. e.g. seq_len dim of q_projected.
+  @param[in] start_pos sequence position
+  @param[in] seq_len Seq length. e.g. seq_len dim of q_projected.
 */
 Tensor& sdpa_with_kv_cache_out(
     RuntimeContext& ctx,

examples/models/llama2/eval_llama_lib.py

Lines changed: 0 additions & 6 deletions
@@ -140,12 +140,6 @@ def build_args_parser() -> argparse.ArgumentParser:
     parser.add_argument(
         "--limit", type=int, default=5, help="number of samples to evalulate"
     )
-    parser.add_argument(
-        "--max_seq_length",
-        type=int,
-        default=100,
-        help="maximum length sequence to evaluate",
-    )
 
     return parser

examples/models/llama2/export_llama_lib.py

Lines changed: 8 additions & 0 deletions
@@ -391,6 +391,13 @@ def build_args_parser() -> argparse.ArgumentParser:
         help="Override the output filename of the saved pte model file.",
     )
 
+    parser.add_argument(
+        "--max_seq_length",
+        type=int,
+        default=128,
+        help="maximum length sequence to evaluate",
+    )
+
     parser.add_argument("-2", "--fairseq2", action="store_true")
     parser.add_argument("-v", "--verbose", action="store_true")
     parser.add_argument("-X", "--xnnpack", action="store_true")
@@ -511,6 +518,7 @@ def _prepare_for_llama_export(modelname: str, args) -> LlamaEdgeManager:
         use_sdpa_with_kv_cache=args.use_sdpa_with_kv_cache,
         weight_type=weight_type,
         verbose=args.verbose,
+        max_seq_len=args.max_seq_length,
     )
     .set_output_dir(output_dir_path)
     .set_metadata(args.metadata)
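For reference, a self-contained sketch of how the new flag behaves under argparse (a standalone reconstruction of just this argument, not the full export parser):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--max_seq_length",
    type=int,
    default=128,
    help="maximum length sequence to evaluate",
)

# Omitting the flag uses the default of 128.
assert parser.parse_args([]).max_seq_length == 128
# Passing it overrides the sequence length at export time.
assert parser.parse_args(["--max_seq_length", "256"]).max_seq_length == 256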

examples/models/llama2/model.py

Lines changed: 3 additions & 1 deletion
@@ -66,6 +66,8 @@ def __init__(self, **kwargs):
             if "use_sdpa_with_kv_cache" in kwargs
             else False
         )
+
+        self.max_seq_len = kwargs["max_seq_len"] if "max_seq_len" in kwargs else 128
         # The example is using a dummy small model with random weights for demo purpose only.
         # Follow the instruction in https://github.com/facebookresearch/llama to download the model
         device = "cpu"
@@ -112,7 +114,7 @@ def __init__(self, **kwargs):
         )
         with open(params_path, "r") as f:
             params = json.loads(f.read())
-        max_seq_len = 128
+        max_seq_len = self.max_seq_len
         max_batch_size = 1
         model_args: ModelArgs = ModelArgs(
             max_seq_len=max_seq_len,
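The conditional expression used here is the standard kwargs-with-default idiom; a minimal sketch showing it is equivalent to dict.get with a default:

def resolve_max_seq_len(**kwargs) -> int:
    # Two equivalent spellings of "use the caller's value, else 128".
    a = kwargs["max_seq_len"] if "max_seq_len" in kwargs else 128
    b = kwargs.get("max_seq_len", 128)
    assert a == b
    return a

assert resolve_max_seq_len() == 128
assert resolve_max_seq_len(max_seq_len=512) == 512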

exir/backend/backend_api.py

Lines changed: 4 additions & 2 deletions
@@ -351,8 +351,10 @@ def to_backend(
     # Fall back to deepcopy if no fake mode is found. TODO(T182910699): Remove this fallback.
     try:
         fake_edge_program = get_fake_program(edge_program)
-    except AssertionError as e:
-        logging.warning(f"No fake mode found for {edge_program.graph_module}: {e}")
+    except Exception as e:
+        logging.warning(
+            f"Error in get_fake_program for graph {edge_program.graph_module}, fallback to deepcopy: {e}"
+        )
         fake_edge_program = copy.deepcopy(edge_program)
     partitioner_result = partitioner_instance(fake_edge_program)
     tagged_exported_program = partitioner_result.tagged_exported_program
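Distilled, this hunk broadens a try/except fallback: any failure in get_fake_program, not just AssertionError, now logs a warning and falls back to an expensive deepcopy. A minimal sketch of the pattern, with a hypothetical stand-in for get_fake_program:

import copy
import logging

def fake_or_copy(program, get_fake):
    # Prefer the cheap fake program; on any failure, warn and deepcopy.
    try:
        return get_fake(program)
    except Exception as e:
        logging.warning("get_fake_program failed, falling back to deepcopy: %s", e)
        return copy.deepcopy(program)

def broken_get_fake(program):
    raise RuntimeError("no fake mode found")

assert fake_or_copy({"graph": []}, broken_get_fake) == {"graph": []}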

exir/backend/utils.py

Lines changed: 15 additions & 13 deletions
@@ -220,23 +220,25 @@ def print_delegated_graph(graph_module: torch.fx.GraphModule) -> str:
         %arg2_1 : [num_users=2] = placeholder[target=arg2_1]
         %lowered_module_0 : [num_users=1] = get_attr[target=lowered_module_0]
             backend_id: BackendWithCompilerDemo
-            lowered graph(): %arg0_1 : [num_users=1] = placeholder[target=arg0_1]
-            %arg1_1 : [num_users=1] = placeholder[target=arg1_1]
-            %arg2_1 : [num_users=1] = placeholder[target=arg2_1]
-            %aten_mm_default : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mm.default](args = (%arg0_1, %arg1_1), kwargs = {})
-            %aten_add_tensor : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.add.Tensor](args = (%aten_mm_default, %arg2_1), kwargs = {})
-            return [aten_add_tensor]
+            lowered graph():
+                %arg0_1 : [num_users=1] = placeholder[target=arg0_1]
+                %arg1_1 : [num_users=1] = placeholder[target=arg1_1]
+                %arg2_1 : [num_users=1] = placeholder[target=arg2_1]
+                %aten_mm_default : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mm.default](args = (%arg0_1, %arg1_1), kwargs = {})
+                %aten_add_tensor : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.add.Tensor](args = (%aten_mm_default, %arg2_1), kwargs = {})
+                return [aten_add_tensor]
         %executorch_call_delegate : [num_users=1] = call_function[target=torch.ops.higher_order.executorch_call_delegate](args = (%lowered_module_0, %arg0_1, %arg1_1, %arg2_1), kwargs = {})
         %getitem : [num_users=1] = call_function[target=operator.getitem](args = (%executorch_call_delegate, 0), kwargs = {})
         %aten_sub_tensor : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.sub.Tensor](args = (%getitem, %arg0_1), kwargs = {})
         %lowered_module_1 : [num_users=1] = get_attr[target=lowered_module_1]
             backend_id: BackendWithCompilerDemo
-            lowered graph(): %aten_sub_tensor : [num_users=1] = placeholder[target=aten_sub_tensor]
-            %arg1_1 : [num_users=1] = placeholder[target=arg1_1]
-            %arg2_1 : [num_users=1] = placeholder[target=arg2_1]
-            %aten_mm_default_1 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mm.default](args = (%aten_sub_tensor, %arg1_1), kwargs = {})
-            %aten_add_tensor_1 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.add.Tensor](args = (%aten_mm_default_1, %arg2_1), kwargs = {})
-            return [aten_add_tensor_1]
+            lowered graph():
+                %aten_sub_tensor : [num_users=1] = placeholder[target=aten_sub_tensor]
+                %arg1_1 : [num_users=1] = placeholder[target=arg1_1]
+                %arg2_1 : [num_users=1] = placeholder[target=arg2_1]
+                %aten_mm_default_1 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mm.default](args = (%aten_sub_tensor, %arg1_1), kwargs = {})
+                %aten_add_tensor_1 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.add.Tensor](args = (%aten_mm_default_1, %arg2_1), kwargs = {})
+                return [aten_add_tensor_1]
         %executorch_call_delegate_1 : [num_users=1] = call_function[target=torch.ops.higher_order.executorch_call_delegate](args = (%lowered_module_1, %aten_sub_tensor, %arg1_1, %arg2_1), kwargs = {})
         %getitem_1 : [num_users=1] = call_function[target=operator.getitem](args = (%executorch_call_delegate_1, 0), kwargs = {})
         return [getitem_1]
@@ -253,7 +255,7 @@ def print_delegated_graph(graph_module: torch.fx.GraphModule) -> str:
         if node.op == "get_attr" and node.name.startswith("lowered_module_"):
             lowered_module = lowered_module_dict[node.name]
             graph_format_str += f"{indent * 2}backend_id: {lowered_module.backend_id}\n"
-            graph_format_str += f"{indent * 2}lowered graph(): "
+            graph_format_str += f"{indent * 2}lowered graph():\n"
             for node_in_lowered_module in lowered_module.original_module.graph.nodes:
                 graph_format_str += (
                     f"{indent * 3}{node_in_lowered_module.format_node()}\n"

extension/android/CMakeLists.txt

Lines changed: 26 additions & 17 deletions
@@ -4,7 +4,6 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-
 cmake_minimum_required(VERSION 3.19)
 
 project(executorch_jni)
@@ -14,22 +13,32 @@ include(${EXECUTORCH_ROOT}/build/Utils.cmake)
 
 set(_common_include_directories ${EXECUTORCH_ROOT}/..)
 
-add_subdirectory(
-  ${EXECUTORCH_ROOT}/examples/third-party/fbjni
-  ${CMAKE_CURRENT_BINARY_DIR}/third-party/fbjni)
+add_subdirectory(${EXECUTORCH_ROOT}/examples/third-party/fbjni
+                 ${CMAKE_CURRENT_BINARY_DIR}/third-party/fbjni)
 
 if(CMAKE_TOOLCHAIN_ANDROID)
-  add_library(executorch_jni SHARED jni/jni_layer.cpp)
-  target_link_libraries(executorch_jni extension_data_loader
-                        extension_module xnn_executor_runner_lib fbjni)
-  if(EXECUTORCH_BUILD_QNN)
-    target_link_libraries(executorch_jni qnn_executorch_backend)
-  endif()
-  target_compile_options(executorch_jni PUBLIC ${_common_compile_options})
-
-  add_library(executorch_llama_jni SHARED jni/jni_layer_llama.cpp)
-  target_link_libraries(executorch_llama_jni fbjni llama_runner
-                        xnn_executor_runner_lib)
-  target_compile_options(executorch_llama_jni PUBLIC
-                         ${_common_compile_options})
+  add_library(executorch_jni SHARED jni/jni_layer.cpp)
+  target_link_libraries(executorch_jni extension_data_loader extension_module
+                        fbjni)
+  if(EXECUTORCH_BUILD_QNN)
+    target_link_libraries(executorch_jni qnn_executorch_backend)
+  endif()
+  if(EXECUTORCH_BUILD_XNNPACK)
+    target_link_libraries(executorch_jni xnn_executor_runner_lib)
+  endif()
+  if(EXECUTORCH_BUILD_VULKAN)
+    target_link_libraries(executorch_jni vulkan_executor_runner_lib)
+  endif()
+  target_compile_options(executorch_jni PUBLIC ${_common_compile_options})
+
+  add_library(executorch_llama_jni SHARED jni/jni_layer_llama.cpp)
+  target_link_libraries(executorch_llama_jni fbjni llama_runner)
+  if(EXECUTORCH_BUILD_XNNPACK)
+    target_link_libraries(executorch_llama_jni xnn_executor_runner_lib)
+  endif()
+  if(EXECUTORCH_BUILD_VULKAN)
+    target_link_libraries(executorch_llama_jni vulkan_executor_runner_lib)
+  endif()
+
+  target_compile_options(executorch_llama_jni PUBLIC ${_common_compile_options})
 endif()

extension/aten_util/aten_bridge.cpp

Lines changed: 2 additions & 2 deletions
@@ -124,8 +124,8 @@ c10::ScalarType execuTorchtoTorchScalarType(torch::executor::ScalarType type) {
  * assumption , a strong one, that, such memory is arena allocated whose
  * lifetime is tied to model's lifetime, we assume that memory is not leaked as
  * it is freed when arean is freed.
- * @param[in] aten_tensor: Input at::Tensor
- * @param[in/out] mutable_et: ETensor whose underlying memory now will alias to
+ * @param[in] aten_tensor Input at::Tensor
+ * @param[in/out] mutable_et ETensor whose underlying memory now will alias to
  * aten_tensor
  */
 void alias_etensor_to_attensor(

extension/aten_util/aten_bridge.h

Lines changed: 4 additions & 4 deletions
@@ -26,16 +26,16 @@ torch::executor::ScalarType torchToExecuTorchScalarType(caffe2::TypeMeta type);
 c10::ScalarType execuTorchtoTorchScalarType(torch::executor::ScalarType type);
 
 /*
- * @param[in] aten_tensor: Input at::Tensor
- * @param[in/out] mutable_et: ETensor whose underlying memory now will alias to
+ * @param[in] aten_tensor Input at::Tensor
+ * @param[in,out] mutable_et ETensor whose underlying memory now will alias to
  * aten_tensor
 */
 void alias_etensor_to_attensor(at::Tensor& at, torch::executor::Tensor& et);
 
 /*
- * @param[in] et: ETensor whose underlying memory now will alias to returned
+ * @param[in] et ETensor whose underlying memory now will alias to returned
  * output tensor
- * @param[ret] aten_tensor: output at::Tensor
+ * @param[ret] aten_tensor output at::Tensor
 * Notes:
 * It is owned by the caller of alias_attensor_to_etensor.
 * Lifetime of tensor meta must be >= to that of the returned tensor since

extension/module/module.cpp

Lines changed: 4 additions & 2 deletions
@@ -36,10 +36,12 @@ namespace torch::executor {
 
 Module::Module(
     const std::string& file_path,
-    const Module::MlockConfig mlock_config)
+    const Module::MlockConfig mlock_config,
+    std::unique_ptr<EventTracer> event_tracer)
     : file_path_(file_path),
       mlock_config_(mlock_config),
-      memory_allocator_(std::make_unique<util::MallocMemoryAllocator>()) {
+      memory_allocator_(std::make_unique<util::MallocMemoryAllocator>()),
+      event_tracer_(std::move(event_tracer)) {
   runtime_init();
 }
