Skip to content

Commit 580e1e5

Browse files
committed
Update base for Update on "[ET-VK] Introduce virtual_clone API to support view of view use cases + fix synchronization hazard with view tensors"
## Context This diff fixes some hazards (not necessarily bugs) with view tensors. ### `virtual_clone` API Consider the following sequence of calls which may be common in the view of view use case. ``` t1 = graph.add_tensor(...); // t2 will have the same metadata as t1 t2 = graph.add_tensor_view(t1); // t3 will also have the same metadata as t2 at this point. t3 = graph.add_tensor_view(t2); // t2 metadata will be updated correctly. t2 = add_transpose_view_node(t1, 0, 1, t2); // Unfortunately, this node will have an assumption that t3 has the same metadata as t2 to start. However, this is not true. // As a result, t3 will have incorrect metadata after this node. t3 = add_transpose_view_node(t2, 1, 2, t3); ``` To address this, the `virtual_clone` API is introduced which will allow view nodes to set the metadata of the output equal to the input before modifying the output. ### WAW synchronization hazards `vTensorStorage` maintains a `last_access` state which facilitates inserting the correct memory barriers for the underlying `vkImage` or `vkBuffer`. However, when we create a tensor view, `last_access` is not shared between `vTensor` instances that use the same resource. As a result, writing into a `vTensor` will not update the `last_access` of its views, and vice versa. Therefore, subsequent accesses of the other tensor that references the same resource will result in a synchronization hazard. This diff fixes this hazard in a bit of a crude way; if the `vTensor` is a copy, or has copies, then cowardly assume that it has been written to before the current access so that appropriate memory barriers are inserted. This was the selected solution because I thought that adding a map to track last access of tensors that share resources is a bit overkill when the assumption that the underlying resource has been written to before the current access should hold most of the time. 
Differential Revision: [D63642092](https://our.internmc.facebook.com/intern/diff/D63642092/) [ghstack-poisoned]
2 parents fe0e676 + 0d96f75 commit 580e1e5

File tree

42 files changed

+738
-258
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+738
-258
lines changed

CMakeLists.txt

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -458,22 +458,22 @@ endif()
458458
add_subdirectory(schema)
459459

460460
#
461-
# executorch_no_prim_ops: Minimal runtime library
461+
# executorch_core: Minimal runtime library
462462
#
463463
# The bare-minimum runtime library, supporting the Program and Method
464464
# interfaces. Does not contain any operators, including primitive ops. Does not
465465
# contain any backends.
466466
#
467467

468468
# Remove any PAL-definition files from the sources.
469-
list(FILTER _executorch_no_prim_ops__srcs EXCLUDE REGEX
469+
list(FILTER _executorch_core__srcs EXCLUDE REGEX
470470
"runtime/platform/default/[^/]*.cpp$"
471471
)
472472

473473
# Add the source file that maps to the requested default PAL implementation.
474474
if(EXECUTORCH_PAL_DEFAULT MATCHES "^(posix|minimal)$")
475475
message(STATUS "executorch: Using PAL default '${EXECUTORCH_PAL_DEFAULT}'")
476-
list(APPEND _executorch_no_prim_ops__srcs
476+
list(APPEND _executorch_core__srcs
477477
"runtime/platform/default/${EXECUTORCH_PAL_DEFAULT}.cpp"
478478
)
479479
else()
@@ -483,45 +483,49 @@ else()
483483
)
484484
endif()
485485

486-
add_library(executorch_no_prim_ops ${_executorch_no_prim_ops__srcs})
487-
target_link_libraries(executorch_no_prim_ops PRIVATE program_schema)
486+
add_library(executorch_core ${_executorch_core__srcs})
487+
488+
# Legacy name alias.
489+
add_library(executorch_no_prim_ops ALIAS executorch_core)
490+
491+
target_link_libraries(executorch_core PRIVATE program_schema)
488492
if(EXECUTORCH_USE_DL)
489493
# Check if dl exists for this toolchain and only then link it.
490494
find_library(DL_LIBRARY_EXISTS NAMES dl)
491495
# Check if the library was found
492496
if(DL_LIBRARY_EXISTS)
493-
target_link_libraries(executorch_no_prim_ops PRIVATE dl) # For dladdr()
497+
target_link_libraries(executorch_core PRIVATE dl) # For dladdr()
494498
endif()
495499
endif()
496500
target_include_directories(
497-
executorch_no_prim_ops PUBLIC ${_common_include_directories}
501+
executorch_core PUBLIC ${_common_include_directories}
498502
)
499-
target_compile_options(executorch_no_prim_ops PUBLIC ${_common_compile_options})
503+
target_compile_options(executorch_core PUBLIC ${_common_compile_options})
500504
if(MAX_KERNEL_NUM)
501505
target_compile_definitions(
502-
executorch_no_prim_ops PRIVATE MAX_KERNEL_NUM=${MAX_KERNEL_NUM}
506+
executorch_core PRIVATE MAX_KERNEL_NUM=${MAX_KERNEL_NUM}
503507
)
504508
endif()
505509

506510
if(EXECUTORCH_BUILD_PYBIND AND APPLE)
507511
# shared version
508512
add_library(
509-
executorch_no_prim_ops_shared SHARED ${_executorch_no_prim_ops__srcs}
513+
executorch_core_shared SHARED ${_executorch_core__srcs}
510514
)
511-
target_link_libraries(executorch_no_prim_ops_shared PRIVATE program_schema)
515+
target_link_libraries(executorch_core_shared PRIVATE program_schema)
512516
if(DL_LIBRARY_EXISTS)
513517
# For dladdr()
514-
target_link_libraries(executorch_no_prim_ops_shared PRIVATE dl)
518+
target_link_libraries(executorch_core_shared PRIVATE dl)
515519
endif()
516520
target_include_directories(
517-
executorch_no_prim_ops_shared PUBLIC ${_common_include_directories}
521+
executorch_core_shared PUBLIC ${_common_include_directories}
518522
)
519523
target_compile_options(
520-
executorch_no_prim_ops_shared PUBLIC ${_common_compile_options}
524+
executorch_core_shared PUBLIC ${_common_compile_options}
521525
)
522526
if(MAX_KERNEL_NUM)
523527
target_compile_definitions(
524-
executorch_no_prim_ops_shared PRIVATE MAX_KERNEL_NUM=${MAX_KERNEL_NUM}
528+
executorch_core_shared PRIVATE MAX_KERNEL_NUM=${MAX_KERNEL_NUM}
525529
)
526530
endif()
527531
endif()
@@ -534,7 +538,7 @@ endif()
534538
# any backends.
535539
#
536540
add_library(executorch ${_executorch__srcs})
537-
target_link_libraries(executorch PRIVATE executorch_no_prim_ops)
541+
target_link_libraries(executorch PRIVATE executorch_core)
538542
target_include_directories(executorch PUBLIC ${_common_include_directories})
539543
target_compile_options(executorch PUBLIC ${_common_compile_options})
540544
target_link_options_shared_lib(executorch)
@@ -570,7 +574,7 @@ endif()
570574
# Install `executorch` library as well as `executorch-config.cmake` under
571575
# ${CMAKE_INSTALL_PREFIX}/
572576
install(
573-
TARGETS executorch executorch_no_prim_ops
577+
TARGETS executorch executorch_core
574578
DESTINATION lib
575579
INCLUDES
576580
DESTINATION ${_common_include_directories}

backends/apple/coreml/CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ target_include_directories(
134134
coremldelegate PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/runtime/util
135135
)
136136
target_include_directories(coremldelegate PRIVATE ${EXECUTORCH_ROOT}/..)
137-
target_link_libraries(coremldelegate PRIVATE executorch_no_prim_ops)
137+
target_link_libraries(coremldelegate PRIVATE executorch_core)
138138

139139
if(EXECUTORCH_BUILD_DEVTOOLS)
140140
target_sources(coremldelegate PRIVATE ${SDK_SOURCES} ${PROTOBUF_SOURCES})
@@ -159,7 +159,7 @@ find_library(SQLITE_LIBRARY sqlite3)
159159

160160
target_link_libraries(
161161
coremldelegate
162-
PRIVATE executorch_no_prim_ops ${ACCELERATE_FRAMEWORK} ${COREML_FRAMEWORK}
162+
PRIVATE executorch_core ${ACCELERATE_FRAMEWORK} ${COREML_FRAMEWORK}
163163
${FOUNDATION_FRAMEWORK} ${SQLITE_LIBRARY}
164164
)
165165

@@ -176,7 +176,7 @@ target_compile_options(coremldelegate PRIVATE "-fno-exceptions")
176176

177177
if(EXECUTORCH_BUILD_DEVTOOLS)
178178
target_compile_options(
179-
executorch_no_prim_ops PUBLIC -DET_EVENT_TRACER_ENABLED
179+
executorch_core PUBLIC -DET_EVENT_TRACER_ENABLED
180180
)
181181
target_compile_options(coremldelegate PRIVATE "-frtti")
182182
target_compile_options(libprotobuf-lite PRIVATE "-frtti")

backends/apple/coreml/runtime/workspace/executorchcoreml.xcodeproj/project.pbxproj

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@
105105
C9E7D7962AB3F9BF00CCAE5D /* KeyValueStoreTests.mm in Sources */ = {isa = PBXBuildFile; fileRef = C9E7D78E2AB3F9BF00CCAE5D /* KeyValueStoreTests.mm */; };
106106
C9E7D7A22AB3FBB200CCAE5D /* CoreMLBackendDelegateTests.mm in Sources */ = {isa = PBXBuildFile; fileRef = C9E7D7A12AB3FBB200CCAE5D /* CoreMLBackendDelegateTests.mm */; };
107107
C9EC7E1B2BC73B3200A6B166 /* MultiArrayTests.mm in Sources */ = {isa = PBXBuildFile; fileRef = C9EC7E1A2BC73B3200A6B166 /* MultiArrayTests.mm */; };
108-
F24817E52BC655E100E80D98 /* libexecutorch_no_prim_ops.a in Frameworks */ = {isa = PBXBuildFile; fileRef = F24817E42BC655E100E80D98 /* libexecutorch_no_prim_ops.a */; };
108+
F24817E52BC655E100E80D98 /* libexecutorch_core.a in Frameworks */ = {isa = PBXBuildFile; fileRef = F24817E42BC655E100E80D98 /* libexecutorch_core.a */; };
109109
/* End PBXBuildFile section */
110110

111111
/* Begin PBXCopyFilesBuildPhase section */
@@ -310,7 +310,7 @@
310310
C9EA3FE52B73EF6300B7D7BD /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; };
311311
C9EC7E092BC662A300A6B166 /* objc_array_util.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = objc_array_util.h; path = ../util/objc_array_util.h; sourceTree = "<group>"; };
312312
C9EC7E1A2BC73B3200A6B166 /* MultiArrayTests.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; name = MultiArrayTests.mm; path = ../test/MultiArrayTests.mm; sourceTree = "<group>"; };
313-
F24817E42BC655E100E80D98 /* libexecutorch_no_prim_ops.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libexecutorch_no_prim_ops.a; path = ../libraries/libexecutorch_no_prim_ops.a; sourceTree = "<group>"; };
313+
F24817E42BC655E100E80D98 /* libexecutorch_core.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libexecutorch_core.a; path = ../libraries/libexecutorch_core.a; sourceTree = "<group>"; };
314314
/* End PBXFileReference section */
315315

316316
/* Begin PBXFrameworksBuildPhase section */
@@ -319,7 +319,7 @@
319319
buildActionMask = 2147483647;
320320
files = (
321321
C94D510F2ABDF87500AF47FD /* Accelerate.framework in Frameworks */,
322-
F24817E52BC655E100E80D98 /* libexecutorch_no_prim_ops.a in Frameworks */,
322+
F24817E52BC655E100E80D98 /* libexecutorch_core.a in Frameworks */,
323323
C94D510E2ABDF86800AF47FD /* libsqlite3.tbd in Frameworks */,
324324
C94D50D92ABD7B2400AF47FD /* CoreML.framework in Frameworks */,
325325
C99883862B95AD7D000953A3 /* libprotobuf-lite.a in Frameworks */,
@@ -540,7 +540,7 @@
540540
C96560942AABFDCE005F8126 /* libsqlite3.tbd */,
541541
C96560922AABF992005F8126 /* CoreML.framework */,
542542
C96560902AABF982005F8126 /* Accelerate.framework */,
543-
F24817E42BC655E100E80D98 /* libexecutorch_no_prim_ops.a */,
543+
F24817E42BC655E100E80D98 /* libexecutorch_core.a */,
544544
C965608D2AABF72A005F8126 /* libexecutorch.a */,
545545
);
546546
name = "Recovered References";

backends/apple/coreml/scripts/build_tests.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ cmake --build "$CMAKE_PROTOBUF_BUILD_DIR_PATH" -j9 -t libprotobuf-lite
5959
echo "ExecuTorch: Copying libraries"
6060
mkdir "$LIBRARIES_DIR_PATH"
6161
cp -f "$CMAKE_EXECUTORCH_BUILD_DIR_PATH/libexecutorch.a" "$LIBRARIES_DIR_PATH"
62-
cp -f "$CMAKE_EXECUTORCH_BUILD_DIR_PATH/libexecutorch_no_prim_ops.a" "$LIBRARIES_DIR_PATH"
62+
cp -f "$CMAKE_EXECUTORCH_BUILD_DIR_PATH/libexecutorch_core.a" "$LIBRARIES_DIR_PATH"
6363
cp -f "$CMAKE_PROTOBUF_BUILD_DIR_PATH/libprotobuf-lite.a" "$LIBRARIES_DIR_PATH"
6464

6565
#Copy ExecuTorch headers

backends/apple/mps/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ target_link_libraries(
7575
mpsdelegate
7676
PRIVATE bundled_program
7777
mps_schema
78-
executorch_no_prim_ops
78+
executorch_core
7979
${FOUNDATION_FRAMEWORK}
8080
${METAL_FRAMEWORK}
8181
${MPS_FRAMEWORK}

backends/arm/arm_backend.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ def preprocess( # noqa: C901
217217
# const data directly. Path created and data written only in debug builds.
218218
tosa_graph = ts.TosaSerializer(artifact_path)
219219
graph_module = ArmPassManager().transform_to_backend_pipeline(
220-
graph_module=edge_program.graph_module, compile_spec=compile_spec
220+
exported_program=edge_program, compile_spec=compile_spec
221221
)
222222

223223
node_visitors = get_node_visitors(edge_program)

backends/arm/arm_partitioner.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
5858
exir_ops.edge.aten.mm.default,
5959
exir_ops.edge.aten.repeat.default,
6060
exir_ops.edge.aten.relu.default,
61+
exir_ops.edge.aten.rsqrt.default,
6162
exir_ops.edge.aten._softmax.default,
6263
exir_ops.edge.aten.slice_copy.Tensor,
6364
exir_ops.edge.aten.sub.Tensor,

backends/arm/operators/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
op_quant,
2929
op_relu,
3030
op_repeat,
31+
op_rsqrt,
3132
op_sigmoid,
3233
op_slice,
3334
op_softmax,

backends/arm/operators/op_rsqrt.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# Copyright 2024 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
from typing import List
6+
7+
import numpy as np
8+
import serializer.tosa_serializer as ts
9+
import torch
10+
from executorch.backends.arm.operators.node_visitor import (
11+
NodeVisitor,
12+
register_node_visitor,
13+
)
14+
from executorch.backends.arm.tosa_mapping import TosaArg
15+
from executorch.backends.arm.tosa_quant_utils import (
16+
dequantize_value,
17+
get_quant_node_args,
18+
QuantArgs,
19+
quantize_value,
20+
)
21+
from serializer.tosa_serializer import TosaOp
22+
23+
24+
@register_node_visitor
25+
class RsqrtVisitor(NodeVisitor):
26+
target = "aten.rsqrt.default"
27+
28+
def define_node(
29+
self,
30+
node: torch.fx.Node,
31+
tosa_graph: ts.TosaSerializer,
32+
inputs: List[TosaArg],
33+
output: TosaArg,
34+
is_quant_node: bool,
35+
) -> None:
36+
if is_quant_node:
37+
# Assume quantized input is 8 bit.
38+
# Create attribute for 8 bit table lookup.
39+
input_node = node.all_input_nodes[0]
40+
in_quantargs = get_quant_node_args(input_node)
41+
output_node = list(node.users)[0]
42+
out_quantargs = get_quant_node_args(output_node)
43+
table = rsqrt_table_8bit(in_quantargs, out_quantargs)
44+
table_attr = ts.TosaSerializerAttribute()
45+
table_attr.TableAttribute(table)
46+
tosa_graph.addOperator(
47+
TosaOp.Op().TABLE, [inputs[0].name], [output.name], table_attr
48+
)
49+
else:
50+
tosa_graph.addOperator(TosaOp.Op().RSQRT, [inputs[0].name], [output.name])
51+
52+
53+
def rsqrt_table_8bit(in_quantargs: QuantArgs, out_quantargs: QuantArgs):
54+
"""
55+
Returns a table mapping 256 entries to rqsrt([qmin,qmax])
56+
Reference: https://www.mlplatform.org/tosa/tosa_spec.html#_rsqrt
57+
"""
58+
59+
def rqsrt(x):
60+
# Convert quantized input to floating point rqsrt input space.
61+
v = dequantize_value(x, in_quantargs)
62+
# Compute rqsrt.
63+
v = 1 / np.sqrt(v)
64+
# Convert rqsrt output back to quantized space.
65+
return quantize_value(v, out_quantargs)
66+
67+
return [
68+
rqsrt(x)
69+
for x in np.linspace(in_quantargs.qmin, in_quantargs.qmax, 256, dtype=np.int8)
70+
]

backends/arm/passes/arm_pass_manager.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
)
2323
from executorch.backends.arm.passes.remove_clone_pass import RemoveClonePass
2424
from executorch.backends.arm.passes.size_adjust_conv2d_pass import SizeAdjustConv2DPass
25+
from executorch.exir import ExportedProgram
2526
from executorch.exir.backend.compile_spec_schema import CompileSpec
2627
from executorch.exir.pass_manager import PassManager
2728

@@ -32,7 +33,7 @@ def _transform(self, graph_module: torch.fx.GraphModule):
3233
return self(graph_module).graph_module
3334

3435
def transform_to_backend_pipeline(
35-
self, graph_module: torch.fx.GraphModule, compile_spec: list[CompileSpec]
36+
self, exported_program: ExportedProgram, compile_spec: list[CompileSpec]
3637
):
3738
"""Apply passes before transforming program to backend"""
3839
self.add_pass(SizeAdjustConv2DPass())
@@ -46,4 +47,4 @@ def transform_to_backend_pipeline(
4647
if memory_format == "nhwc":
4748
self.add_pass(AnnotateChannelsLastDimOrder())
4849

49-
return self._transform(graph_module)
50+
return self._transform(exported_program.graph_module)

backends/arm/quantizer/quantization_annotation/one_to_one_annotator.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,11 @@ def _annotate_one_to_one(
3535
Typical ops are ops implemented with a lookup table.
3636
"""
3737
annotated_partitions = []
38-
one_to_one_ops = (torch.ops.aten.exp.default, torch.ops.aten.log.default)
38+
one_to_one_ops = {
39+
torch.ops.aten.exp.default,
40+
torch.ops.aten.log.default,
41+
torch.ops.aten.rsqrt.default,
42+
}
3943
for node in gm.graph.nodes:
4044
if node.op != "call_function" or node.target not in one_to_one_ops:
4145
continue

backends/arm/test/common.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@
44
# This source code is licensed under the BSD-style license found in the
55
# LICENSE file in the root directory of this source tree.
66

7+
import logging
78
import os
89
import shutil
910
import subprocess
11+
import sys
1012
import tempfile
1113

1214
import pytest
@@ -37,6 +39,7 @@ def pytest_configure(config):
3739
"Tests are run with --arm_run_corstone300 but corstone300 FVP is not installed."
3840
)
3941
_enabled_options.append("corstone300")
42+
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
4043

4144

4245
def pytest_collection_modifyitems(config, items):

0 commit comments

Comments
 (0)