Skip to content

Commit c48f51b

Browse files
committed
Building optimized library with CMake
1 parent a8c6943 commit c48f51b

File tree

8 files changed

+347
-5
lines changed

8 files changed

+347
-5
lines changed

CMakeLists.txt

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ endif()
8181
set(EXECUTORCH_LOG_LEVEL "Info" CACHE STRING
8282
"Build with the given ET_MIN_LOG_LEVEL value")
8383
string(TOLOWER "${EXECUTORCH_LOG_LEVEL}" LOG_LEVEL_LOWER)
84-
if (LOG_LEVEL_LOWER STREQUAL "debug")
84+
if(LOG_LEVEL_LOWER STREQUAL "debug")
8585
add_definitions(-DET_MIN_LOG_LEVEL=Debug)
8686
elseif(LOG_LEVEL_LOWER STREQUAL "info")
8787
add_definitions(-DET_MIN_LOG_LEVEL=Info)
@@ -90,8 +90,9 @@ elseif(LOG_LEVEL_LOWER STREQUAL "error")
9090
elseif(LOG_LEVEL_LOWER STREQUAL "fatal")
9191
add_definitions(-DET_MIN_LOG_LEVEL=Fatal)
9292
else()
93-
message(SEND_ERROR
94-
"Unknown log level \"${EXECUTORCH_LOG_LEVEL}\". Expected one of Debug, Info, Error, or Fatal.")
93+
# message() concatenates its arguments with no separator and CMake has no "+"
# operator, so the first string carries the trailing space itself.
message(SEND_ERROR
94+
"Unknown log level \"${EXECUTORCH_LOG_LEVEL}\". "
95+
"Expected one of Debug, Info, Error, or Fatal.")
9596
endif()
9697

9798
option(EXECUTORCH_ENABLE_PROGRAM_VERIFICATION
@@ -308,6 +309,7 @@ endif()
308309
# operators necessary for the models that will run.
309310
#
310311
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/portable)
312+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/optimized)
311313

312314
#
313315
# gflags: Commandline flag host library.

build/cmake_deps.toml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,22 @@ deps = [
4545
"executorch",
4646
]
4747

48+
[targets.optimized_kernels]
49+
buck_targets = [
50+
"//kernels/optimized:generated_lib",
51+
]
52+
filters = [
53+
".cpp$",
54+
]
55+
excludes = [
56+
# Exclude the codegen templates, which are picked up because the buck target
57+
# is the generated_lib and not the unwrapped set of kernels.
58+
"^codegen/templates",
59+
]
60+
deps = [
61+
"executorch",
62+
]
63+
4864
[targets.quantized_kernels]
4965
buck_targets = [
5066
"//kernels/quantized:generated_lib",

kernels/optimized/CMakeLists.txt

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
# Kernel library for optimized kernels. Please keep this file formatted by running:
8+
# ~~~
9+
# cmake-format --first-comment-is-literal=True CMakeLists.txt
10+
# ~~~
11+
12+
cmake_minimum_required(VERSION 3.19)
13+
14+
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
15+
if(NOT CMAKE_CXX_STANDARD)
16+
set(CMAKE_CXX_STANDARD 17)
17+
endif()
18+
19+
if(NOT PYTHON_EXECUTABLE)
20+
set(PYTHON_EXECUTABLE python3)
21+
endif()
22+
# Source root directory for executorch.
23+
if(NOT EXECUTORCH_ROOT)
24+
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
25+
endif()
26+
# Source root directory for pytorch.
27+
if(NOT TORCH_ROOT)
28+
set(TORCH_ROOT ${EXECUTORCH_ROOT}/third-party/pytorch)
29+
endif()
30+
31+
set(_common_compile_options -Wno-deprecated-declarations)
32+
33+
# Set architecture-dependent flags.
34+
set(_arch_compile_flags "")
35+
# TODO
36+
37+
include(${EXECUTORCH_ROOT}/build/Utils.cmake)
38+
include(${EXECUTORCH_ROOT}/build/Codegen.cmake)
39+
40+
# Generate C++ bindings to register kernels into both PyTorch (for AOT) and
41+
# Executorch (for runtime). Here select all ops in optimized.yaml
42+
set(_yaml "${CMAKE_CURRENT_LIST_DIR}/optimized.yaml")
43+
gen_selected_ops("${_yaml}" "" "")
44+
45+
generate_bindings_for_kernels(${CMAKE_CURRENT_SOURCE_DIR}/optimized.yaml "")
46+
message("Generated files ${gen_command_sources}")
47+
48+
list(TRANSFORM _optimized_kernels__srcs PREPEND "${EXECUTORCH_ROOT}/")
49+
add_library(optimized_kernels ${_optimized_kernels__srcs})
50+
target_link_libraries(optimized_kernels PRIVATE executorch)
51+
target_compile_options(optimized_kernels PUBLIC ${_common_compile_options})
52+
# Build a library for _optimized_kernels__srcs
53+
#
54+
# optimized_ops_lib: Register optimized ops kernels into Executorch runtime
55+
gen_operators_lib("optimized_ops_lib" optimized_kernels executorch)

kernels/optimized/op_registration_util.bzl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
load("@fbsource//tools/build_defs:selects.bzl", "selects")
21
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
2+
load("@fbsource//xplat/executorch/build:selects.bzl", "selects")
33
load(
44
"@fbsource//xplat/executorch/kernels/optimized:lib_defs.bzl",
55
"get_vec_android_preprocessor_flags",
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
DEVSERVER_PLATFORM_REGEX = "UNUSED"

shim/xplat/executorch/build/env_interface.bzl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def _remove_platform_specific_args(kwargs):
117117
"""
118118
keys = []
119119
for key in kwargs:
120-
if key.endswith("_platform_preprocessor_flags") or key.endswith("_platform_deps"):
120+
if key.endswith("_platform_preprocessor_flags") or key.endswith("_platform_deps") or key.startswith("fbobjc"):
121121
keys.append(key)
122122
for key in keys:
123123
kwargs.pop(key)
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
load("@fbsource//tools/build_defs:default_platform_defs.bzl", "DEVSERVER_PLATFORM_REGEX")
2+
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
3+
4+
# Because vec exists as a collection of header files, compile and preprocessor
5+
# flags applied to the vec target do not have any effect, since no compilation
6+
# actually occurs for the target.
7+
#
8+
# Targets using the vec library must therefore call the get_vec_*_flags
9+
# functions in order to declare the required compiler flags needed in order to
10+
# access CPU vector intrinsics.
11+
12+
def get_vec_android_preprocessor_flags():
13+
preprocessor_flags = [
14+
(
15+
"^android-arm64.*$",
16+
[
17+
"-DET_BUILD_ARM_VEC256_WITH_SLEEF",
18+
],
19+
),
20+
]
21+
return preprocessor_flags
22+
23+
def get_vec_cxx_preprocessor_flags():
24+
preprocessor_flags = [
25+
(
26+
DEVSERVER_PLATFORM_REGEX,
27+
[
28+
"-DCPU_CAPABILITY_AVX2",
29+
],
30+
),
31+
]
32+
return preprocessor_flags
33+
34+
def get_vec_fbcode_preprocessor_flags():
35+
preprocessor_flags = [
36+
"-DCPU_CAPABILITY_AVX2",
37+
]
38+
return preprocessor_flags
39+
40+
# Currently, having a dependency on fbsource//third-party/sleef:sleef may cause
41+
# duplicate symbol errors when linking fbcode targets in opt mode that also
42+
# depend on ATen. This is because ATen accesses sleef via the third-party folder
43+
# in caffe2 (caffe2/third-party//sleef:sleef).
44+
# TODO(ssjia): Enable -DCPU_CAPABILITY_AVX2 in fbcode, which requires sleef.
45+
def define_libs():
46+
runtime.cxx_library(
47+
name = "libvec",
48+
srcs = [],
49+
exported_headers = native.glob([
50+
"vec/**/*.h",
51+
]),
52+
header_namespace = "executorch/kernels/optimized",
53+
visibility = [
54+
"//executorch/...",
55+
"@EXECUTORCH_CLIENTS",
56+
],
57+
cxx_platform_deps = select({
58+
"DEFAULT": [
59+
(
60+
DEVSERVER_PLATFORM_REGEX,
61+
[
62+
"fbsource//third-party/sleef:sleef",
63+
],
64+
),
65+
],
66+
"ovr_config//cpu:arm64": [
67+
(
68+
DEVSERVER_PLATFORM_REGEX,
69+
[
70+
"fbsource//third-party/sleef:sleef_arm",
71+
],
72+
),
73+
],
74+
}),
75+
fbandroid_platform_deps = [
76+
(
77+
"^android-arm64.*$",
78+
[
79+
"fbsource//third-party/sleef:sleef_arm",
80+
],
81+
),
82+
],
83+
)
84+
85+
runtime.cxx_library(
86+
name = "libutils",
87+
srcs = [],
88+
exported_headers = native.glob([
89+
"utils/**/*.h",
90+
]),
91+
header_namespace = "executorch/kernels/optimized",
92+
visibility = [
93+
"//executorch/...",
94+
"@EXECUTORCH_CLIENTS",
95+
],
96+
exported_deps = [
97+
# Needed to access the __ET_INLINE macro
98+
"//executorch/runtime/platform:compiler",
99+
],
100+
)
101+
102+
runtime.cxx_library(
103+
name = "libblas",
104+
srcs = native.glob([
105+
"blas/**/*.cpp",
106+
]),
107+
exported_headers = native.glob([
108+
"blas/**/*.h",
109+
]),
110+
header_namespace = "executorch/kernels/optimized",
111+
visibility = [
112+
"//executorch/...",
113+
"@EXECUTORCH_CLIENTS",
114+
],
115+
fbandroid_platform_preprocessor_flags = [
116+
(
117+
"^android-arm64.*$",
118+
[
119+
"-DET_BUILD_WITH_BLAS",
120+
],
121+
),
122+
],
123+
fbandroid_platform_deps = [
124+
(
125+
"^android-arm64.*$",
126+
[
127+
"fbsource//third-party/openblas:openblas",
128+
],
129+
),
130+
],
131+
fbobjc_exported_preprocessor_flags = [
132+
"-DET_BUILD_WITH_BLAS",
133+
"-DET_BUILD_FOR_APPLE",
134+
],
135+
fbobjc_frameworks = [
136+
"Accelerate",
137+
],
138+
exported_deps = [
139+
"//executorch/kernels/optimized:libutils",
140+
"//executorch/runtime/core/exec_aten:lib",
141+
],
142+
)
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
2+
load("@fbsource//xplat/executorch/build:selects.bzl", "selects")
3+
load(
4+
"@fbsource//xplat/executorch/kernels/optimized:lib_defs.bzl",
5+
"get_vec_android_preprocessor_flags",
6+
)
7+
8+
def op_target(name, deps = []):
9+
"""Registers an optimized implementation for an operator overload group.
10+
11+
An operator overload group is a set of operator overloads with a common
12+
operator name. That common operator name should be the base name of this
13+
target.
14+
15+
E.g., the "add" operator overload group, named "op_add" in this target,
16+
might implement:
17+
- add.Tensor
18+
- add_.Tensor
19+
- add.out
20+
- add.Scalar
21+
22+
If an op target would like to share a header/sources with a different op
23+
target (e.g., helpers/utilities), it should declare a separate cxx_library
24+
and add it as a dep.
25+
26+
Args:
27+
name: The name of the operator overload group; e.g.,
28+
"op_add". This directory must contain a source file named
29+
"<name>.cpp"; e.g., "op_add.cpp".
30+
deps: Optional extra deps to add to the cxx_library(). Note:
31+
- op targets may not depend on other op targets, to keep the
32+
dependencies manageable. If two op targets would like to share
33+
code, define a separate runtime.cxx_library that they both depend
34+
on.
35+
"""
36+
37+
# Note that this doesn't actually define the target, but helps register
38+
# it in a table that's used to define the target.
39+
return {
40+
"deps": deps,
41+
"name": name,
42+
}
43+
44+
def _enforce_deps(deps, name):
45+
"""Fails if any of the deps are not allowed.
46+
47+
Args:
48+
deps: A list of build target strings.
49+
name: The name of the target; e.g., "op_add"
50+
"""
51+
for dep in deps:
52+
if dep.startswith(":op_"):
53+
# op targets may not depend on other op targets, to keep the
54+
# dependencies manageable. If two op targets would like to share
55+
# code, define a separate runtime.cxx_library that they both depend
56+
# on.
57+
fail("op_target {} may not depend on other op_target {}".format(
58+
name,
59+
dep,
60+
))
61+
62+
def define_op_library(name, deps):
63+
"""Defines a cxx_library target for the named operator overload group.
64+
65+
Args:
66+
name: The name of the target; e.g., "op_add"
67+
deps: List of deps for the target.
68+
"""
69+
selects.apply(obj = deps, function = native.partial(_enforce_deps, name = name))
70+
71+
augmented_deps = deps + [
72+
"//executorch/kernels/optimized:libvec",
73+
"//executorch/kernels/optimized:libutils",
74+
]
75+
76+
runtime.cxx_library(
77+
name = "{}".format(name),
78+
srcs = [
79+
"{}.cpp".format(name),
80+
],
81+
visibility = [
82+
"//executorch/kernels/portable/test/...",
83+
"//executorch/kernels/quantized/test/...",
84+
"//executorch/kernels/optimized/test/...",
85+
"//executorch/kernels/test/...",
86+
"@EXECUTORCH_CLIENTS",
87+
],
88+
# Kernels often have helpers with no prototypes; just disable the warning here, as the headers
89+
# are code-generated and linked in later.
90+
compiler_flags = ["-Wno-missing-prototypes"],
91+
deps = [
92+
"//executorch/runtime/kernel:kernel_includes",
93+
] + augmented_deps,
94+
fbandroid_platform_preprocessor_flags = get_vec_android_preprocessor_flags(),
95+
# sleef needs to be added as a direct dependency of the operator target when building for Android,
96+
# or a linker error may occur. Not sure why this happens; it seems that fbandroid_platform_deps of
97+
# dependencies are not transitive
98+
fbandroid_platform_deps = [
99+
(
100+
"^android-arm64.*$",
101+
[
102+
"fbsource//third-party/sleef:sleef_arm",
103+
],
104+
),
105+
],
106+
# link_whole is necessary because the operators register themselves
107+
# via static initializers that run at program startup.
108+
# @lint-ignore BUCKLINT link_whole
109+
link_whole = True,
110+
)
111+
112+
def define_op_target(name, deps):
113+
"""Possibly defines cxx_library targets for the named operator group.
114+
115+
Args:
116+
name: The base name of the target; e.g., "op_add"
117+
deps: List of deps for the targets.
118+
"""
119+
120+
# When building in ATen mode, ATen-compatible (non-custom) operators will
121+
# use the implementations provided by ATen, so we should not build the
122+
# versions defined here.
123+
define_op_library(
124+
name = name,
125+
deps = deps,
126+
)

0 commit comments

Comments
 (0)