Skip to content

Commit a14d078

Browse files
GregoryComerfacebook-github-bot
authored and committed
Build optimized library with CMake
Summary: Support optimized kernel library in CMake builds. Overriding pre-existing CI failures in llama runner and mac unit test. bypass-github-export-checks bypass-github-executorch-ci-checks X-link: pytorch/executorch#2530 Reviewed By: kimishpatel Differential Revision: D55118200 Pulled By: GregoryComer fbshipit-source-id: b094980e9fc402e316a4d7cfb24ee3646a00d64e
1 parent bab185f commit a14d078

File tree

3 files changed

+274
-0
lines changed

3 files changed

+274
-0
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Platform regex constant, set to the placeholder string "UNUSED".
# NOTE(review): presumably a stand-in for the internal platform definitions
# so that .bzl files loading this symbol still resolve -- confirm.
DEVSERVER_PLATFORM_REGEX = "UNUSED"
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
load("@fbsource//tools/build_defs:default_platform_defs.bzl", "DEVSERVER_PLATFORM_REGEX")
2+
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
3+
4+
# Because vec exists as a collection of header files, compile and preprocessor
5+
# flags applied to the vec target do not have any effect, since no compilation
6+
# actually occurs for the target.
7+
#
8+
# Targets using the vec library must therefore call the get_vec_*_flags
# functions to declare the compiler flags required to access CPU vector
# intrinsics.
11+
12+
def get_vec_android_preprocessor_flags():
    """Returns the platform-keyed preprocessor flags for Android vec users.

    Returns:
        A list of (platform regex, flag list) tuples. The only entry matches
        "^android-arm64.*$" and defines ET_BUILD_ARM_VEC256_WITH_SLEEF.
    """
    arm64_flags = ["-DET_BUILD_ARM_VEC256_WITH_SLEEF"]
    return [("^android-arm64.*$", arm64_flags)]
22+
23+
def get_vec_cxx_preprocessor_flags():
    """Returns the platform-keyed preprocessor flags for cxx vec users.

    Returns:
        A list holding one (platform regex, flag list) tuple that pairs
        DEVSERVER_PLATFORM_REGEX with the CPU_CAPABILITY_AVX2 define.
    """
    avx2_flags = ["-DCPU_CAPABILITY_AVX2"]
    return [(DEVSERVER_PLATFORM_REGEX, avx2_flags)]
33+
34+
def get_vec_fbcode_preprocessor_flags():
    """Returns the preprocessor flags for fbcode vec users.

    Returns:
        A flat list of flags (not keyed by platform) containing the
        CPU_CAPABILITY_AVX2 define.
    """
    return ["-DCPU_CAPABILITY_AVX2"]
39+
40+
# Currently, having a dependency on fbsource//third-party/sleef:sleef may cause
41+
# duplicate symbol errors when linking fbcode targets in opt mode that also
42+
# depend on ATen. This is because ATen accesses sleef via the third-party folder
43+
# in caffe2 (caffe2/third-party//sleef:sleef).
44+
# TODO(ssjia): Enable -DCPU_CAPABILITY_AVX2 in fbcode, which requires sleef.
45+
def define_libs():
    """Defines the libvec, libutils and libblas cxx_library targets.

    - libvec: header-only target exporting vec/**/*.h, with sleef attached as
      a platform dependency.
    - libutils: header-only target exporting utils/**/*.h.
    - libblas: compiles blas/**/*.cpp and exports blas/**/*.h; adds openblas
      on android-arm64 and the Accelerate framework for fbobjc builds.

    All three targets export their headers under the
    "executorch/kernels/optimized" namespace.
    """

    # Immutable strings shared by the rule declarations below.
    android_arm64 = "^android-arm64.*$"
    namespace = "executorch/kernels/optimized"
    sleef = "fbsource//third-party/sleef:sleef"
    sleef_arm = "fbsource//third-party/sleef:sleef_arm"
    blas_define = "-DET_BUILD_WITH_BLAS"

    runtime.cxx_library(
        name = "libvec",
        srcs = [],
        exported_headers = native.glob(["vec/**/*.h"]),
        header_namespace = namespace,
        visibility = [
            "//executorch/...",
            "@EXECUTORCH_CLIENTS",
        ],
        # arm64 configurations use the ARM flavour of sleef; everything else
        # uses the default sleef target.
        cxx_platform_deps = select({
            "DEFAULT": [(DEVSERVER_PLATFORM_REGEX, [sleef])],
            "ovr_config//cpu:arm64": [(DEVSERVER_PLATFORM_REGEX, [sleef_arm])],
        }),
        fbandroid_platform_deps = [(android_arm64, [sleef_arm])],
    )

    runtime.cxx_library(
        name = "libutils",
        srcs = [],
        exported_headers = native.glob(["utils/**/*.h"]),
        header_namespace = namespace,
        visibility = [
            "//executorch/...",
            "@EXECUTORCH_CLIENTS",
        ],
        exported_deps = [
            # Needed to access the __ET_INLINE macro
            "//executorch/runtime/platform:compiler",
        ],
    )

    runtime.cxx_library(
        name = "libblas",
        srcs = native.glob(["blas/**/*.cpp"]),
        exported_headers = native.glob(["blas/**/*.h"]),
        header_namespace = namespace,
        visibility = [
            "//executorch/...",
            "@EXECUTORCH_CLIENTS",
        ],
        fbandroid_platform_preprocessor_flags = [(android_arm64, [blas_define])],
        fbandroid_platform_deps = [
            (android_arm64, ["fbsource//third-party/openblas:openblas"]),
        ],
        fbobjc_exported_preprocessor_flags = [
            blas_define,
            "-DET_BUILD_FOR_APPLE",
        ],
        fbobjc_frameworks = ["Accelerate"],
        exported_deps = [
            "//executorch/kernels/optimized:libutils",
            "//executorch/runtime/core/exec_aten:lib",
        ],
    )
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
2+
load("@fbsource//xplat/executorch/build:selects.bzl", "selects")
3+
load(
4+
"@fbsource//xplat/executorch/kernels/optimized:lib_defs.bzl",
5+
"get_vec_android_preprocessor_flags",
6+
)
7+
8+
def op_target(name, deps = []):
    """Builds the registration record for an optimized operator overload group.

    An operator overload group is the set of overloads that share one operator
    name, and that shared name is the base name of the target. For example the
    "add" group, named "op_add" here, might implement:
    - add.Tensor
    - add_.Tensor
    - add.out
    - add.Scalar

    Code shared between op targets (helpers/utilities) belongs in its own
    cxx_library that is then added as a dep.

    Args:
        name: The name of the operator overload group; e.g., "op_add". The
            directory must contain a source file named "<name>.cpp"; e.g.,
            "op_add.cpp".
        deps: Optional extra deps to add to the cxx_library(). Op targets may
            not depend on other op targets, to keep the dependencies
            manageable; share code through a separate runtime.cxx_library
            that both depend on.

    Returns:
        A dict with the keys "deps" and "name" holding the arguments as given.
    """

    # Nothing is defined here: the record is only collected into a table that
    # is later used to define the actual target.
    return dict(deps = deps, name = name)
43+
44+
def _enforce_deps(deps, name):
    """Aborts with fail() when a dep refers to another op target.

    Args:
        deps: A list of build target strings.
        name: The name of the target being checked; e.g., "op_add". Only used
            in the failure message.
    """
    for candidate in deps:
        if not candidate.startswith(":op_"):
            continue

        # Op targets must not depend on each other, which keeps the dependency
        # graph manageable. Shared code goes in a separate runtime.cxx_library
        # that both op targets depend on.
        fail("op_target {} may not depend on other op_target {}".format(
            name,
            candidate,
        ))
61+
62+
def define_op_library(name, deps):
    """Declares the cxx_library for one operator overload group.

    The library is built from "<name>.cpp" and always depends on the kernel
    includes plus the optimized libvec and libutils targets, in addition to
    the caller-supplied deps.

    Args:
        name: The name of the target; e.g., "op_add"
        deps: List of deps for the target.
    """

    # Run the op-target dependency check over deps before declaring anything.
    dep_check = native.partial(_enforce_deps, name = name)
    selects.apply(obj = deps, function = dep_check)

    target_name = "{}".format(name)
    source_file = "{}.cpp".format(name)

    optimized_libs = [
        "//executorch/kernels/optimized:libvec",
        "//executorch/kernels/optimized:libutils",
    ]
    all_deps = [
        "//executorch/runtime/kernel:kernel_includes",
    ] + (deps + optimized_libs)

    runtime.cxx_library(
        name = target_name,
        srcs = [source_file],
        visibility = [
            "//executorch/kernels/portable/test/...",
            "//executorch/kernels/quantized/test/...",
            "//executorch/kernels/optimized/test/...",
            "//executorch/kernels/test/...",
            "@EXECUTORCH_CLIENTS",
        ],
        # Kernels often have helpers with no prototypes; the warning is
        # disabled here because the headers are code-generated and linked in
        # later.
        compiler_flags = ["-Wno-missing-prototypes"],
        deps = all_deps,
        fbandroid_platform_preprocessor_flags = get_vec_android_preprocessor_flags(),
        # sleef has to be a direct dependency of the operator target when
        # building for Android, or a linker error may occur. The cause is not
        # understood; it seems that fbandroid_platform_deps of dependencies
        # are not transitive.
        fbandroid_platform_deps = [
            ("^android-arm64.*$", ["fbsource//third-party/sleef:sleef_arm"]),
        ],
        # link_whole is necessary because the operators register themselves
        # via static initializers that run at program startup.
        # @lint-ignore BUCKLINT link_whole
        link_whole = True,
    )
111+
112+
def define_op_target(name, deps):
    """Defines the cxx_library target(s) for the named operator group.

    Args:
        name: The base name of the target; e.g., "op_add"
        deps: List of deps for the targets.
    """

    # NOTE(review): the comment previously here said that in ATen mode the
    # ATen-compatible (non-custom) operators use ATen's own implementations,
    # so the versions defined here should not be built. No such condition is
    # present in this function: the library is defined unconditionally.
    define_op_library(name = name, deps = deps)
127+
128+
def is_op_disabled(name):
    """Reports whether the named op target is on the disabled list.

    Args:
        name: The op target name; e.g., "op_gelu".

    Returns:
        True if name is "op_gelu" or "op_log_softmax", otherwise False.
    """

    # TODO (gjcomer) Enable ops with sleef dependency in OSS
    return name in ("op_gelu", "op_log_softmax")

0 commit comments

Comments
 (0)