
Commit 0851b1b

Build optimized library with CMake (#2530)
Summary: Support optimized kernel library in CMake builds. Note that I'm excluding gelu and log_softmax temporarily, as they require sleef. I will add support in a follow-up to build sleef and enable those two ops.
Test Plan: Built executor_runner with optimized_ops_lib and debug logging enabled. Confirmed that optimized kernels were loaded. Ran add example with optimized add kernel.
Differential Revision: D55118200
Pulled By: GregoryComer
1 parent 3152d7f commit 0851b1b
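For downstream CMake users, the practical effect is that a codegen'd optimized kernel-registration library can be linked in place of portable_ops_lib. A minimal sketch of a consumer target, assuming a hypothetical my_runner executable (only the ExecuTorch library names below are defined by this commit):

# Hypothetical application target; my_runner and main.cpp are placeholders.
add_executable(my_runner main.cpp)
# optimized_native_cpu_ops_lib registers optimized kernels where available and
# falls back to portable kernels for everything else (see configurations/CMakeLists.txt).
target_link_libraries(my_runner PRIVATE executorch optimized_native_cpu_ops_lib)

executor_runner itself is switched to this library in the CMakeLists.txt hunk below.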

15 files changed: +559 −26 lines

CMakeLists.txt (4 additions, 1 deletion)

@@ -309,6 +309,9 @@ endif()
 # operators necessary for the models that will run.
 #
 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/portable)
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/optimized)
+
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/configurations)
 
 #
 # gflags: Commandline flag host library.
@@ -336,7 +339,7 @@ cmake_dependent_option(
   EXECUTORCH_BUILD_HOST_TARGETS OFF)
 if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
   # Baseline libraries that executor_runner will link against.
-  set(_executor_runner_libs executorch portable_ops_lib gflags)
+  set(_executor_runner_libs executorch optimized_native_cpu_ops_lib gflags)
 
   # Generate lib to register quantized ops
   if(REGISTER_QUANTIZED_OPS)

build/Codegen.cmake (59 additions, 15 deletions)

@@ -36,9 +36,24 @@ function(gen_selected_ops ops_schema_yaml root_ops include_all_ops)
 
 endfunction()
 
-# Codegen for registering kernels. Kernels are defined in functions_yaml and
-# custom_ops_yaml
-function(generate_bindings_for_kernels functions_yaml custom_ops_yaml)
+# Codegen for registering kernels. Kernels are defined in functions_yaml,
+# custom_ops_yaml, and optionally fallback_yaml.
+#
+# Invoked as
+# generate_bindings_for_kernels(
+#   FUNCTIONS_YAML functions_yaml
+#   FALLBACK_YAML fallback_yaml
+#   CUSTOM_OPS_YAML custom_ops_yaml
+# )
+function(generate_bindings_for_kernels)
+  set(arg_names FUNCTIONS_YAML FALLBACK_YAML CUSTOM_OPS_YAML)
+  cmake_parse_arguments(GEN "" "${arg_names}" "" ${ARGN})
+
+  message(STATUS "Generating kernel bindings:")
+  message(STATUS "  FUNCTIONS_YAML: ${GEN_FUNCTIONS_YAML}")
+  message(STATUS "  FALLBACK_YAML: ${GEN_FALLBACK_YAML}")
+  message(STATUS "  CUSTOM_OPS_YAML: ${GEN_CUSTOM_OPS_YAML}")
+
   # Command to generate selected_operators.yaml from custom_ops.yaml.
   file(GLOB_RECURSE _codegen_templates "${EXECUTORCH_ROOT}/codegen/templates/*")
   file(GLOB_RECURSE _torchgen_srcs "${TORCH_ROOT}/torchgen/*.py")
@@ -60,11 +75,11 @@ function(generate_bindings_for_kernels functions_yaml custom_ops_yaml)
     ${CMAKE_CURRENT_BINARY_DIR}/Functions.h
     ${CMAKE_CURRENT_BINARY_DIR}/NativeFunctions.h)
 
-  if(functions_yaml)
-    list(APPEND _gen_command --functions-yaml-path=${functions_yaml})
+  if(GEN_FUNCTIONS_YAML)
+    list(APPEND _gen_command --functions-yaml-path=${GEN_FUNCTIONS_YAML})
   endif()
-  if(custom_ops_yaml)
-    list(APPEND _gen_command --custom-ops-yaml-path=${custom_ops_yaml})
+  if(GEN_CUSTOM_OPS_YAML)
+    list(APPEND _gen_command --custom-ops-yaml-path=${GEN_CUSTOM_OPS_YAML})
     list(
       APPEND
       _gen_command_sources
@@ -77,7 +92,7 @@ function(generate_bindings_for_kernels functions_yaml custom_ops_yaml)
     COMMENT "Generating code for kernel registration"
     OUTPUT ${_gen_command_sources}
     COMMAND ${_gen_command}
-    DEPENDS ${_oplist_yaml} ${custom_ops_yaml} ${functions_yaml}
+    DEPENDS ${_oplist_yaml} ${GEN_CUSTOM_OPS_YAML} ${GEN_FUNCTIONS_YAML}
             ${_codegen_templates} ${_torchgen_srcs}
     WORKING_DIRECTORY ${EXECUTORCH_ROOT})
   # Make generated file list available in parent scope
@@ -107,18 +122,47 @@ function(gen_custom_ops_aot_lib lib_name kernel_sources)
 endfunction()
 
 # Generate a runtime lib for registering operators in Executorch
-function(gen_operators_lib lib_name kernel_lib deps)
-  add_library(${lib_name})
+function(gen_operators_lib)
+  set(arg_names LIB_NAME)
+  set(multi_arg_names KERNEL_LIBS DEPS)
+  cmake_parse_arguments(GEN "" "${arg_names}" "${multi_arg_names}" ${ARGN})
+
+  message(STATUS "Generating operator lib:")
+  message(STATUS "  LIB_NAME: ${GEN_LIB_NAME}")
+  message(STATUS "  KERNEL_LIBS: ${GEN_KERNEL_LIBS}")
+  message(STATUS "  DEPS: ${GEN_DEPS}")
+
+  add_library(${GEN_LIB_NAME})
   target_sources(
-    ${lib_name}
+    ${GEN_LIB_NAME}
     PRIVATE
     ${CMAKE_CURRENT_BINARY_DIR}/RegisterCodegenUnboxedKernelsEverything.cpp
     ${CMAKE_CURRENT_BINARY_DIR}/Functions.h
     ${CMAKE_CURRENT_BINARY_DIR}/NativeFunctions.h)
-  target_link_libraries(${lib_name} PRIVATE ${deps})
-  if(kernel_lib)
-    target_link_libraries(${lib_name} PRIVATE ${kernel_lib})
+  target_link_libraries(${GEN_LIB_NAME} PRIVATE ${GEN_DEPS})
+  if(GEN_KERNEL_LIBS)
+    target_link_libraries(${GEN_LIB_NAME} PRIVATE ${GEN_KERNEL_LIBS})
   endif()
 
-  target_link_options_shared_lib(${lib_name})
+  target_link_options_shared_lib(${GEN_LIB_NAME})
 endfunction()
+
+# Merge two kernel yaml files, prioritizing functions from FUNCTIONS_YAML
+# and taking functions from FALLBACK_YAML when no implementation is found.
+# This corresponds to the merge_yaml buck implementation in codegen/tools.
+function(merge_yaml)
+  set(arg_names FUNCTIONS_YAML FALLBACK_YAML OUTPUT_DIR)
+  cmake_parse_arguments(GEN "" "${arg_names}" "" ${ARGN})
+
+  set(_gen_command
+      "${PYTHON_EXECUTABLE}" -m codegen.tools.merge_yaml
+      --functions_yaml_path=${GEN_FUNCTIONS_YAML}
+      --fallback_yaml_path=${GEN_FALLBACK_YAML}
+      --output_dir=${GEN_OUTPUT_DIR})
+
+  add_custom_command(
+    COMMENT "Merging kernel yaml files"
+    OUTPUT ${GEN_OUTPUT_DIR}/merged.yaml
+    COMMAND ${_gen_command}
+    WORKING_DIRECTORY ${EXECUTORCH_ROOT})
+endfunction()
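The helpers above now take keyword arguments parsed with cmake_parse_arguments instead of positional parameters. A short usage sketch, assuming the caller has already included build/Codegen.cmake; the paths shown mirror the real invocations added in configurations/CMakeLists.txt later in this commit:

# Merge optimized definitions over the portable fallbacks; writes merged.yaml
# into OUTPUT_DIR via codegen.tools.merge_yaml.
merge_yaml(
  FUNCTIONS_YAML ${EXECUTORCH_ROOT}/kernels/optimized/optimized-oss.yaml
  FALLBACK_YAML ${EXECUTORCH_ROOT}/kernels/portable/functions.yaml
  OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR})

# Generate registration code for the merged op list.
generate_bindings_for_kernels(
  FUNCTIONS_YAML ${CMAKE_CURRENT_BINARY_DIR}/merged.yaml)

# Bundle the kernel libraries into a single registration library.
gen_operators_lib(
  LIB_NAME "optimized_native_cpu_ops_lib"
  KERNEL_LIBS portable_kernels optimized_kernels
  DEPS executorch)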

build/cmake_deps.toml (16 additions, 0 deletions)

@@ -45,6 +45,22 @@ deps = [
   "executorch",
 ]
 
+[targets.optimized_kernels]
+buck_targets = [
+  "//kernels/optimized:generated_lib",
+]
+filters = [
+  ".cpp$",
+]
+excludes = [
+  # Exclude the codegen templates, which are picked up because the buck target
+  # is the generated_lib and not the unwrapped set of kernels.
+  "^codegen/templates",
+]
+deps = [
+  "executorch",
+]
+
 [targets.quantized_kernels]
 buck_targets = [
   "//kernels/quantized:generated_lib",

codegen/tools/merge_yaml.py (8 additions, 2 deletions)

@@ -12,14 +12,20 @@
 
 import yaml
 
-from executorch.codegen.tools.yaml_util import BlankLineDumper
-
 try:
     from yaml import CSafeLoader as Loader
 except ImportError:
     from yaml import SafeLoader as Loader  # type: ignore[misc]
 
 
+class BlankLineDumper(yaml.SafeDumper):
+    def write_line_break(self, data=None):
+        super().write_line_break(data)
+        # insert a new line between entries.
+        if len(self.indents) == 1:
+            super().write_line_break()
+
+
 def merge(functions_yaml_path: str, fallback_yaml_path: Optional[str], output_dir: str):
     output_file = os.path.join(output_dir, "merged.yaml")
 
configurations/CMakeLists.txt (new file, 51 additions)

@@ -0,0 +1,51 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+cmake_minimum_required(VERSION 3.19)
+
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+if(NOT CMAKE_CXX_STANDARD)
+  set(CMAKE_CXX_STANDARD 17)
+endif()
+
+if(NOT PYTHON_EXECUTABLE)
+  set(PYTHON_EXECUTABLE python3)
+endif()
+# Source root directory for executorch.
+if(NOT EXECUTORCH_ROOT)
+  set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..)
+endif()
+# Source root directory for pytorch. This is needed for kernel binding.
+if(NOT TORCH_ROOT)
+  set(TORCH_ROOT ${EXECUTORCH_ROOT}/third-party/pytorch)
+endif()
+
+set(_common_compile_options -Wno-deprecated-declarations)
+
+include(${EXECUTORCH_ROOT}/build/Utils.cmake)
+include(${EXECUTORCH_ROOT}/build/Codegen.cmake)
+
+
+# Merge optimized and portable definitions, taking optimized where available.
+merge_yaml(
+  FUNCTIONS_YAML ${EXECUTORCH_ROOT}/kernels/optimized/optimized-oss.yaml
+  FALLBACK_YAML ${EXECUTORCH_ROOT}/kernels/portable/functions.yaml
+  OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}
+)
+
+gen_selected_ops("${CMAKE_CURRENT_BINARY_DIR}/merged.yaml" "" "")
+
+generate_bindings_for_kernels(
+  FUNCTIONS_YAML ${CMAKE_CURRENT_BINARY_DIR}/merged.yaml)
+message("Generated files ${gen_command_sources}")
+
+# optimized_native_cpu_ops_lib: Register optimized op kernels into the runtime
+gen_operators_lib(
+  LIB_NAME "optimized_native_cpu_ops_lib"
+  KERNEL_LIBS portable_kernels optimized_kernels
+  DEPS executorch)
+
+install(TARGETS optimized_native_cpu_ops_lib DESTINATION lib)

kernels/optimized/CMakeLists.txt (new file, 55 additions)

@@ -0,0 +1,55 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Kernel library for optimized kernels. Please keep this file formatted by running:
+# ~~~
+# cmake-format --first-comment-is-literal=True CMakeLists.txt
+# ~~~
+
+cmake_minimum_required(VERSION 3.19)
+
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+if(NOT CMAKE_CXX_STANDARD)
+  set(CMAKE_CXX_STANDARD 17)
+endif()
+
+if(NOT PYTHON_EXECUTABLE)
+  set(PYTHON_EXECUTABLE python3)
+endif()
+# Source root directory for executorch.
+if(NOT EXECUTORCH_ROOT)
+  set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
+endif()
+# Source root directory for pytorch. This is needed for kernel binding.
+if(NOT TORCH_ROOT)
+  set(TORCH_ROOT ${EXECUTORCH_ROOT}/third-party/pytorch)
+endif()
+
+set(_common_compile_options -Wno-deprecated-declarations)
+
+include(${EXECUTORCH_ROOT}/build/Utils.cmake)
+include(${EXECUTORCH_ROOT}/build/Codegen.cmake)
+
+# Generate C++ bindings to register kernels into both PyTorch (for AOT) and
+# Executorch (for runtime). Here select all ops in optimized-oss.yaml
+set(_yaml "${CMAKE_CURRENT_LIST_DIR}/optimized-oss.yaml")
+gen_selected_ops("${_yaml}" "" "")
+
+generate_bindings_for_kernels(
+  FUNCTIONS_YAML ${CMAKE_CURRENT_SOURCE_DIR}/optimized-oss.yaml)
+message("Generated files ${gen_command_sources}")
+
+list(TRANSFORM _optimized_kernels__srcs PREPEND "${EXECUTORCH_ROOT}/")
+add_library(optimized_kernels ${_optimized_kernels__srcs})
+target_link_libraries(optimized_kernels PRIVATE executorch)
+target_compile_options(optimized_kernels PUBLIC ${_common_compile_options})
+# Build a library for _optimized_kernels_srcs
+#
+# optimized_ops_lib: Register optimized ops kernels into Executorch runtime
+gen_operators_lib(
+  LIB_NAME "optimized_ops_lib"
+  KERNEL_LIBS optimized_kernels
+  DEPS executorch)

kernels/optimized/cpu/targets.bzl (5 additions, 3 deletions)

@@ -1,5 +1,5 @@
 load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
-load("@fbsource//xplat/executorch/kernels/optimized:op_registration_util.bzl", "define_op_target", "op_target")
+load("@fbsource//xplat/executorch/kernels/optimized:op_registration_util.bzl", "define_op_target", "is_op_disabled", "op_target")
 
 _OPTIMIZED_ATEN_OPS = (
     op_target(
@@ -81,11 +81,13 @@ def define_common_targets():
     TARGETS and BUCK files that call this function.
     """
 
+    enabled_ops = [op for op in _OPTIMIZED_ATEN_OPS if not is_op_disabled(op["name"])]
+
     # Define build targets for all operators registered in the tables above.
-    for op in _OPTIMIZED_ATEN_OPS:
+    for op in enabled_ops:
         define_op_target(**op)
 
-    aten_op_targets = [":{}".format(op["name"]) for op in _OPTIMIZED_ATEN_OPS]
+    aten_op_targets = [":{}".format(op["name"]) for op in enabled_ops]
     all_op_targets = aten_op_targets
 
     runtime.cxx_library(

kernels/optimized/op_registration_util.bzl (5 additions, 1 deletion)

@@ -1,5 +1,5 @@
-load("@fbsource//tools/build_defs:selects.bzl", "selects")
 load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+load("@fbsource//xplat/executorch/build:selects.bzl", "selects")
 load(
     "@fbsource//xplat/executorch/kernels/optimized:lib_defs.bzl",
     "get_vec_android_preprocessor_flags",
@@ -124,3 +124,7 @@ def define_op_target(name, deps):
         name = name,
         deps = deps,
     )
+
+def is_op_disabled(name):
+    # All ops are enabled for internal builds.
+    return False

kernels/optimized/optimized-oss.yaml (new file, 73 additions)

@@ -0,0 +1,73 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This yaml file contains operators that have optimized kernels available.
+
+- op: add.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::opt_add_out
+
+- op: add.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::opt_add_scalar_out
+
+- op: bmm.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::opt_bmm_out
+
+- op: div.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::opt_div_out
+
+- op: div.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::opt_div_scalar_out
+
+- op: exp.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::opt_exp_out
+
+- op: le.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::opt_le_scalar_out
+
+- op: le.Tensor_out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::opt_le_tensor_out
+
+- op: mul.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::opt_mul_out
+
+- op: mul.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::opt_mul_scalar_out
+
+- op: native_layer_norm.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::opt_native_layer_norm_out
+
+- op: neg.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::opt_neg_out
+
+- op: sub.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::opt_sub_out
+
+- op: sub.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::opt_sub_scalar_out
