Skip to content

Dtype selective build for optimized ops #10878

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions examples/selective_build/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,19 @@ def define_common_targets():
visibility = ["//executorch/..."],
)

# Generated library that uses the ":select_ops_in_dict" op selection with the
# optimized kernels: it reads optimized.yaml and lists optimized_operators as
# its kernel dependency. dtype_selective_build is switched on for this target.
executorch_generated_lib(
name = "select_ops_in_dict_lib_optimized",
functions_yaml_target = "//executorch/kernels/optimized:optimized.yaml",
kernel_deps = [
"//executorch/kernels/optimized:optimized_operators",
],
deps = [
":select_ops_in_dict",
],
dtype_selective_build = True,
visibility = ["//executorch/..."],
)

# Select all ops from a yaml file
et_operator_library(
name = "select_ops_from_yaml",
Expand Down Expand Up @@ -121,6 +134,8 @@ def define_common_targets():
lib.append(":select_ops_in_list_lib")
elif select_ops == "dict":
lib.append(":select_ops_in_dict_lib")
elif select_ops == "dict_optimized":
lib.append(":select_ops_in_dict_lib_optimized")
elif select_ops == "yaml":
lib.append(":select_ops_from_yaml_lib")
elif select_ops == "model":
Expand Down
21 changes: 17 additions & 4 deletions kernels/optimized/cpu/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def define_common_targets():
name = "add_sub_impl",
srcs = [],
exported_headers = ["op_add_sub_impl.h"],
visibility = ["//executorch/kernels/optimized/cpu/..."],
visibility = ["//executorch/kernels/optimized/cpu/...", "@EXECUTORCH_CLIENTS",],
exported_deps = [
"//executorch/runtime/core:core",
"//executorch/kernels/portable/cpu/util:broadcast_indexes_range",
Expand All @@ -36,14 +36,14 @@ def define_common_targets():
name = "fft_utils",
srcs = [],
exported_headers = ["fft_utils.h"],
visibility = ["//executorch/kernels/optimized/cpu/..."],
visibility = ["//executorch/kernels/optimized/cpu/...", "@EXECUTORCH_CLIENTS",],
exported_deps = [] if runtime.is_oss else ["fbsource//third-party/pocket_fft:pocketfft"],
)

runtime.cxx_library(
name = "binary_ops",
exported_headers = ["binary_ops.h"],
visibility = ["//executorch/kernels/optimized/cpu/..."],
visibility = ["//executorch/kernels/optimized/cpu/...", "@EXECUTORCH_CLIENTS",],
exported_deps = ["//executorch/runtime/core:core"],
)

Expand All @@ -58,9 +58,22 @@ def define_common_targets():
name = "moments_utils",
srcs = [],
exported_headers = ["moments_utils.h"],
visibility = ["//executorch/kernels/optimized/..."],
visibility = ["//executorch/kernels/optimized/...", "@EXECUTORCH_CLIENTS",],
exported_deps = [
"//executorch/kernels/optimized:libvec",
"//executorch/kernels/optimized:libutils",
],
)

# Used for dtype selective build. Collect source and header files.
# build_optimized_lib in shim_et/xplat/executorch/codegen/codegen.bzl copies
# from these two filegroups via $(location ...).

# Every *.cpp file in this directory.
runtime.filegroup(
name = "optimized_source_files",
srcs = native.glob(["*.cpp"]),
visibility = ["//executorch/...", "@EXECUTORCH_CLIENTS"],
)

# Every *.h file in this directory.
runtime.filegroup(
name = "optimized_header_files",
srcs = native.glob(["*.h"]),
visibility = ["//executorch/...", "@EXECUTORCH_CLIENTS"],
)
2 changes: 1 addition & 1 deletion runtime/core/portable_type/c10/c10/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def define_common_targets():
runtime.cxx_library(
name = "aten_headers_for_executorch",
srcs = [],
visibility = ["//executorch/kernels/optimized/..."],
visibility = ["//executorch/kernels/optimized/...", "@EXECUTORCH_CLIENTS"],
exported_deps = select({
"DEFAULT": [],
"ovr_config//cpu:arm64": [
Expand Down
196 changes: 153 additions & 43 deletions shim_et/xplat/executorch/codegen/codegen.bzl
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_default_executorch_platforms", "is_xplat", "runtime", "struct_to_json")
load("@fbsource//xplat/executorch/build:selects.bzl", "selects")
load("@fbsource//xplat/executorch/kernels/portable:op_registration_util.bzl", "portable_header_list", "portable_source_list")
load("@fbsource//xplat/executorch/kernels/optimized:op_registration_util.bzl", "optimized_header_list", "optimized_source_list")
load(
"@fbsource//xplat/executorch/kernels/optimized:lib_defs.bzl",
"get_vec_deps",
"get_vec_preprocessor_flags",
)

# Headers that declare the function signatures of the C++ functions that
# map to entries in functions.yaml and custom_ops.yaml.
Expand Down Expand Up @@ -384,52 +390,60 @@ def exir_custom_ops_aot_lib(
force_static = False,
)

# Used for dtype selective build. Genrules to copy source and header files.
def portable_outs(target_name, file_list):
    """Map each file name to a one-element list holding "<target_name>/<file>".

    Args:
        target_name: directory-like prefix placed in front of every file name.
        file_list: iterable of file names; each becomes a key of the result.

    Returns:
        A dict of the form {file_name: ["target_name/file_name"]}.
    """
    return {
        file_name: ["{}/{}".format(target_name, file_name)]
        for file_name in file_list
    }

def copy_portable_source_files(name):
target_name = "portable_source_files"
def copy_files(genrule_name, target, file_list):
"""
Copy files from `target` to current directory.
genrule_name: name of this copy genrule.
target: a runtime.filegroup that globs together files.
eg. //executorch/kernels/portable/cpu:portable_source_files.
file_list: list of filenames, used to generate the outfiles.
eg. //executorch/kernels/portable/cpu:portable_source_list.
"""
target_name = target.split(":")[1]
runtime.genrule(
name = name,
cmd = "cp -f -r $(location //executorch/kernels/portable/cpu:{}) $OUT/".format(target_name),
outs = portable_outs(target_name, portable_source_list()),
name = genrule_name,
cmd = "cp -f -r $(location {}) $OUT/".format(target),
outs = {file: ["{}/{}".format(target_name, file)] for file in file_list},
default_outs = ["."],
)

def copy_portable_header_files(name):
target_name = "portable_header_files"
runtime.genrule(
def build_portable_header_lib(name, oplist_header_name, feature = None):
"""Build the portable headers into a header-only library.
Ensures that includes work across portable and optimized libs.
#include "executorch/kernels/portable/cpu/<header.h>"
"""
# Copy portable header files.
portable_header_files = {}
genrule_name = name + "_copy_portable_header"
copy_files(genrule_name, "//executorch/kernels/portable/cpu:portable_header_files", portable_header_list())
for header in portable_header_list():
portable_header_files[header] = ":{}[{}]".format(genrule_name, header)

# Include dtype header.
portable_header_files["selected_op_variants.h"] = ":{}[selected_op_variants]".format(oplist_header_name)

# Build portable headers lib.
runtime.cxx_library(
name = name,
cmd = "cp -f -r $(location //executorch/kernels/portable/cpu:{}) $OUT/".format(target_name),
outs = portable_outs(target_name, portable_header_list()),
default_outs = ["."],
srcs = [],
exported_headers = portable_header_files,
exported_preprocessor_flags = ["-DEXECUTORCH_SELECTIVE_BUILD_DTYPE"],
# header_namespace is only available in xplat. See https://fburl.com/code/we2gvopk
header_namespace = "executorch/kernels/portable/cpu",
feature = feature,
)

def build_portable_lib(name, oplist_header_name, feature = None, expose_operator_symbols = False):
def build_portable_lib(name, oplist_header_name, portable_header_lib, feature = None, expose_operator_symbols = False):
"""Build portable lib from source. We build from source so that the generated header file,
selected_op_variants.h, can be used to selectively build the lib for different dtypes.
"""

# Copy portable cpp files.
portable_source_files = []
copy_portable_source_files_genrule = name + "_copy_portable_source"
copy_portable_source_files(copy_portable_source_files_genrule)
genrule_name = name + "_copy_portable_source"
copy_files(genrule_name, "//executorch/kernels/portable/cpu:portable_source_files", portable_source_list())
for op in portable_source_list():
portable_source_files.append(":{}[{}]".format(copy_portable_source_files_genrule, op))

# Copy portable header files.
portable_header_files = {}
copy_portable_header_files_genrule = name + "_copy_portable_header"
copy_portable_header_files(copy_portable_header_files_genrule)
for header in portable_header_list():
portable_header_files[header] = ":{}[{}]".format(copy_portable_header_files_genrule, header)

# Include dtype header.
portable_header_files["selected_op_variants.h"] = ":{}[selected_op_variants]".format(oplist_header_name)
portable_source_files.append(":{}[{}]".format(genrule_name, op))

# For shared library build, we don't want to expose symbols of
# kernel implementation (ex torch::executor::native::tanh_out)
Expand All @@ -449,9 +463,8 @@ def build_portable_lib(name, oplist_header_name, feature = None, expose_operator
runtime.cxx_library(
name = name,
srcs = portable_source_files,
exported_headers = portable_header_files,
exported_preprocessor_flags = ["-DEXECUTORCH_SELECTIVE_BUILD_DTYPE"],
deps = ["//executorch/kernels/portable/cpu/pattern:all_deps", "//executorch/kernels/portable/cpu/util:all_deps"],
deps = ["//executorch/kernels/portable/cpu/pattern:all_deps", "//executorch/kernels/portable/cpu/util:all_deps"] + [":" + portable_header_lib],
# header_namespace is only available in xplat. See https://fburl.com/code/we2gvopk
header_namespace = "executorch/kernels/portable/cpu",
compiler_flags = compiler_flags,
Expand All @@ -467,6 +480,88 @@ def build_portable_lib(name, oplist_header_name, feature = None, expose_operator
feature = feature,
)

def build_optimized_lib(name, oplist_header_name, portable_header_lib, feature = None, expose_operator_symbols = False):
    """Declare a cxx_library that compiles the optimized kernels from source.

    The kernels are built from copied sources, rather than by depending on a
    prebuilt library, so that the generated selected_op_variants.h header can
    be used to build them selectively for different dtypes.

    Args:
        name: name of the resulting cxx_library; also used as the prefix of
            the two copy genrules declared here.
        oplist_header_name: not referenced in this function body.
            NOTE(review): presumably kept so the signature mirrors
            build_portable_lib; confirm.
        portable_header_lib: name of a target that is added to the deps as
            ":<portable_header_lib>".
        feature: forwarded unchanged to runtime.cxx_library.
        expose_operator_symbols: when False (the default),
            "-fvisibility=hidden" is appended to the compiler flags.
    """

    # Copy the optimized .cpp files, then refer to each copy through the
    # genrule's named outputs.
    source_copy_rule = name + "_copy_optimized_source"
    source_names = optimized_source_list()
    copy_files(source_copy_rule, "//executorch/kernels/optimized/cpu:optimized_source_files", source_names)
    optimized_sources = [
        ":{}[{}]".format(source_copy_rule, source_name)
        for source_name in source_names
    ]

    # Copy the optimized header files in the same way.
    header_copy_rule = name + "_copy_optimized_header"
    header_names = optimized_header_list()
    copy_files(header_copy_rule, "//executorch/kernels/optimized/cpu:optimized_header_files", header_names)

    # NOTE(review): this mapping is built but never passed to the cxx_library
    # below; confirm whether that is intentional.
    optimized_headers = {
        header_name: ":{}[{}]".format(header_copy_rule, header_name)
        for header_name in header_names
    }

    # In a shared-library build the kernel implementation symbols
    # (e.g. torch::executor::native::tanh_out) should stay hidden so that
    # callers go through the registry and the linker can prune unregistered
    # kernels. fbcode links every dependency as a shared library, which would
    # stop users such as unit tests from calling kernel implementations
    # directly, so hiding is meant for xplat only.
    compiler_flags = [
        "-Wno-missing-prototypes",
        "-Wno-pass-failed",
        "-Wno-global-constructors",
        "-Wno-shadow",
    ]
    if not expose_operator_symbols:
        # Leaving out '-fvisibility=hidden' exposes the operator symbols,
        # which lets operators be called outside of the kernel registry.
        compiler_flags.append("-fvisibility=hidden")

    # Fixed dependencies, then the per-build portable header library, then
    # whatever the vectorization helpers require.
    fixed_deps = [
        "//executorch/kernels/optimized/cpu:add_sub_impl",
        "//executorch/kernels/optimized/cpu:binary_ops",
        "//executorch/kernels/optimized/cpu:fft_utils",
        "//executorch/kernels/optimized/cpu:moments_utils",
        "//executorch/kernels/optimized:libblas",
        "//executorch/kernels/optimized:libutils",
        "//executorch/kernels/optimized:libvec",
        "//executorch/kernels/portable/cpu/pattern:all_deps",
        "//executorch/kernels/portable/cpu/util:all_deps",
        "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch",
        "//executorch/runtime/kernel:kernel_includes",
    ]
    optimized_lib_deps = fixed_deps + [":{}".format(portable_header_lib)] + get_vec_deps()

    # sleef has to be a direct dependency of the operator target when building
    # for Android, otherwise a linker error may occur; it appears that
    # fbandroid_platform_deps of dependencies are not transitive.
    android_platform_deps = [
        (
            "^android-arm64.*$",
            [
                "fbsource//third-party/sleef:sleef_arm",
            ],
        ),
    ]

    runtime.cxx_library(
        name = name,
        srcs = optimized_sources,
        exported_preprocessor_flags = ["-DEXECUTORCH_SELECTIVE_BUILD_DTYPE"],
        deps = optimized_lib_deps,
        # header_namespace is only available in xplat. See https://fburl.com/code/we2gvopk
        header_namespace = "executorch/kernels/optimized/cpu",
        compiler_flags = compiler_flags,
        preprocessor_flags = get_vec_preprocessor_flags(),
        fbandroid_platform_deps = android_platform_deps,
        # WARNING: this deprecated API keeps the library from being built as a
        # shared library. When a .so is loaded dynamically it should not
        # depend on other .so files, because that would require specifying a
        # library directory path.
        force_static = True,
        # Operators register themselves through static initializers that run
        # at program startup, so the whole archive must be linked in.
        # @lint-ignore BUCKLINT link_whole
        link_whole = True,
        feature = feature,
    )

def executorch_generated_lib(
name,
functions_yaml_target = None,
Expand Down Expand Up @@ -629,14 +724,29 @@ def executorch_generated_lib(
)

portable_lib = []
if dtype_selective_build and is_xplat() and "//executorch/kernels/portable:operators" in kernel_deps:
# Remove portable from kernel_deps as we're building it from source.
kernel_deps.remove("//executorch/kernels/portable:operators")

# Build portable lib.
portable_lib_name = name + "_portable_lib"
build_portable_lib(portable_lib_name, oplist_header_name, feature, expose_operator_symbols)
portable_lib = [":{}".format(portable_lib_name)]
optimized_lib = []
if dtype_selective_build and is_xplat():
# Build portable headers lib. Used for portable and optimized kernel libraries.
portable_header_lib = name + "_portable_header_lib"
build_portable_header_lib(portable_header_lib, oplist_header_name, feature)

if "//executorch/kernels/portable:operators" in kernel_deps:
# Remove portable from kernel_deps as we're building it from source.
kernel_deps.remove("//executorch/kernels/portable:operators")

# Build portable lib.
portable_lib_name = name + "_portable_lib"
build_portable_lib(portable_lib_name, oplist_header_name, portable_header_lib, feature, expose_operator_symbols)
portable_lib = [":{}".format(portable_lib_name)]

if "//executorch/kernels/optimized:optimized_operators" in kernel_deps:
# Remove optimized from kernel_deps as we're building it from source.
kernel_deps.remove("//executorch/kernels/optimized:optimized_operators")

# Build optimized lib.
optimized_lib_name = name + "_optimized_lib"
build_optimized_lib(optimized_lib_name, oplist_header_name, portable_header_lib, feature, expose_operator_symbols)
optimized_lib = [":{}".format(optimized_lib_name)]

# Exports headers that declare the function signatures of the C++ functions
# that map to entries in `functions.yaml` and `custom_ops.yaml`.
Expand Down Expand Up @@ -690,7 +800,7 @@ def executorch_generated_lib(
"//executorch/kernels/prim_ops:prim_ops_registry" + aten_suffix,
"//executorch/runtime/core:evalue" + aten_suffix,
"//executorch/codegen:macros",
] + deps + kernel_deps + portable_lib,
] + deps + kernel_deps + portable_lib + optimized_lib,
exported_deps = [
"//executorch/runtime/core/exec_aten:lib" + aten_suffix,
"//executorch/runtime/kernel:kernel_runtime_context" + aten_suffix,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -265,3 +265,11 @@ OPTIMIZED_ATEN_OPS = (
],
),
)

def optimized_source_list():
    """Return one "<name>.cpp" file name per entry of OPTIMIZED_ATEN_OPS.

    The names refer to sources in //executorch/kernels/optimized/cpu and are
    returned in the same order as OPTIMIZED_ATEN_OPS.
    """
    op_names = [op["name"] for op in OPTIMIZED_ATEN_OPS]
    return [op_name + ".cpp" for op_name in op_names]

def optimized_header_list():
    """Return the header file names from //executorch/kernels/optimized/cpu.

    This is a fixed, hand-maintained list; it is not derived from
    OPTIMIZED_ATEN_OPS.
    """
    headers = [
        "binary_ops.h",
        "fft_utils.h",
        "moments_utils.h",
        "op_add_sub_impl.h",
    ]
    return headers
Loading