Skip to content

Commit ca49a1d

Browse files
committed
Dtype selective build for optimized ops
Add dtype selective build for optimized ops. This follows the same process as the portable ops, where we copy the source files and rebuild the library: 1. Generalize the copy genrule so that it handles both portable and optimized source and header files. 2. Copy the optimized source files and headers. 3. Build the optimized ops from the copied source files, their dependencies, and the portable header library. 4. Add a test confirming that we can run addmul with float dtypes (when the float dtypes are removed, the test fails). Differential Revision: [D74688554](https://our.internmc.facebook.com/intern/diff/D74688554/) ghstack-source-id: 283810733 Pull Request resolved: #10878
1 parent 0b48615 commit ca49a1d

File tree

5 files changed

+194
-48
lines changed

5 files changed

+194
-48
lines changed

examples/selective_build/targets.bzl

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,19 @@ def define_common_targets():
6969
visibility = ["//executorch/..."],
7070
)
7171

72+
executorch_generated_lib(
73+
name = "select_ops_in_dict_lib_optimized",
74+
functions_yaml_target = "//executorch/kernels/optimized:optimized.yaml",
75+
kernel_deps = [
76+
"//executorch/kernels/optimized:optimized_operators",
77+
],
78+
deps = [
79+
":select_ops_in_dict",
80+
],
81+
dtype_selective_build = True,
82+
visibility = ["//executorch/..."],
83+
)
84+
7285
# Select all ops from a yaml file
7386
et_operator_library(
7487
name = "select_ops_from_yaml",
@@ -121,6 +134,8 @@ def define_common_targets():
121134
lib.append(":select_ops_in_list_lib")
122135
elif select_ops == "dict":
123136
lib.append(":select_ops_in_dict_lib")
137+
elif select_ops == "dict_optimized":
138+
lib.append(":select_ops_in_dict_lib_optimized")
124139
elif select_ops == "yaml":
125140
lib.append(":select_ops_from_yaml_lib")
126141
elif select_ops == "model":

kernels/optimized/cpu/targets.bzl

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def define_common_targets():
2525
name = "add_sub_impl",
2626
srcs = [],
2727
exported_headers = ["op_add_sub_impl.h"],
28-
visibility = ["//executorch/kernels/optimized/cpu/..."],
28+
visibility = ["//executorch/kernels/optimized/cpu/...", "@EXECUTORCH_CLIENTS",],
2929
exported_deps = [
3030
"//executorch/runtime/core:core",
3131
"//executorch/kernels/portable/cpu/util:broadcast_indexes_range",
@@ -36,14 +36,14 @@ def define_common_targets():
3636
name = "fft_utils",
3737
srcs = [],
3838
exported_headers = ["fft_utils.h"],
39-
visibility = ["//executorch/kernels/optimized/cpu/..."],
39+
visibility = ["//executorch/kernels/optimized/cpu/...", "@EXECUTORCH_CLIENTS",],
4040
exported_deps = [] if runtime.is_oss else ["fbsource//third-party/pocket_fft:pocketfft"],
4141
)
4242

4343
runtime.cxx_library(
4444
name = "binary_ops",
4545
exported_headers = ["binary_ops.h"],
46-
visibility = ["//executorch/kernels/optimized/cpu/..."],
46+
visibility = ["//executorch/kernels/optimized/cpu/...", "@EXECUTORCH_CLIENTS",],
4747
exported_deps = ["//executorch/runtime/core:core"],
4848
)
4949

@@ -58,9 +58,22 @@ def define_common_targets():
5858
name = "moments_utils",
5959
srcs = [],
6060
exported_headers = ["moments_utils.h"],
61-
visibility = ["//executorch/kernels/optimized/..."],
61+
visibility = ["//executorch/kernels/optimized/...", "@EXECUTORCH_CLIENTS",],
6262
exported_deps = [
6363
"//executorch/kernels/optimized:libvec",
6464
"//executorch/kernels/optimized:libutils",
6565
],
6666
)
67+
68+
# Used for dtype selective build. Collect source and header files.
69+
runtime.filegroup(
70+
name = "optimized_source_files",
71+
srcs = native.glob(["*.cpp"]),
72+
visibility = ["//executorch/...", "@EXECUTORCH_CLIENTS"],
73+
)
74+
75+
runtime.filegroup(
76+
name = "optimized_header_files",
77+
srcs = native.glob(["*.h"]),
78+
visibility = ["//executorch/...", "@EXECUTORCH_CLIENTS"],
79+
)

kernels/optimized/op_registration_util.bzl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,3 +258,11 @@ OPTIMIZED_ATEN_OPS = (
258258
],
259259
),
260260
)
261+
262+
def optimized_source_list():
    """Return the source file name of every op listed in OPTIMIZED_ATEN_OPS.

    Each entry's "name" is suffixed with ".cpp"; the resulting names are the
    source files from //executorch/kernels/optimized/cpu.
    """
    source_files = []
    for op_entry in OPTIMIZED_ATEN_OPS:
        source_files.append(op_entry["name"] + ".cpp")
    return source_files
265+
266+
def optimized_header_list():
    """Return the header file names from //executorch/kernels/optimized/cpu.

    The list is written out by hand and a new list is built on every call.
    """
    header_files = [
        "binary_ops.h",
        "fft_utils.h",
        "moments_utils.h",
        "op_add_sub_impl.h",
    ]
    return header_files

runtime/core/portable_type/c10/c10/targets.bzl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def define_common_targets():
5353
runtime.cxx_library(
5454
name = "aten_headers_for_executorch",
5555
srcs = [],
56-
visibility = ["//executorch/kernels/optimized/..."],
56+
visibility = ["//executorch/kernels/optimized/...", "@EXECUTORCH_CLIENTS"],
5757
exported_deps = select({
5858
"DEFAULT": [],
5959
"ovr_config//cpu:arm64": [

shim_et/xplat/executorch/codegen/codegen.bzl

Lines changed: 153 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_default_executorch_platforms", "is_xplat", "runtime", "struct_to_json")
22
load("@fbsource//xplat/executorch/build:selects.bzl", "selects")
33
load("@fbsource//xplat/executorch/kernels/portable:op_registration_util.bzl", "portable_header_list", "portable_source_list")
4+
load("@fbsource//xplat/executorch/kernels/optimized:op_registration_util.bzl", "optimized_header_list", "optimized_source_list")
5+
load(
6+
"@fbsource//xplat/executorch/kernels/optimized:lib_defs.bzl",
7+
"get_vec_deps",
8+
"get_vec_preprocessor_flags",
9+
)
410

511
# Headers that declare the function signatures of the C++ functions that
612
# map to entries in functions.yaml and custom_ops.yaml.
@@ -386,52 +392,60 @@ def exir_custom_ops_aot_lib(
386392
force_static = False,
387393
)
388394

389-
# Used for dtype selective build. Genrules to copy source and header files.
390-
def portable_outs(target_name, file_list):
391-
outs = {}
392-
for file in file_list:
393-
outs[file] = ["{}/{}".format(target_name, file)]
394-
return outs
395-
396-
def copy_portable_source_files(name):
397-
target_name = "portable_source_files"
395+
def copy_files(genrule_name, target, file_list):
    """Define a genrule that copies the output of `target` into the genrule's $OUT.

    Args:
        genrule_name: name of this copy genrule.
        target: label of a runtime.filegroup that globs together files, written
            as "<package>:<name>",
            eg. //executorch/kernels/portable/cpu:portable_source_files.
        file_list: list of filenames, used to generate the named outputs,
            eg. the result of portable_source_list(). Each filename becomes a
            named output located at "<name>/<filename>".
    """

    # The second ":"-separated component of the label is the target name; it is
    # the directory prefix of every named output below.
    copied_dir = target.split(":")[1]

    named_outs = {}
    for filename in file_list:
        named_outs[filename] = ["{}/{}".format(copied_dir, filename)]

    runtime.genrule(
        name = genrule_name,
        cmd = "cp -f -r $(location {}) $OUT/".format(target),
        outs = named_outs,
        default_outs = ["."],
    )
404411

405-
def copy_portable_header_files(name):
406-
target_name = "portable_header_files"
407-
runtime.genrule(
412+
def build_portable_header_lib(name, oplist_header_name, feature = None):
    """Build the portable headers into a header-only library.

    The headers are exported under the "executorch/kernels/portable/cpu"
    namespace so that includes of the following form work across the portable
    and optimized libs:
        #include "executorch/kernels/portable/cpu/<header.h>"

    Args:
        name: name of the header library; also the prefix of the copy genrule.
        oplist_header_name: target whose `selected_op_variants` output is
            exported as selected_op_variants.h.
        feature: forwarded unchanged to runtime.cxx_library.
    """

    # Copy portable header files.
    copy_rule = name + "_copy_portable_header"
    header_names = portable_header_list()
    copy_files(copy_rule, "//executorch/kernels/portable/cpu:portable_header_files", header_names)

    # Map every header name to the matching named output of the copy genrule.
    exported = {header: ":{}[{}]".format(copy_rule, header) for header in header_names}

    # Include dtype header.
    exported["selected_op_variants.h"] = ":{}[selected_op_variants]".format(oplist_header_name)

    # Build portable headers lib.
    runtime.cxx_library(
        name = name,
        srcs = [],
        exported_headers = exported,
        exported_preprocessor_flags = ["-DEXECUTORCH_SELECTIVE_BUILD_DTYPE"],
        # header_namespace is only available in xplat. See https://fburl.com/code/we2gvopk
        header_namespace = "executorch/kernels/portable/cpu",
        feature = feature,
    )
413437

414-
def build_portable_lib(name, oplist_header_name, feature = None, expose_operator_symbols = False):
438+
def build_portable_lib(name, oplist_header_name, portable_header_lib, feature = None, expose_operator_symbols = False):
415439
"""Build portable lib from source. We build from source so that the generated header file,
416440
selected_op_variants.h, can be used to selectively build the lib for different dtypes.
417441
"""
418442

419443
# Copy portable cpp files.
420444
portable_source_files = []
421-
copy_portable_source_files_genrule = name + "_copy_portable_source"
422-
copy_portable_source_files(copy_portable_source_files_genrule)
445+
genrule_name = name + "_copy_portable_source"
446+
copy_files(genrule_name, "//executorch/kernels/portable/cpu:portable_source_files", portable_source_list())
423447
for op in portable_source_list():
424-
portable_source_files.append(":{}[{}]".format(copy_portable_source_files_genrule, op))
425-
426-
# Copy portable header files.
427-
portable_header_files = {}
428-
copy_portable_header_files_genrule = name + "_copy_portable_header"
429-
copy_portable_header_files(copy_portable_header_files_genrule)
430-
for header in portable_header_list():
431-
portable_header_files[header] = ":{}[{}]".format(copy_portable_header_files_genrule, header)
432-
433-
# Include dtype header.
434-
portable_header_files["selected_op_variants.h"] = ":{}[selected_op_variants]".format(oplist_header_name)
448+
portable_source_files.append(":{}[{}]".format(genrule_name, op))
435449

436450
# For shared library build, we don't want to expose symbols of
437451
# kernel implementation (ex torch::executor::native::tanh_out)
@@ -451,9 +465,8 @@ def build_portable_lib(name, oplist_header_name, feature = None, expose_operator
451465
runtime.cxx_library(
452466
name = name,
453467
srcs = portable_source_files,
454-
exported_headers = portable_header_files,
455468
exported_preprocessor_flags = ["-DEXECUTORCH_SELECTIVE_BUILD_DTYPE"],
456-
deps = ["//executorch/kernels/portable/cpu/pattern:all_deps", "//executorch/kernels/portable/cpu/util:all_deps"],
469+
deps = ["//executorch/kernels/portable/cpu/pattern:all_deps", "//executorch/kernels/portable/cpu/util:all_deps"] + [":" + portable_header_lib],
457470
# header_namespace is only available in xplat. See https://fburl.com/code/we2gvopk
458471
header_namespace = "executorch/kernels/portable/cpu",
459472
compiler_flags = compiler_flags,
@@ -469,6 +482,88 @@ def build_portable_lib(name, oplist_header_name, feature = None, expose_operator
469482
feature = feature,
470483
)
471484

485+
def build_optimized_lib(name, oplist_header_name, portable_header_lib, feature = None, expose_operator_symbols = False):
    """Build optimized lib from source. We build from source so that the generated header file,
    selected_op_variants.h, can be used to selectively build the lib for different dtypes.

    Args:
        name: name of the resulting cxx_library; also the prefix of the two
            copy genrules ("<name>_copy_optimized_source" and
            "<name>_copy_optimized_header").
        oplist_header_name: not read by this function. NOTE(review): the
            selected_op_variants.h header is presumably expected to arrive
            through `portable_header_lib` -- confirm against the caller.
        portable_header_lib: name of a target in the current package that is
            added to the deps as ":<portable_header_lib>".
        feature: forwarded unchanged to runtime.cxx_library.
        expose_operator_symbols: when False (the default), the sources are
            compiled with -fvisibility=hidden.
    """

    # Copy optimized cpp files and refer to each as a named output of the genrule.
    source_genrule = name + "_copy_optimized_source"
    source_names = optimized_source_list()
    copy_files(source_genrule, "//executorch/kernels/optimized/cpu:optimized_source_files", source_names)
    optimized_source_files = [":{}[{}]".format(source_genrule, src) for src in source_names]

    # Copy optimized header files.
    # NOTE(review): nothing below consumes the outputs of this genrule; the
    # same header names appear to be exported by the add_sub_impl, binary_ops,
    # fft_utils and moments_utils deps. The genrule is still defined so that
    # the set of targets created by this macro stays the same.
    header_genrule = name + "_copy_optimized_header"
    copy_files(header_genrule, "//executorch/kernels/optimized/cpu:optimized_header_files", optimized_header_list())

    # For shared library build, we don't want to expose symbols of
    # kernel implementation (ex torch::executor::native::tanh_out)
    # to library users. They should use kernels through registry only.
    # With visibility=hidden, linker won't expose kernel impl symbols
    # so it can prune unregistered kernels.
    # Currently fbcode links all dependent libraries through shared
    # library, and it blocks users like unit tests to use kernel
    # implementation directly. So we enable this for xplat only.
    compiler_flags = [
        "-Wno-missing-prototypes",
        "-Wno-pass-failed",
        "-Wno-global-constructors",
        "-Wno-shadow",
    ]
    if not expose_operator_symbols:
        # Removing '-fvisibility=hidden' exposes operator symbols.
        # This allows operators to be called outside of the kernel registry.
        compiler_flags.append("-fvisibility=hidden")

    # Set up dependencies.
    optimized_lib_deps = [
        "//executorch/kernels/optimized/cpu:add_sub_impl",
        "//executorch/kernels/optimized/cpu:binary_ops",
        "//executorch/kernels/optimized/cpu:fft_utils",
        "//executorch/kernels/optimized/cpu:moments_utils",
        "//executorch/kernels/optimized:libblas",
        "//executorch/kernels/optimized:libutils",
        "//executorch/kernels/optimized:libvec",
        "//executorch/kernels/portable/cpu/pattern:all_deps",
        "//executorch/kernels/portable/cpu/util:all_deps",
        "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch",
        "//executorch/runtime/kernel:kernel_includes",
        ":" + portable_header_lib,
    ] + get_vec_deps()

    # Build optimized lib.
    runtime.cxx_library(
        name = name,
        srcs = optimized_source_files,
        exported_preprocessor_flags = ["-DEXECUTORCH_SELECTIVE_BUILD_DTYPE"],
        deps = optimized_lib_deps,
        # header_namespace is only available in xplat. See https://fburl.com/code/we2gvopk
        header_namespace = "executorch/kernels/optimized/cpu",
        compiler_flags = compiler_flags,
        preprocessor_flags = get_vec_preprocessor_flags(),
        # sleef needs to be added as a direct dependency of the operator target when building for Android,
        # or a linker error may occur. Not sure why this happens; it seems that fbandroid_platform_deps of
        # dependencies are not transitive
        fbandroid_platform_deps = [
            (
                "^android-arm64.*$",
                [
                    "fbsource//third-party/sleef:sleef_arm",
                ],
            ),
        ],
        # WARNING: using a deprecated API to avoid being built into a shared
        # library. In the case of dynamically loading so library we don't want
        # it to depend on other so libraries because that way we have to
        # specify library directory path.
        force_static = True,
        # link_whole is necessary because the operators register themselves
        # via static initializers that run at program startup.
        # @lint-ignore BUCKLINT link_whole
        link_whole = True,
        feature = feature,
    )
566+
472567
def executorch_generated_lib(
473568
name,
474569
functions_yaml_target = None,
@@ -631,14 +726,29 @@ def executorch_generated_lib(
631726
)
632727

633728
portable_lib = []
634-
if dtype_selective_build and is_xplat() and "//executorch/kernels/portable:operators" in kernel_deps:
635-
# Remove portable from kernel_deps as we're building it from source.
636-
kernel_deps.remove("//executorch/kernels/portable:operators")
637-
638-
# Build portable lib.
639-
portable_lib_name = name + "_portable_lib"
640-
build_portable_lib(portable_lib_name, oplist_header_name, feature, expose_operator_symbols)
641-
portable_lib = [":{}".format(portable_lib_name)]
729+
optimized_lib = []
730+
if dtype_selective_build and is_xplat():
731+
# Build portable headers lib. Used for portable and optimized kernel libraries.
732+
portable_header_lib = name + "_portable_header_lib"
733+
build_portable_header_lib(portable_header_lib, oplist_header_name, feature)
734+
735+
if "//executorch/kernels/portable:operators" in kernel_deps:
736+
# Remove portable from kernel_deps as we're building it from source.
737+
kernel_deps.remove("//executorch/kernels/portable:operators")
738+
739+
# Build portable lib.
740+
portable_lib_name = name + "_portable_lib"
741+
build_portable_lib(portable_lib_name, oplist_header_name, portable_header_lib, feature, expose_operator_symbols)
742+
portable_lib = [":{}".format(portable_lib_name)]
743+
744+
if "//executorch/kernels/optimized:optimized_operators" in kernel_deps:
745+
# Remove optimized from kernel_deps as we're building it from source.
746+
kernel_deps.remove("//executorch/kernels/optimized:optimized_operators")
747+
748+
# Build optimized lib.
749+
optimized_lib_name = name + "_optimized_lib"
750+
build_optimized_lib(optimized_lib_name, oplist_header_name, portable_header_lib, feature, expose_operator_symbols)
751+
optimized_lib = [":{}".format(optimized_lib_name)]
642752

643753
# Exports headers that declare the function signatures of the C++ functions
644754
# that map to entries in `functions.yaml` and `custom_ops.yaml`.
@@ -692,7 +802,7 @@ def executorch_generated_lib(
692802
"//executorch/kernels/prim_ops:prim_ops_registry" + aten_suffix,
693803
"//executorch/runtime/core:evalue" + aten_suffix,
694804
"//executorch/codegen:macros",
695-
] + deps + kernel_deps + portable_lib,
805+
] + deps + kernel_deps + portable_lib + optimized_lib,
696806
exported_deps = [
697807
"//executorch/runtime/core/exec_aten:lib" + aten_suffix,
698808
"//executorch/runtime/kernel:kernel_runtime_context" + aten_suffix,

0 commit comments

Comments
 (0)