
Commit 5462df7

larryliu0820 authored and facebook-github-bot committed
Add CMake build example for custom ops
Summary: This new example demonstrates how to register custom ops using PyTorch C++ APIs and how to build a library that links into both the ahead-of-time (AOT) flow and the runtime.

Reviewed By: digantdesai

Differential Revision: D48184410

fbshipit-source-id: 661087a5183b9cfff8ebbc541ea032cdb0f80b06
1 parent 217ddba commit 5462df7

9 files changed: 228 additions, 34 deletions

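As the summary notes, the example wires one custom operator into two places: an AOT shared library (custom_ops_aot_lib) that registers my_ops::mul4 with PyTorch so the model can be exported, and a runtime library (custom_ops_lib) that registers the out-variant kernel with the ExecuTorch runtime. Below is a minimal sketch of the AOT side, assuming the library was already built with -DREGISTER_EXAMPLE_CUSTOM_OP_2=ON into the default cmake-out location used by test_custom_ops.sh:

import torch

# Loading the shared library runs the TORCH_LIBRARY_FRAGMENT / TORCH_LIBRARY_IMPL
# registrations in custom_ops_2.cpp, which makes my_ops::mul4 visible to PyTorch.
torch.ops.load_library("cmake-out/examples/custom_ops/libcustom_ops_aot_lib.so")

x = torch.randn(2, 3)
y = torch.ops.my_ops.mul4.default(x)  # dispatches to mul4_impl
assert torch.allclose(y, x * 4)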

CMakeLists.txt

Lines changed: 16 additions & 4 deletions

@@ -43,9 +43,16 @@
 cmake_minimum_required(VERSION 3.13)
 project(executorch)

-# option to register custom ops in `examples/custom_ops`
-option(REGISTER_EXAMPLE_CUSTOM_OPS
-       "Register custom ops defined in examples/custom_ops" OFF)
+# option to register custom operator `my_ops::mul3` in
+# `examples/custom_ops/custom_ops_1.py`
+option(REGISTER_EXAMPLE_CUSTOM_OP_1
+       "Register custom operator defined in examples/custom_ops/custom_ops_1.py"
+       OFF)
+# option to register custom operator `my_ops::mul4` in
+# `examples/custom_ops/custom_ops_2.py`
+option(REGISTER_EXAMPLE_CUSTOM_OP_2
+       "Register custom operator defined in examples/custom_ops/custom_ops_2.py"
+       OFF)

 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 if(NOT CMAKE_CXX_STANDARD)
@@ -58,6 +65,11 @@ endif()
 # TODO(dbort): Fix these warnings and remove this flag.
 set(_common_compile_options -Wno-deprecated-declarations)

+if(REGISTER_EXAMPLE_CUSTOM_OP_2)
+  # Need to be linked to a shared library
+  list(APPEND _common_compile_options -fPIC)
+endif()
+
 # Let files say "include <executorch/path/to/header.h>".
 set(_common_include_directories ${CMAKE_CURRENT_SOURCE_DIR}/..)

@@ -274,7 +286,7 @@ target_link_libraries(executor_runner executorch portable_kernels_bindings
 target_compile_options(executor_runner PUBLIC ${_common_compile_options})

 # Generate custom_ops_lib based on REGISTER_EXAMPLE_CUSTOM_OPS
-if(REGISTER_EXAMPLE_CUSTOM_OPS)
+if(REGISTER_EXAMPLE_CUSTOM_OP_1 OR REGISTER_EXAMPLE_CUSTOM_OP_2)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/examples/custom_ops)
   target_link_libraries(executor_runner custom_ops_lib)
 endif()

examples/custom_ops/CMakeLists.txt

Lines changed: 51 additions & 19 deletions

@@ -37,10 +37,17 @@ file(GLOB_RECURSE _codegen_tools_srcs "${EXECUTORCH_ROOT}/codegen/tools/*.py")
 file(GLOB_RECURSE _codegen_templates "${EXECUTORCH_ROOT}/codegen/templates/*")
 file(GLOB_RECURSE _torchgen_srcs "${TORCH_ROOT}/torchgen/*.py")

-set(_gen_oplist_command
-    "${PYTHON_EXECUTABLE}" -m codegen.tools.gen_oplist
-    --output_path=${_oplist_yaml}
-    --ops_schema_yaml_path=${CMAKE_CURRENT_LIST_DIR}/custom_ops.yaml)
+# Selective build. If we want to register all ops in custom_ops.yaml, do
+# `--ops_schema_yaml_path=${CMAKE_CURRENT_LIST_DIR}/custom_ops.yaml)` instead of
+# `root_ops`
+set(_gen_oplist_command "${PYTHON_EXECUTABLE}" -m codegen.tools.gen_oplist
+    --output_path=${_oplist_yaml})
+
+if(REGISTER_EXAMPLE_CUSTOM_OP_2)
+  list(APPEND _gen_oplist_command --root_ops="my_ops::mul4.out")
+elseif(REGISTER_EXAMPLE_CUSTOM_OP_1)
+  list(APPEND _gen_oplist_command --root_ops="my_ops::mul3.out")
+endif()

 # Command to codegen C++ wrappers to register custom ops to both PyTorch and
 # Executorch runtime.
@@ -78,31 +85,56 @@ add_custom_command(
   WORKING_DIRECTORY ${EXECUTORCH_ROOT})
 # Prepare for C++ libraries.

-# 1. TODO: C++ library to register custom ops into PyTorch.
-# ~~~
-# add_library(custom_ops_aot_lib SHARED
-#   ${OUTPUT_DIRECTORY}/RegisterCPUCustomOps.cpp
-#   ${OUTPUT_DIRECTORY}/RegisterSchema.cpp
-#   ${OUTPUT_DIRECTORY}/CustomOpsNativeFunctions.h)
-# ~~~
+# 1. C++ library to register custom ops into PyTorch.
+if(REGISTER_EXAMPLE_CUSTOM_OP_2)
+  add_library(
+    custom_ops_aot_lib SHARED
+    ${CMAKE_CURRENT_BINARY_DIR}/RegisterCPUCustomOps.cpp
+    ${CMAKE_CURRENT_BINARY_DIR}/RegisterSchema.cpp
+    ${CMAKE_CURRENT_BINARY_DIR}/CustomOpsNativeFunctions.h
+    ${CMAKE_CURRENT_LIST_DIR}/custom_ops_2.cpp # register my_ops::mul4
+    ${CMAKE_CURRENT_LIST_DIR}/custom_ops_2_out.cpp # register my_ops::mul4.out
+  )
+  # Find `Torch`.
+  find_package(Torch REQUIRED)
+  # ATen mode is on
+  target_compile_definitions(custom_ops_aot_lib PRIVATE USE_ATEN_LIB=1)
+  target_include_directories(custom_ops_aot_lib
+                             PUBLIC ${_common_include_directories})
+  include_directories(${TORCH_INCLUDE_DIRS})

-# Find `Torch`.
-# ~~~
-# find_package(Torch REQUIRED)
-# target_link_libraries(custom_ops_aot_lib PUBLIC Torch)
-# ~~~
+  target_link_libraries(custom_ops_aot_lib PRIVATE torch executorch)

-# 1. C++ library to register custom ops into Executorch runtime.
+  # Ensure that the load-time constructor functions run. By default, the linker
+  # would remove them since there are no other references to them.
+  if((CMAKE_CXX_COMPILER_ID MATCHES "AppleClang")
+     OR (APPLE AND CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
+    target_link_options(custom_ops_aot_lib INTERFACE
+                        "-Wl,-force_load,$<TARGET_FILE:custom_ops_aot_lib>")
+  elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU")
+    target_link_options(
+      custom_ops_aot_lib INTERFACE
+      "-Wl,--whole-archive,$<TARGET_FILE:custom_ops_aot_lib>,--no-whole-archive"
+    )
+  endif()
+endif()

+# 1. C++ library to register custom ops into Executorch runtime.
 add_library(custom_ops_lib)
 target_sources(
   custom_ops_lib
   PRIVATE
   ${CMAKE_CURRENT_BINARY_DIR}/RegisterCodegenUnboxedKernelsEverything.cpp
   ${CMAKE_CURRENT_BINARY_DIR}/Functions.h
   ${CMAKE_CURRENT_BINARY_DIR}/NativeFunctions.h
-  ${CMAKE_CURRENT_BINARY_DIR}/CustomOpsNativeFunctions.h
-  ${CMAKE_CURRENT_LIST_DIR}/custom_ops_1.cpp)
+  ${CMAKE_CURRENT_BINARY_DIR}/CustomOpsNativeFunctions.h)
+if(REGISTER_EXAMPLE_CUSTOM_OP_1)
+  target_sources(custom_ops_lib
+                 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/custom_ops_1_out.cpp)
+elseif(REGISTER_EXAMPLE_CUSTOM_OP_2)
+  target_sources(custom_ops_lib
+                 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/custom_ops_2_out.cpp)
+endif()

 target_link_libraries(custom_ops_lib PRIVATE executorch)

examples/custom_ops/custom_ops_2.cpp

Lines changed: 35 additions & 0 deletions

@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+#include <torch/library.h>
+
+namespace custom {
+namespace native {
+
+using at::Tensor;
+using c10::ScalarType;
+
+// mul4(Tensor input) -> Tensor
+Tensor mul4_impl(const Tensor& in) {
+  // naive approach
+  at::Tensor out = at::zeros_like(in);
+  out.copy_(in);
+  out.mul_(4);
+  return out;
+}
+
+TORCH_LIBRARY_FRAGMENT(my_ops, m) {
+  m.def(TORCH_SELECTIVE_SCHEMA("my_ops::mul4(Tensor input) -> Tensor"));
+}
+
+TORCH_LIBRARY_IMPL(my_ops, CompositeExplicitAutograd, m) {
+  m.impl(TORCH_SELECTIVE_NAME("my_ops::mul4"), TORCH_FN(mul4_impl));
+}
+} // namespace native
+} // namespace custom

examples/custom_ops/custom_ops_2.py

Lines changed: 28 additions & 0 deletions

@@ -0,0 +1,28 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Example of showcasing registering custom operator through torch library API."""
+import torch
+
+from examples.export.export_example import export_to_ff
+
+torch.ops.load_library("cmake-out/examples/custom_ops/libcustom_ops_aot_lib.so")
+
+# example model
+class Model(torch.nn.Module):
+    def forward(self, a):
+        return torch.ops.my_ops.mul4.default(a)
+
+
+def main():
+    m = Model()
+    input = torch.randn(2, 3)
+    # capture and lower
+    export_to_ff("custom_ops_2", m, (input,))
+
+
+if __name__ == "__main__":
+    main()
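Running this module saves custom_ops_2.pte; at runtime the program relies on the out-variant kernel my_ops::mul4.out registered by custom_ops_lib, which is why executor_runner must also be built with -DREGISTER_EXAMPLE_CUSTOM_OP_2=ON. A hypothetical end-to-end driver mirroring test_custom_ops.sh (paths assume the default cmake-out build directory):

import subprocess

# Export the model by running the example module above (writes ./custom_ops_2.pte).
subprocess.run(["python3", "-m", "examples.custom_ops.custom_ops_2"], check=True)

# Run the flatbuffer with an executor_runner built with the custom op registered.
subprocess.run(
    ["cmake-out/executor_runner", "--model_path=./custom_ops_2.pte"],
    check=True,
)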
examples/custom_ops/custom_ops_2_out.cpp

Lines changed: 56 additions & 0 deletions

@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/runtime/kernel/kernel_includes.h>
+
+namespace custom {
+namespace native {
+
+using exec_aten::ScalarType;
+using exec_aten::Tensor;
+using torch::executor::RuntimeContext;
+
+namespace {
+void check_preconditions(const Tensor& in, Tensor& out) {
+  ET_CHECK_MSG(
+      out.scalar_type() == ScalarType::Float,
+      "Expected out tensor to have dtype Float, but got %hhd instead",
+      out.scalar_type());
+  ET_CHECK_MSG(
+      in.scalar_type() == ScalarType::Float,
+      "Expected in tensor to have dtype Float, but got %hhd instead",
+      in.scalar_type());
+  ET_CHECK_MSG(
+      out.dim() == in.dim(),
+      "Number of dims of out tensor is not compatible with inputs");
+  ET_CHECK_MSG(
+      out.numel() == in.numel(),
+      "Number of elements of out tensor %zd is not compatible with inputs %zd",
+      ssize_t(out.numel()),
+      ssize_t(in.numel()));
+}
+} // namespace
+// mul4.out(Tensor input, *, Tensor(a!) output) -> Tensor(a!)
+Tensor& mul4_out_impl(const Tensor& in, Tensor& out) {
+  check_preconditions(in, out);
+  float* out_data = out.mutable_data_ptr<float>();
+  const float* in_data = in.const_data_ptr<float>();
+  for (size_t out_idx = 0; out_idx < out.numel(); ++out_idx) {
+    out_data[out_idx] = in_data[out_idx] * 4;
+  }
+  return out;
+}
+
+Tensor& mul4_out_impl(RuntimeContext& ctx, const Tensor& in, Tensor& out) {
+  (void)ctx;
+  mul4_out_impl(in, out);
+  return out;
+}
+
+} // namespace native
+} // namespace custom

examples/custom_ops/targets.bzl

Lines changed: 12 additions & 7 deletions

@@ -15,9 +15,12 @@ def define_common_targets():
         ],
     )

+    # ~~~ START of custom ops 1 `my_ops::mul3` library definitions ~~~
     et_operator_library(
-        name = "executorch_all_ops",
-        include_all_operators = True,
+        name = "select_custom_ops_1",
+        ops = [
+            "my_ops::mul3.out",
+        ],
         define_static_targets = True,
         visibility = [
             "//executorch/codegen/...",
@@ -26,8 +29,8 @@ def define_common_targets():
     )

     runtime.cxx_library(
-        name = "custom_kernel_lib",
-        srcs = ["custom_ops_1.cpp"],
+        name = "custom_ops_1",
+        srcs = ["custom_ops_1_out.cpp"],
         deps = [
             "//executorch/runtime/kernel:kernel_includes",
         ],
@@ -38,14 +41,16 @@ def define_common_targets():
     )

     executorch_generated_lib(
-        name = "generated_lib",
+        name = "lib_1",
         deps = [
-            ":executorch_all_ops",
-            ":custom_kernel_lib",
+            ":select_custom_ops_1",
+            ":custom_ops_1",
         ],
         custom_ops_yaml_target = ":custom_ops.yaml",
         visibility = [
             "//executorch/...",
             "@EXECUTORCH_CLIENTS",
         ],
     )
+
+    # ~~~ END of custom ops 1 `my_ops::mul3` library definitions ~~~

examples/custom_ops/test_custom_ops.sh

Lines changed: 26 additions & 2 deletions

@@ -19,7 +19,7 @@ test_buck2_custom_op_1() {

   echo 'Running executor_runner'
   buck2 run //fbcode/executorch/examples/executor_runner:executor_runner \
-      --config=executorch.include_custom_ops=1 -- --model_path="./${model_name}.pte"
+      --config=executorch.register_custom_op_1=1 -- --model_path="./${model_name}.pte"
   # should give correct result

   echo "Removing ${model_name}.pte"
@@ -34,7 +34,7 @@ test_cmake_custom_op_1() {
   (rm -rf cmake-out \
     && mkdir cmake-out \
     && cd cmake-out \
-    && cmake -DBUCK2=buck2 -DBUILD_EXAMPLE_CUSTOM_OPS=ON ..)
+    && cmake -DBUCK2=buck2 -DREGISTER_EXAMPLE_CUSTOM_OP_1=ON ..)

   echo 'Building executor_runner'
   cmake --build cmake-out -j9
@@ -43,5 +43,29 @@
   cmake-out/executor_runner --model_path="./${model_name}.pte"
 }

+test_cmake_custom_op_2() {
+  local model_name='custom_ops_2'
+  SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
+  CMAKE_PREFIX_PATH="${SITE_PACKAGES}/torch"
+
+  (rm -rf cmake-out \
+    && mkdir cmake-out \
+    && cd cmake-out \
+    && cmake -DBUCK2=buck2 \
+      -DREGISTER_EXAMPLE_CUSTOM_OP_2=ON \
+      -DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" ..)
+
+  echo 'Building executor_runner'
+  cmake --build cmake-out -j9
+
+  echo "Exporting ${model_name}.pte"
+  python3 -m "examples.custom_ops.${model_name}"
+  # should save file custom_ops_2.pte
+
+  echo 'Running executor_runner'
+  cmake-out/executor_runner "--model_path=./${model_name}.pte"
+}
+
 test_buck2_custom_op_1
 test_cmake_custom_op_1
+test_cmake_custom_op_2

examples/executor_runner/targets.bzl

Lines changed: 4 additions & 2 deletions

@@ -7,7 +7,9 @@ def define_common_targets():
     TARGETS and BUCK files that call this function.
     """

-    include_custom_ops = native.read_config("executorch", "include_custom_ops", "0") == "1"
+    register_custom_op_1 = native.read_config("executorch", "register_custom_op_1", "0") == "1"
+
+    custom_ops_lib = ["//executorch/examples/custom_ops:lib_1"] if register_custom_op_1 else []

     # Test driver for models, uses all portable kernels.
     runtime.cxx_binary(
@@ -19,7 +21,7 @@ def define_common_targets():
             "//executorch/extension/data_loader:file_data_loader",
             "//executorch/util:util",
             "//executorch/kernels/portable:generated_lib_all_ops",
-        ] + (["//executorch/examples/custom_ops:generated_lib"] if include_custom_ops else []),
+        ] + custom_ops_lib,
         external_deps = [
             "gflags",
         ],
