Implement native_dropout (#10567)

swolchok · web-flow · commit 46a18cb702ab · 2025-05-06T20:43:16.000-04:00
Yet another core ATen op.

Test Plan: Comes with test. Imported to fbsource and ran test in ATen
mode as well.
diff --git a/kernels/portable/cpu/op_native_dropout.cpp b/kernels/portable/cpu/op_native_dropout.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/kernels/portable/cpu/util/elementwise_util.h>
+#include <executorch/runtime/kernel/kernel_includes.h>
+
+#include <random>
+#include <tuple>
+
+namespace torch::executor::native {
+/**
+ * Portable kernel for `native_dropout.out`.
+ *
+ * Dropout is active when `train` is unset or true and `prob != 0`. In that
+ * case every element of `mask` is drawn independently and is true ("keep")
+ * with probability `1 - prob`; kept elements of `input` are written to `out`
+ * multiplied by `1 / (1 - prob)` and dropped elements are written as 0. For
+ * `prob == 1` nothing is kept and the scale is taken to be 0 rather than
+ * dividing by zero. The generator is freshly seeded from
+ * `std::random_device` on every call.
+ *
+ * When dropout is inactive, `out` receives a byte-for-byte copy of `input`
+ * and `mask` is filled with true.
+ *
+ * `out` and `mask` are resized to `input`'s sizes. Each ET_KERNEL_CHECK below
+ * reports InvalidArgument when its condition does not hold: `out` must have
+ * `input`'s dtype, all three tensors must share a dim order, `mask` must be a
+ * Bool tensor, `prob` must lie in [0, 1], and both resizes must succeed.
+ *
+ * @returns references to `out` and `mask`, in that order.
+ */
+std::tuple<Tensor&, Tensor&> native_dropout_out(
+    KernelRuntimeContext& ctx,
+    const Tensor& input,
+    double prob,
+    torch::executor::optional<bool> train,
+    Tensor& out,
+    Tensor& mask) {
+  std::tuple<Tensor&, Tensor&> ret(out, mask);
+
+  // Validate every argument before touching the outputs, so that a rejected
+  // call does not resize `out` or `mask` first.
+  ET_KERNEL_CHECK(
+      ctx, tensors_have_same_dtype(input, out), InvalidArgument, ret);
+  ET_KERNEL_CHECK(
+      ctx, tensors_have_same_dim_order(input, out, mask), InvalidArgument, ret);
+  ET_KERNEL_CHECK(ctx, tensor_is_bool_type(mask), InvalidArgument, ret);
+  // Written as a positive range test so that a NaN `prob` is rejected too.
+  ET_KERNEL_CHECK_MSG(
+      ctx,
+      prob >= 0 && prob <= 1,
+      InvalidArgument,
+      ret,
+      "dropout probability has to be between 0 and 1 but got %f",
+      prob);
+  ET_KERNEL_CHECK(
+      ctx,
+      resize_tensor(out, input.sizes()) == Error::Ok,
+      InvalidArgument,
+      ret);
+  ET_KERNEL_CHECK(
+      ctx,
+      resize_tensor(mask, input.sizes()) == Error::Ok,
+      InvalidArgument,
+      ret);
+
+  // @lint-ignore CLANGTIDY facebook-hte-CArray
+  static constexpr const char op_name[] = "native_dropout.out";
+  if ((!train.has_value() || train.value()) && prob != 0) {
+    // Survivors are rescaled by 1 / (1 - prob), as ATen's native_dropout does,
+    // so that the expected value of each output element equals its input.
+    // With prob == 1 nothing survives; use 0 instead of dividing by zero.
+    const double keep_prob = 1.0 - prob;
+    const double scale = keep_prob == 0 ? 0.0 : 1.0 / keep_prob;
+    {
+      std::mt19937 gen((std::random_device())());
+      // Default-constructed distribution samples from [0, 1), so the
+      // comparison below is true with probability 1 - prob.
+      std::uniform_real_distribution<double> dist;
+      bool* const mask_data_ptr = mask.mutable_data_ptr<bool>();
+      for (const auto ii : c10::irange(mask.numel())) {
+        mask_data_ptr[ii] = dist(gen) >= prob;
+      }
+    }
+    ET_SWITCH_FLOATHBF16_TYPES(
+        input.scalar_type(), ctx, op_name, CTYPE_COMPUTE, [&]() {
+          utils::apply_bitensor_elementwise_fn<CTYPE_COMPUTE, op_name>(
+              [scale](const auto val, const auto mask_val) {
+                if (!mask_val) {
+                  return static_cast<decltype(val)>(0);
+                }
+                // Scale in double so reduced-precision dtypes are rounded
+                // only once, when converting back to the element type.
+                return static_cast<decltype(val)>(
+                    static_cast<double>(val) * scale);
+              },
+              ctx,
+              input,
+              utils::SupportedTensorDtypes::FLOATHBF16,
+              mask,
+              // TODO: should really be just BOOL
+              utils::SupportedTensorDtypes::BOOL_OR_BYTE,
+              out,
+              utils::SupportedTensorDtypes::SAME_AS_COMMON);
+        });
+  } else if (input.numel() > 0) {
+    // Dropout disabled: pass the input through and mark every element kept.
+    std::memcpy(out.mutable_data_ptr(), input.data_ptr(), input.nbytes());
+    std::memset(mask.mutable_data_ptr(), true, mask.nbytes());
+  }
+  return ret;
+}
+
+} // namespace torch::executor::native
diff --git a/kernels/portable/functions.yaml b/kernels/portable/functions.yaml
@@ -627,6 +627,12 @@
     - arg_meta: null
       kernel_name: torch::executor::narrow_copy_out
 
+- op: native_dropout.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::native_dropout_out
+  tags: nondeterministic_seeded
+
 - op: native_group_norm.out
   kernels:
     - arg_meta: null
diff --git a/kernels/test/CMakeLists.txt b/kernels/test/CMakeLists.txt
@@ -186,6 +186,7 @@ set(all_test_sources
     "op_mul_test.cpp"
     "op_pow_test.cpp"
     "op_native_batch_norm_test.cpp"
+    "op_native_dropout_test.cpp"
     "op_native_group_norm_test.cpp"
     "op_native_layer_norm_test.cpp"
     "op_ne_test.cpp"
diff --git a/kernels/test/op_native_dropout_test.cpp b/kernels/test/op_native_dropout_test.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <c10/util/irange.h>
+#include <executorch/kernels/test/FunctionHeaderWrapper.h> // Declares the operator
+#include <executorch/kernels/test/TestUtil.h>
+#include <executorch/kernels/test/supported_features.h>
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
+#include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h>
+
+#include <gtest/gtest.h>
+
+using executorch::aten::ScalarType;
+using executorch::aten::Tensor;
+using torch::executor::testing::TensorFactory;
+
+/**
+ * Fixture for `native_dropout.out`. Wraps the operator call and provides a
+ * dtype-parameterized check of the cases whose result either is
+ * deterministic or can be verified without knowing the random mask.
+ */
+class OpNativeDropoutTest : public OperatorTest {
+ protected:
+  // Invokes the operator under test with the fixture's kernel context.
+  void op_native_dropout_out(
+      const Tensor& self,
+      double prob,
+      executorch::aten::optional<bool> train,
+      Tensor& out,
+      Tensor& mask) {
+    torch::executor::aten::native_dropout_outf(
+        context_, self, prob, train, out, mask);
+  }
+
+  // Runs dropout on a 3x2 tensor of dtype DTYPE (element type CTYPE). Before
+  // every call both outputs are filled with values that contradict the
+  // expected result, so a kernel that leaves an output unwritten cannot pass.
+  template <typename CTYPE, ScalarType DTYPE>
+  void test_dropout() {
+    TensorFactory<DTYPE> tf;
+    TensorFactory<ScalarType::Bool> tf_bool;
+    const std::vector<int32_t> sizes = {3, 2};
+    Tensor in = tf.make(sizes, {1, 2, 3, 4, 5, 6});
+    Tensor out = tf.zeros(sizes);
+    Tensor mask = tf_bool.zeros(sizes);
+
+    bool* const mask_data = mask.mutable_data_ptr<bool>();
+    auto* const out_data = out.mutable_data_ptr<CTYPE>();
+
+    // Overwrites every element of `out` and `mask` with the given sentinels.
+    auto fill_outputs = [&](CTYPE out_value, bool mask_value) {
+      for (const auto ii : c10::irange(out.numel())) {
+        out_data[ii] = out_value;
+      }
+      for (const auto ii : c10::irange(mask.numel())) {
+        mask_data[ii] = mask_value;
+      }
+    };
+    // Expects the input to have been passed through with an all-true mask.
+    auto expect_no_drops = [&]() {
+      EXPECT_TENSOR_CLOSE(out, in);
+      for (const auto ii : c10::irange(mask.numel())) {
+        EXPECT_TRUE(mask_data[ii]);
+      }
+    };
+
+    // prob == 0 never drops, whether or not training is enabled.
+    fill_outputs(CTYPE(0), false);
+    op_native_dropout_out(in, 0, true, out, mask);
+    expect_no_drops();
+
+    fill_outputs(CTYPE(0), false);
+    op_native_dropout_out(in, 0, false, out, mask);
+    expect_no_drops();
+
+    // With training disabled even prob == 1 must not drop anything.
+    fill_outputs(CTYPE(0), false);
+    op_native_dropout_out(in, 1, false, out, mask);
+    expect_no_drops();
+
+    // prob == 1 while training drops every element.
+    fill_outputs(CTYPE(1), true);
+    op_native_dropout_out(in, 1, true, out, mask);
+    for (const auto ii : c10::irange(out.numel())) {
+      EXPECT_EQ(out_data[ii], CTYPE(0));
+    }
+    for (const auto ii : c10::irange(mask.numel())) {
+      EXPECT_FALSE(mask_data[ii]);
+    }
+
+    // With 0 < prob < 1 the mask is random, but it must agree with the
+    // output: every input here is nonzero, so an output element is zero
+    // exactly when the mask says it was dropped.
+    fill_outputs(CTYPE(0), true);
+    op_native_dropout_out(in, 0.5, true, out, mask);
+    for (const auto ii : c10::irange(out.numel())) {
+      EXPECT_EQ(out_data[ii] == CTYPE(0), !mask_data[ii]);
+    }
+  }
+};
+
+// Runs test_dropout() once for each (C type, ScalarType) pair enumerated by
+// ET_FORALL_FLOATHBF16_TYPES.
+TEST_F(OpNativeDropoutTest, Basic) {
+#define RUN_DROPOUT_FOR(ctype, dtype) test_dropout<ctype, ScalarType::dtype>();
+  ET_FORALL_FLOATHBF16_TYPES(RUN_DROPOUT_FOR);
+#undef RUN_DROPOUT_FOR
+}
+
+// A dropout probability outside [0, 1] must be rejected on either side of
+// the range.
+TEST_F(OpNativeDropoutTest, ProbabilityRangeCheck) {
+  TensorFactory<ScalarType::Float> tf_float;
+  TensorFactory<ScalarType::Bool> tf_bool;
+  const std::vector<int32_t> sizes = {2, 3};
+  Tensor a = tf_float.ones(sizes);
+  Tensor out = tf_float.zeros(sizes);
+  Tensor mask = tf_bool.zeros(sizes);
+  // Below the lower bound.
+  ET_EXPECT_KERNEL_FAILURE(
+      context_, op_native_dropout_out(a, -1, true, out, mask));
+  // Above the upper bound.
+  ET_EXPECT_KERNEL_FAILURE(
+      context_, op_native_dropout_out(a, 1.5, true, out, mask));
+}
+
+// The mask output must be a Bool tensor: a Byte mask and a Float mask of the
+// right shape are both expected to make the kernel fail.
+TEST_F(OpNativeDropoutTest, MaskBoolCheck) {
+  const std::vector<int32_t> shape = {2, 3};
+  TensorFactory<ScalarType::Float> float_factory;
+  TensorFactory<ScalarType::Byte> byte_factory;
+
+  Tensor input = float_factory.ones(shape);
+  Tensor out = float_factory.zeros(shape);
+  Tensor byte_mask = byte_factory.zeros(shape);
+  Tensor float_mask = float_factory.zeros(shape);
+
+  ET_EXPECT_KERNEL_FAILURE(
+      context_, op_native_dropout_out(input, 0.5, true, out, byte_mask));
+  ET_EXPECT_KERNEL_FAILURE(
+      context_, op_native_dropout_out(input, 0.5, true, out, float_mask));
+}
diff --git a/kernels/test/targets.bzl b/kernels/test/targets.bzl
@@ -272,6 +272,7 @@ def define_common_targets():
     _common_op_test("op_mul_test", ["aten", "portable", "optimized"])
     _common_op_test("op_narrow_copy_test", ["aten", "portable"])
     _common_op_test("op_native_batch_norm_test", ["aten", "portable"])
+    _common_op_test("op_native_dropout_test", ["aten", "portable"])
     _common_op_test("op_native_group_norm_test", ["aten", "portable"])
     _common_op_test("op_native_layer_norm_test", ["aten", "portable", "optimized"])
     _common_op_test("op_ne_test", ["aten", "portable"])
diff --git a/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl b/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl
@@ -883,6 +883,12 @@ ATEN_OPS = (
             "//executorch/kernels/portable/cpu/util:normalization_ops_util",
         ],
     ),
+    op_target(
+        name = "op_native_dropout",
+        deps = [
+            "//executorch/kernels/portable/cpu/util:elementwise_util",
+        ],
+    ),
     op_target(
         name = "op_native_group_norm",
         deps = [