Add portable rand kernel implementation (#11127)

GregoryComer · web-flow · commit 809a1fd33768 · 2025-05-30T13:51:02.000-07:00
### Summary
Add a portable operator implementation for ATen rand. This is a core op
for which we previously had no kernel.

### Test plan
I've added operator-level tests for rand. Coverage is relatively sparse, but
the tests validate that the mean and stdev are reasonably sane, and that
the operator functions correctly for different dtypes, ranks, and sizes.
diff --git a/kernels/aten/functions.yaml b/kernels/aten/functions.yaml
@@ -315,6 +315,8 @@
 
 - op: prod.out
 
+- op: rand.out
+
 - op: reciprocal.out
 
 - op: relu.out
diff --git a/kernels/portable/cpu/op_rand.cpp b/kernels/portable/cpu/op_rand.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#include <c10/util/irange.h>
+
+#include <executorch/kernels/portable/cpu/scalar_utils.h>
+#include <executorch/runtime/kernel/kernel_includes.h>
+
+#include <random>
+
+namespace torch {
+namespace executor {
+namespace native {
+
+using executorch::aten::IntArrayRef;
+using Tensor = executorch::aten::Tensor;
+using ScalarType = executorch::aten::ScalarType;
+
+/**
+ * Portable kernel for `rand.out`.
+ *
+ * Resizes `out` to `sizes`, then overwrites every element with a sample drawn
+ * from std::uniform_real_distribution<double>(0.0, 1.0) and narrowed to the
+ * element type of `out`. The generator is a std::mt19937 seeded from
+ * std::random_device on every call; no caller-supplied seed is involved.
+ *
+ * @param ctx Kernel runtime context handed to the check and dtype-switch
+ *     macros.
+ * @param sizes Requested shape of the output tensor.
+ * @param out Tensor that receives the samples. Its scalar type must be one of
+ *     those dispatched by ET_SWITCH_FLOATHBF16_TYPES.
+ * @return A reference to `out`.
+ *
+ * NOTE(review): ET_KERNEL_CHECK_MSG presumably records InvalidArgument on
+ * `ctx` and returns `out` early when the resize fails -- confirm against the
+ * macro definition.
+ */
+Tensor&
+rand_out(KernelRuntimeContext& ctx, const IntArrayRef sizes, Tensor& out) {
+  // Resize for dynamic shape
+  ET_KERNEL_CHECK_MSG(
+      ctx,
+      resize_tensor(out, sizes) == Error::Ok,
+      InvalidArgument,
+      out,
+      "Failed to resize output tensor.");
+
+  // Build the generator only after the output shape has been accepted, so a
+  // failed resize does not pay for std::random_device.
+  std::mt19937 gen((std::random_device())());
+  std::uniform_real_distribution<double> dist(0.0, 1.0);
+
+  // NOTE(review): samples are drawn as double and then narrowed to CTYPE. For
+  // reduced-precision element types a sample just below 1.0 may round up to
+  // exactly 1.0 -- confirm whether a strict [0, 1) bound is required.
+  ET_SWITCH_FLOATHBF16_TYPES(out.scalar_type(), ctx, "rand.out", CTYPE, [&] {
+    auto data_out = out.mutable_data_ptr<CTYPE>();
+    for (const auto i : c10::irange(out.numel())) {
+      data_out[i] = static_cast<CTYPE>(dist(gen));
+    }
+  });
+
+  return out;
+}
+
+} // namespace native
+} // namespace executor
+} // namespace torch
diff --git a/kernels/portable/functions.yaml b/kernels/portable/functions.yaml
@@ -713,6 +713,12 @@
     - arg_meta: null
       kernel_name: torch::executor::prod_out
 
+- op: rand.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::rand_out
+  tags: nondeterministic_seeded
+
 - op: reciprocal.out
   kernels:
     - arg_meta: null
diff --git a/kernels/test/CMakeLists.txt b/kernels/test/CMakeLists.txt
@@ -197,6 +197,7 @@ set(all_test_sources
     "op_permute_copy_test.cpp"
     "op_pixel_shuffle_test.cpp"
     "op_prod_test.cpp"
+    "op_rand_test.cpp"
     "op_reciprocal_test.cpp"
     "op_relu_test.cpp"
     "op_remainder_test.cpp"
diff --git a/kernels/test/op_rand_test.cpp b/kernels/test/op_rand_test.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <c10/util/irange.h>
+#include <executorch/kernels/test/FunctionHeaderWrapper.h> // Declares the operator
+#include <executorch/kernels/test/TestUtil.h>
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
+#include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h>
+
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <iterator>
+#include <numeric>
+#include <vector>
+
+using executorch::aten::IntArrayRef;
+using executorch::aten::ScalarType;
+using executorch::aten::Tensor;
+using torch::executor::testing::TensorFactory;
+
+/**
+ * Fixture for the `rand.out` operator tests. Provides a thin wrapper around
+ * the operator entry point and a templated helper that runs the operator for
+ * one shape/dtype combination and sanity-checks the resulting statistics.
+ */
+class OpRandTest : public OperatorTest {
+ protected:
+  /// Calls the operator under test, writing its result into `out`.
+  void op_rand_out(const IntArrayRef sizes, Tensor& out) {
+    torch::executor::aten::rand_outf(context_, sizes, out);
+  }
+
+  /**
+   * Runs `rand.out` into a zero-initialized tensor of dtype `DTYPE` and shape
+   * `sizes`, then checks the empirical mean and standard deviation of the
+   * output against the values expected for a uniform distribution on [0, 1].
+   *
+   * @tparam CTYPE C++ element type used to read the output buffer.
+   * @tparam DTYPE ScalarType used to construct the output tensor.
+   * @param sizes Shape to request from the operator; not modified.
+   */
+  template <typename CTYPE, ScalarType DTYPE>
+  void test_rand(const std::vector<int64_t>& sizes) {
+    TensorFactory<DTYPE> tf;
+
+    // Tensor factory wants int32 sizes, op kernel wants int64.
+    std::vector<int32_t> sizes_i32;
+    sizes_i32.reserve(sizes.size());
+    std::transform(
+        sizes.begin(),
+        sizes.end(),
+        std::back_inserter(sizes_i32),
+        [](int64_t s) { return static_cast<int32_t>(s); });
+    Tensor out = tf.zeros(sizes_i32);
+
+    IntArrayRef sizes_ref(sizes.data(), sizes.size());
+    op_rand_out(sizes_ref, out);
+
+    // Check mean and standard deviation. To avoid flaky CI, test pretty
+    // loosely.
+    const auto* out_data = out.const_data_ptr<CTYPE>();
+    const auto numel = out.numel();
+    const double mean =
+        std::accumulate(
+            out_data,
+            out_data + numel,
+            0.0,
+            [](double acc, CTYPE n) { return acc + static_cast<double>(n); }) /
+        numel;
+    // Population variance: mean of squared deviations from the sample mean.
+    const double var = std::accumulate(
+                           out_data,
+                           out_data + numel,
+                           0.0,
+                           [mean](double acc, CTYPE n) {
+                             const double delta =
+                                 static_cast<double>(n) - mean;
+                             return acc + delta * delta;
+                           }) /
+        numel;
+    const auto stdev = std::sqrt(var);
+
+    // These are very rough thresholds. A better test implementation would
+    // probably do a proper statistical test to compare the generated empirical
+    // data to the reference distribution, but this should do.
+
+    // Expected mean is 0.5
+    EXPECT_NEAR(mean, 0.5, 5.0 / std::sqrt(numel));
+    // Expected stdev is 1/sqrt(12) ~= 0.289
+    EXPECT_NEAR(stdev, 1.0 / std::sqrt(12), 0.1);
+    // Guards against a constant (e.g. still all-zero) output.
+    EXPECT_GT(stdev, 0);
+  }
+};
+
+// Runs the statistical check on a single rank-4 shape once for every
+// (ctype, dtype) pair enumerated by ET_FORALL_FLOATHBF16_TYPES.
+TEST_F(OpRandTest, SmokeTest) {
+  std::vector<int64_t> sizes{2, 3, 4, 128};
+
+#define RUN_RAND_FOR_DTYPE(ctype, dtype) \
+  test_rand<ctype, ScalarType::dtype>(sizes);
+  ET_FORALL_FLOATHBF16_TYPES(RUN_RAND_FOR_DTYPE);
+#undef RUN_RAND_FOR_DTYPE
+}
+
+// Grows the shape by one trailing dimension per iteration. The dimension is
+// appended before each check, so the shapes exercised are {1024, 1},
+// {1024, 1, 2}, {1024, 1, 2, 3} and {1024, 1, 2, 3, 4} (ranks 2 through 5).
+TEST_F(OpRandTest, Rank) {
+  std::vector<int64_t> shape{1024};
+
+  for (int64_t extra_dim = 1; extra_dim <= 4; ++extra_dim) {
+    shape.push_back(extra_dim);
+    test_rand<float, ScalarType::Float>(shape);
+  }
+}
diff --git a/kernels/test/targets.bzl b/kernels/test/targets.bzl
@@ -285,6 +285,7 @@ def define_common_targets():
     _common_op_test("op_pixel_unshuffle_test", ["aten", "portable"])
     _common_op_test("op_pow_test", ["aten", "portable"])
     _common_op_test("op_prod_test", ["aten", "portable"])
+    _common_op_test("op_rand_test", ["aten", "portable"])
     _common_op_test("op_reciprocal_test", ["aten", "portable"])
     _common_op_test("op_relu_test", ["aten", "portable"])
     _common_op_test("op_remainder_test", ["aten", "portable"])
diff --git a/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl b/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl
@@ -973,6 +973,14 @@ ATEN_OPS = (
             "//executorch/kernels/portable/cpu/util:reduce_util",
         ],
     ),
+    # Target for the portable rand kernel (op_rand.cpp).
+    op_target(
+        name = "op_rand",
+        deps = [
+            ":scalar_utils",
+            "//executorch/runtime/core/exec_aten/util:scalar_type_util",
+            "//executorch/runtime/core/exec_aten/util:tensor_util",
+        ],
+    ),
     op_target(
         name = "op_reciprocal",
         deps = [