Skip to content

Commit 634a206

Browse files
manuelcandales authored and facebook-github-bot committed
Calibrate portable ops (#2537)
Summary: Pull Request resolved: #2537 Reviewed By: SS-JIA Differential Revision: D55142429 fbshipit-source-id: 77f1d145d4fe5e45ee90a6803f2f2956179b22cf
1 parent 3270d22 commit 634a206

File tree

12 files changed

+135
-91
lines changed

12 files changed

+135
-91
lines changed

kernels/portable/cpu/op_any.cpp

Lines changed: 31 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ Tensor& any_all_out(RuntimeContext& ctx, const Tensor& in, Tensor& out) {
3232
auto data_out = out.mutable_data_ptr<CTYPE_OUT>();
3333
data_out[0] = static_cast<CTYPE_OUT>(false);
3434
for (auto i = 0; i < in.numel(); ++i) {
35-
if (static_cast<CTYPE_OUT>(data_in[i])) {
35+
if (static_cast<bool>(data_in[i])) {
3636
data_out[0] = static_cast<CTYPE_OUT>(true);
3737
break;
3838
}
@@ -57,11 +57,16 @@ Tensor& any_dims_out(
5757
InvalidArgument,
5858
out);
5959

60-
ET_KERNEL_CHECK(
61-
ctx,
62-
resize_reduction_out(in, dim_list, keepdim, out) == Error::Ok,
63-
InvalidArgument,
64-
out);
60+
if (dim_list.has_value() && dim_list.value().empty()) {
61+
ET_KERNEL_CHECK(
62+
ctx, resize_tensor(out, in.sizes()) == Error::Ok, InvalidArgument, out);
63+
} else {
64+
ET_KERNEL_CHECK(
65+
ctx,
66+
resize_reduction_out(in, dim_list, keepdim, out) == Error::Ok,
67+
InvalidArgument,
68+
out);
69+
}
6570

6671
ScalarType in_type = in.scalar_type();
6772
ScalarType out_type = out.scalar_type();
@@ -70,17 +75,25 @@ Tensor& any_dims_out(
7075
ET_SWITCH_REALHB_TYPES(in_type, ctx, name, CTYPE_IN, [&] {
7176
ET_SWITCH_TWO_TYPES(Bool, Byte, out_type, ctx, name, CTYPE_OUT, [&] {
7277
CTYPE_OUT* out_data = out.mutable_data_ptr<CTYPE_OUT>();
73-
for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) {
74-
CTYPE_OUT any = false;
75-
if (in.numel() > 0) {
76-
any = map_reduce_over_dim_list<CTYPE_IN, CTYPE_OUT>(
77-
[](CTYPE_IN v) { return static_cast<bool>(v); },
78-
[](CTYPE_OUT outv, CTYPE_OUT acc) { return acc || outv; },
79-
in,
80-
dim_list,
81-
out_ix);
78+
if (dim_list.has_value() && dim_list.value().empty()) {
79+
const CTYPE_IN* in_data = in.const_data_ptr<CTYPE_IN>();
80+
for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) {
81+
out_data[out_ix] =
82+
static_cast<CTYPE_OUT>(static_cast<bool>(in_data[out_ix]));
83+
}
84+
} else {
85+
for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) {
86+
bool any = false;
87+
if (in.numel() > 0) {
88+
any = map_reduce_over_dim_list<CTYPE_IN, bool>(
89+
[](CTYPE_IN v) { return static_cast<bool>(v); },
90+
[](bool outv, bool acc) { return acc || outv; },
91+
in,
92+
dim_list,
93+
out_ix);
94+
}
95+
out_data[out_ix] = static_cast<CTYPE_OUT>(any);
8296
}
83-
out_data[out_ix] = any;
8497
}
8598
});
8699
});
@@ -98,7 +111,8 @@ Tensor& any_out(
98111

99112
ET_KERNEL_CHECK(
100113
ctx,
101-
check_reduction_args_single_dim(in, dim, keepdim, {}, out),
114+
check_reduction_args_single_dim(
115+
in, dim, keepdim, {}, out, /*allow_empty_dim*/ true),
102116
InvalidArgument,
103117
out);
104118

kernels/portable/cpu/op_cdist_forward.cpp

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,9 +25,23 @@ inline ArrayRef<Tensor::SizesType> get_batch_sizes(const Tensor& tensor) {
2525

2626
template <typename CTYPE, typename Norm>
2727
void cdist(const Tensor& x1, const Tensor& x2, Tensor& out, double p) {
28+
if (out.numel() == 0) {
29+
return;
30+
}
31+
32+
CTYPE* out_data = out.mutable_data_ptr<CTYPE>();
33+
34+
// If the last dimension of x1 (which is equal to the last dimension of x2)
35+
// has size 0, then the output is filled with 0s.
36+
if (x1.numel() == 0) {
37+
for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) {
38+
out_data[out_ix] = 0;
39+
}
40+
return;
41+
}
42+
2843
const CTYPE* x1_data = x1.const_data_ptr<CTYPE>();
2944
const CTYPE* x2_data = x2.const_data_ptr<CTYPE>();
30-
CTYPE* out_data = out.mutable_data_ptr<CTYPE>();
3145

3246
const ArrayRef<Tensor::SizesType> x1_batch_sizes = get_batch_sizes(x1);
3347
const ArrayRef<Tensor::SizesType> x2_batch_sizes = get_batch_sizes(x2);

kernels/portable/cpu/op_diagonal_copy.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,10 @@ void diagonal_copy_impl(
2323
int64_t dim1,
2424
int64_t dim2,
2525
Tensor& out) {
26+
if (out.numel() == 0) {
27+
return;
28+
}
29+
2630
int64_t storage_offset = 0;
2731
size_t diag_size = out.size(out.dim() - 1);
2832

@@ -89,7 +93,7 @@ Tensor& diagonal_copy_out(
8993

9094
constexpr auto name = "diagonal_copy.out";
9195

92-
ET_SWITCH_REAL_TYPES(in.scalar_type(), ctx, name, CTYPE, [&] {
96+
ET_SWITCH_REALHB_TYPES(in.scalar_type(), ctx, name, CTYPE, [&] {
9397
diagonal_copy_impl<CTYPE>(in, offset, dim1, dim2, out);
9498
});
9599

kernels/portable/cpu/op_div.cpp

Lines changed: 35 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -163,25 +163,21 @@ Tensor& div_scalar_out(
163163

164164
ET_SWITCH_REAL_TYPES_AND(Bool, a_type, ctx, "div.Scalar_out", CTYPE_A, [&]() {
165165
ET_SWITCH_SCALAR_OBJ_TYPES(b_type, ctx, "div.Scalar_out", CTYPE_B, [&]() {
166-
ET_SWITCH_FLOAT_TYPES(
167-
common_type, ctx, "div.Scalar_out", CTYPE_IN, [&]() {
168-
ET_SWITCH_FLOAT_TYPES(
169-
out_type, ctx, "div.Scalar_out", CTYPE_OUT, [&]() {
170-
CTYPE_B b_val;
171-
utils::extract_scalar(b, &b_val);
172-
CTYPE_IN b_casted = static_cast<CTYPE_IN>(b_val);
173-
174-
apply_unary_map_fn(
175-
[b_casted](const CTYPE_A val_a) {
176-
CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
177-
CTYPE_IN value = a_casted / b_casted;
178-
return static_cast<CTYPE_OUT>(value);
179-
},
180-
a.const_data_ptr<CTYPE_A>(),
181-
out.mutable_data_ptr<CTYPE_OUT>(),
182-
out.numel());
183-
});
184-
});
166+
ET_SWITCH_FLOAT_TYPES(out_type, ctx, "div.Scalar_out", CTYPE, [&]() {
167+
CTYPE_B b_val;
168+
utils::extract_scalar(b, &b_val);
169+
CTYPE b_casted = static_cast<CTYPE>(b_val);
170+
171+
apply_unary_map_fn(
172+
[b_casted](const CTYPE_A val_a) {
173+
CTYPE a_casted = static_cast<CTYPE>(val_a);
174+
CTYPE value = a_casted / b_casted;
175+
return static_cast<CTYPE>(value);
176+
},
177+
a.const_data_ptr<CTYPE_A>(),
178+
out.mutable_data_ptr<CTYPE>(),
179+
out.numel());
180+
});
185181
});
186182
});
187183

@@ -206,7 +202,7 @@ Tensor& div_scalar_mode_out(
206202

207203
ScalarType a_type = a.scalar_type();
208204
ScalarType b_type = utils::get_scalar_dtype(b);
209-
ScalarType common_type = isFloatingType(a_type) ? a_type : ScalarType::Float;
205+
ScalarType common_type = utils::promote_type_with_scalar(a_type, b);
210206
ScalarType out_type = out.scalar_type();
211207

212208
ET_KERNEL_CHECK(ctx, common_type == out_type, InvalidArgument, out);
@@ -215,27 +211,25 @@ Tensor& div_scalar_mode_out(
215211

216212
ET_SWITCH_REALB_TYPES(a_type, ctx, name, CTYPE_A, [&]() {
217213
ET_SWITCH_SCALAR_OBJ_TYPES(b_type, ctx, name, CTYPE_B, [&]() {
218-
ET_SWITCH_FLOAT_TYPES(common_type, ctx, name, CTYPE_IN, [&]() {
219-
ET_SWITCH_FLOAT_TYPES(out_type, ctx, name, CTYPE_OUT, [&]() {
220-
CTYPE_B b_val;
221-
utils::extract_scalar(b, &b_val);
222-
CTYPE_IN b_casted = static_cast<CTYPE_IN>(b_val);
223-
224-
apply_unary_map_fn(
225-
[b_casted, mode](const CTYPE_A val_a) {
226-
CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
227-
CTYPE_IN value = a_casted / b_casted;
228-
if (mode.has_value() && mode.value() == "trunc") {
229-
value = std::trunc(value);
230-
} else if (mode.has_value() && mode.value() == "floor") {
231-
value = utils::floor_divide(a_casted, b_casted);
232-
}
233-
return static_cast<CTYPE_OUT>(value);
234-
},
235-
a.const_data_ptr<CTYPE_A>(),
236-
out.mutable_data_ptr<CTYPE_OUT>(),
237-
out.numel());
238-
});
214+
ET_SWITCH_REAL_TYPES(out_type, ctx, name, CTYPE, [&]() {
215+
CTYPE_B b_val;
216+
utils::extract_scalar(b, &b_val);
217+
CTYPE b_casted = static_cast<CTYPE>(b_val);
218+
219+
apply_unary_map_fn(
220+
[b_casted, mode](const CTYPE_A val_a) {
221+
CTYPE a_casted = static_cast<CTYPE>(val_a);
222+
CTYPE value = a_casted / b_casted;
223+
if (mode.has_value() && mode.value() == "trunc") {
224+
value = std::trunc(value);
225+
} else if (mode.has_value() && mode.value() == "floor") {
226+
value = utils::floor_divide(a_casted, b_casted);
227+
}
228+
return value;
229+
},
230+
a.const_data_ptr<CTYPE_A>(),
231+
out.mutable_data_ptr<CTYPE>(),
232+
out.numel());
239233
});
240234
});
241235
});

kernels/portable/cpu/op_flip.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -52,15 +52,15 @@ flip_out(RuntimeContext& ctx, const Tensor& in, IntArrayRef dims, Tensor& out) {
5252
flip_dim_data[i] = false;
5353
}
5454
for (size_t i = 0; i < dims.size(); i++) {
55-
const auto d = dims[i] < 0 ? dims[i] + in.dim() : dims[i];
55+
const auto d = dims[i] < 0 ? dims[i] + nonzero_dim(in) : dims[i];
5656
flip_dim_data[d] = true;
5757
}
5858
size_t flip_dim_length = static_cast<size_t>(in.dim()); // NOLINT
5959
ArrayRef<bool> flip_dim(flip_dim_data, flip_dim_length);
6060

6161
constexpr auto name = "flip.out";
6262

63-
ET_SWITCH_REAL_TYPES(in.scalar_type(), ctx, name, CTYPE, [&] {
63+
ET_SWITCH_REALHB_TYPES(in.scalar_type(), ctx, name, CTYPE, [&] {
6464
const CTYPE* in_data = in.const_data_ptr<CTYPE>();
6565
CTYPE* out_data = out.mutable_data_ptr<CTYPE>();
6666

kernels/portable/cpu/op_prod.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,8 @@ Tensor& prod_int_out(
5858

5959
ET_KERNEL_CHECK(
6060
ctx,
61-
check_reduction_args_single_dim(in, dim, keepdim, dtype, out),
61+
check_reduction_args_single_dim(
62+
in, dim, keepdim, dtype, out, /*allow_empty_dim=*/true),
6263
InvalidArgument,
6364
out);
6465

kernels/portable/cpu/op_roll.cpp

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -19,10 +19,9 @@ bool check_roll_args(
1919
IntArrayRef shifts,
2020
IntArrayRef dims,
2121
const Tensor& out) {
22-
for (const auto& d : dims) {
23-
if (in.dim() == 0) {
24-
ET_LOG_AND_RETURN_IF_FALSE(d == 0 || d == -1);
25-
} else {
22+
ET_LOG_AND_RETURN_IF_FALSE(tensor_has_rank_greater_or_equal_to(in, 1));
23+
if (in.numel() > 0) {
24+
for (const auto& d : dims) {
2625
ET_LOG_AND_RETURN_IF_FALSE(dim_is_valid(d, in.dim()));
2726
}
2827
}
@@ -38,7 +37,8 @@ size_t unshift_flat_ix(size_t ix, const Tensor& in, IntArrayRef dim_shifts) {
3837

3938
size_t shifted_coord[kTensorDimensionLimit];
4039
for (size_t d = 0; d < in.dim(); d++) {
41-
shifted_coord[d] = (ix_coord[d] - dim_shifts[d]) % in.size(d);
40+
shifted_coord[d] =
41+
(ix_coord[d] + in.size(d) - dim_shifts[d] % in.size(d)) % in.size(d);
4242
}
4343

4444
return coordinateToIndex(in, shifted_coord);
@@ -60,7 +60,9 @@ Tensor& roll_out(
6060
ET_KERNEL_CHECK(
6161
ctx, check_roll_args(in, shifts, dims, out), InvalidArgument, out);
6262

63-
constexpr auto name = "roll.out";
63+
if (in.numel() == 0) {
64+
return out;
65+
}
6466

6567
int64_t dim_shift_array[kTensorDimensionLimit];
6668
for (size_t i = 0; i < in.dim(); i++) {
@@ -74,7 +76,9 @@ Tensor& roll_out(
7476
size_t dim_shift_array_length = static_cast<size_t>(in.dim()); // NOLINT
7577
IntArrayRef dim_shifts(dim_shift_array, dim_shift_array_length);
7678

77-
ET_SWITCH_REAL_TYPES(in.scalar_type(), ctx, name, CTYPE, [&] {
79+
constexpr auto name = "roll.out";
80+
81+
ET_SWITCH_REALHB_TYPES(in.scalar_type(), ctx, name, CTYPE, [&] {
7882
const CTYPE* in_data = in.const_data_ptr<CTYPE>();
7983
CTYPE* out_data = out.mutable_data_ptr<CTYPE>();
8084

kernels/portable/cpu/op_var.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ void compute_variance(
2626
const size_t num,
2727
const double denominator) {
2828
CTYPE_OUT* out_data = out.mutable_data_ptr<CTYPE_OUT>();
29-
if (num == 0 || denominator == 0) {
29+
if (num == 0 || denominator <= 0) {
3030
for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) {
3131
out_data[out_ix] = NAN;
3232
}

kernels/portable/cpu/util/copy_ops_util.cpp

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -886,11 +886,18 @@ void get_diagonal_copy_out_target_size(
886886

887887
size_t diagonal_size = 0;
888888
if (offset >= 0) {
889-
diagonal_size = std::min<size_t>(in.size(dim1), in.size(dim2) - offset);
889+
if (in.size(dim2) <= offset) {
890+
diagonal_size = 0;
891+
} else {
892+
diagonal_size = std::min<size_t>(in.size(dim1), in.size(dim2) - offset);
893+
}
890894
} else {
891-
diagonal_size = std::min<size_t>(in.size(dim1) + offset, in.size(dim2));
895+
if (in.size(dim1) <= -offset) {
896+
diagonal_size = 0;
897+
} else {
898+
diagonal_size = std::min<size_t>(in.size(dim1) + offset, in.size(dim2));
899+
}
892900
}
893-
diagonal_size = std::max<size_t>(diagonal_size, 0);
894901

895902
size_t shift = 0;
896903
for (size_t d = 0; d < in.dim(); ++d) {

kernels/portable/cpu/util/reduce_util.cpp

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -336,8 +336,6 @@ bool check_reduction_args(
336336
ET_LOG_AND_RETURN_IF_FALSE(dtype.value() == out.scalar_type());
337337
}
338338
ET_LOG_AND_RETURN_IF_FALSE(check_dim_list_is_valid(in, dim_list));
339-
ET_LOG_AND_RETURN_IF_FALSE(
340-
out.dim() == compute_reduced_out_dim(in, dim_list, keepdim));
341339
ET_LOG_AND_RETURN_IF_FALSE(tensor_is_default_or_channels_last_dim_order(in));
342340
ET_LOG_AND_RETURN_IF_FALSE(tensor_is_default_or_channels_last_dim_order(out));
343341

@@ -353,7 +351,8 @@ bool check_reduction_args_single_dim(
353351
optional<int64_t> dim,
354352
bool keepdim,
355353
optional<ScalarType> dtype,
356-
Tensor& out) {
354+
Tensor& out,
355+
bool allow_empty_dim) {
357356
if (dtype.has_value()) {
358357
ET_LOG_AND_RETURN_IF_FALSE(dtype.value() == out.scalar_type());
359358
}
@@ -366,11 +365,11 @@ bool check_reduction_args_single_dim(
366365

367366
if (dim.has_value()) {
368367
ET_LOG_AND_RETURN_IF_FALSE(dim_is_valid(dim.value(), in.dim()));
369-
ET_LOG_AND_RETURN_IF_FALSE(tensor_has_non_empty_dim(in, dim.value()));
368+
if (!allow_empty_dim) {
369+
ET_LOG_AND_RETURN_IF_FALSE(tensor_has_non_empty_dim(in, dim.value()));
370+
}
370371
}
371372

372-
ET_LOG_AND_RETURN_IF_FALSE(
373-
out.dim() == compute_reduced_out_dim(in, dim, keepdim));
374373
ET_LOG_AND_RETURN_IF_FALSE(tensor_is_default_or_channels_last_dim_order(in));
375374
ET_LOG_AND_RETURN_IF_FALSE(tensor_is_default_or_channels_last_dim_order(out));
376375

kernels/portable/cpu/util/reduce_util.h

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -404,6 +404,10 @@ std::tuple<CTYPE_OUT, long> map_reduce_over_dim(
404404
CTYPE_OUT acc_val = map_fun(in_data[init_index]);
405405
long acc_ix = 0;
406406

407+
if (in.numel() == 1) {
408+
return std::tuple<CTYPE_OUT, long>{acc_val, acc_ix};
409+
}
410+
407411
apply_over_dim(
408412
[&acc_val, &acc_ix, reduce_fun, map_fun, in_data](
409413
const size_t in_ix, const size_t dim_ix) {
@@ -469,6 +473,10 @@ CTYPE_OUT map_reduce_over_dim_list(
469473
const CTYPE_IN* const in_data = in.const_data_ptr<CTYPE_IN>();
470474
CTYPE_OUT acc_val = map_fun(in_data[init_index]);
471475

476+
if (in.numel() == 1) {
477+
return acc_val;
478+
}
479+
472480
apply_over_dim_list(
473481
[&acc_val, reduce_fun, map_fun, in_data](const size_t in_ix) {
474482
acc_val = reduce_fun(map_fun(in_data[in_ix]), acc_val);
@@ -609,7 +617,8 @@ bool check_reduction_args_single_dim(
609617
optional<int64_t> dim,
610618
bool keepdim,
611619
optional<ScalarType> dtype,
612-
Tensor& out);
620+
Tensor& out,
621+
bool allow_empty_dim = false);
613622

614623
bool check_mean_dim_args(
615624
const Tensor& in,

0 commit comments

Comments (0)