Commit 3d39332
format on "Reapply #11294 and #11295 (improve GLU test and implement using internal views to avoid copying)"
These were reverted due to internal test failures. Sending this as an exported internal diff so that we can make sure we get internal signal.

Original summary for #11294 (making the GLU test input asymmetric): this way it will produce different results along each tested dim.

Original summary for #11295: GLU requires slicing the input Tensor into two halves. Currently, we accomplish this by copying; ExecuTorch does not support views in general because it requires Tensors to be contiguous. However, nothing stops us from implementing [the ATen implementation that uses views](https://github.com/pytorch/pytorch/blob/main/aten/src/ATen/native/GatedLinearUnit.cpp#L35) entirely internally to the op. To support this, I added `support_noncontiguous_tensors` as an optional template argument to `BroadcastIndexesRange` and plumbed it through to the `elementwise_util` functions as an optional `SupportNonContiguousTensors` parameter.

Differential Revision: [D76311585](https://our.internmc.facebook.com/intern/diff/D76311585/)

[ghstack-poisoned]
1 parent c4b0b45 · commit 3d39332
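To make the view trick concrete, here is a minimal standalone C++ sketch of the idea; this is plain C++, not ExecuTorch code, and the `HalfView` struct is a hypothetical stand-in for the `TensorImpl` views built in the diff below. Splitting along `dim` only requires halving the size along that dimension and, for the second half, offsetting the data pointer by `strides[dim] * size(dim) / 2` elements; no data is copied. Note that the resulting halves are not contiguous, which is exactly why `BroadcastIndexesRange` needs the new `support_noncontiguous_tensors` option.

```cpp
#include <array>
#include <cstddef>
#include <iostream>

// Hypothetical stand-in for a non-owning tensor view: a data pointer plus
// sizes and strides. In the real op, this role is played by TensorImpl.
struct HalfView {
  const float* data;
  std::array<std::size_t, 2> sizes;
  std::array<std::size_t, 2> strides;
};

int main() {
  // A contiguous 2x4 row-major tensor, split along dim = 1 into two 2x2 halves.
  const float input[2][4] = {{1, 2, 3, 4}, {5, 6, 7, 8}};
  const std::size_t sizes[2] = {2, 4};
  const std::size_t strides[2] = {4, 1};  // row-major strides, in elements
  const std::size_t dim = 1;

  // First half: same data pointer, size halved along `dim`, original strides.
  HalfView first{
      &input[0][0], {sizes[0], sizes[dim] / 2}, {strides[0], strides[1]}};
  // Second half: same shape and strides, but the data pointer is offset by
  // strides[dim] * sizes[dim] / 2 elements (the diff does the equivalent in
  // bytes via a char* and element_size()).
  HalfView second{
      &input[0][0] + strides[dim] * sizes[dim] / 2,
      {sizes[0], sizes[dim] / 2},
      {strides[0], strides[1]}};

  // Both halves index into the original storage; nothing was copied. They are
  // NOT contiguous: a contiguous 2x2 tensor would have strides {2, 1}, but
  // these keep the original row stride of 4.
  for (std::size_t i = 0; i < 2; ++i) {
    for (std::size_t j = 0; j < 2; ++j) {
      std::cout << first.data[i * first.strides[0] + j * first.strides[1]]
                << ' '
                << second.data[i * second.strides[0] + j * second.strides[1]]
                << '\n';
    }
  }
  return 0;
}
```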

1 file changed: +3 −2 lines

kernels/portable/cpu/op_glu.cpp (3 additions, 2 deletions):

```diff
@@ -26,7 +26,8 @@ namespace {
 
 struct SplitGLUInputTensor {
   explicit SplitGLUInputTensor(const Tensor& self, int64_t dim);
-  using SizesArray = std::array<executorch::aten::SizesType, kTensorDimensionLimit>;
+  using SizesArray =
+      std::array<executorch::aten::SizesType, kTensorDimensionLimit>;
   SizesArray half_sizes;
   TensorImpl first_half_impl;
   TensorImpl second_half_impl;
@@ -57,7 +58,7 @@ SplitGLUInputTensor::SplitGLUInputTensor(const Tensor& self, int64_t dim)
       self.dim(),
       half_sizes.data(),
       reinterpret_cast<char*>(self.mutable_data_ptr()) +
-          self.strides()[dim] * self.size(dim) / 2 * self.element_size(),
+              self.strides()[dim] * self.size(dim) / 2 * self.element_size(),
       const_cast<executorch::aten::DimOrderType*>(self.dim_order().data()),
       const_cast<executorch::aten::StridesType*>(self.strides().data()),
       self.shape_dynamism()),
```
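For context on what the op computes with these two halves: GLU multiplies the first half elementwise by the sigmoid of the second half, out = a ⊗ σ(b). Below is a minimal 1-D sketch under the simplest possible layout; again plain C++ with illustrative names, whereas the real kernel iterates over strided views via `BroadcastIndexesRange`.

```cpp
#include <cmath>
#include <cstddef>
#include <iostream>

// GLU over a 1-D input of even length n: out[i] = a[i] * sigmoid(b[i]),
// where a is the first half of the input and b the second half. The halves
// are just pointer offsets into the original buffer (no copy), mirroring
// what the op does with its internal views.
void glu_1d(const float* input, std::size_t n, float* out) {
  const std::size_t half = n / 2;
  const float* a = input;         // first half
  const float* b = input + half;  // second half: pointer offset only
  for (std::size_t i = 0; i < half; ++i) {
    out[i] = a[i] * (1.0f / (1.0f + std::exp(-b[i])));
  }
}

int main() {
  const float input[] = {1.0f, 2.0f, 0.0f, 100.0f};
  float out[2];
  glu_1d(input, 4, out);
  std::cout << out[0] << ' ' << out[1] << '\n';  // 0.5 and ~2.0
  return 0;
}
```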
