Skip to content

Commit d4a7d55

Browse files
zonglinpeng authored and facebook-github-bot committed
buckify g3 targets, fix issues in quant, dequant, softmax (#7061)
Summary: update targets in fallback, fixed inherent issues from G3 PR. G3 op status page: https://docs.google.com/document/d/1ZRW6Uoq_NhpVCSH4y-t3Bl2pQZiKXMzSNT5XgrbE0fM/edit?tab=t.0 Reviewed By: hsharma35 Differential Revision: D66398494
1 parent ec68eb3 commit d4a7d55

File tree

6 files changed

+74
-15
lines changed

6 files changed

+74
-15
lines changed

backends/cadence/aot/functions_fusion_g3.yaml

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
- op: _softmax.out
2121
kernels:
2222
- arg_meta: null
23-
kernel_name: cadence::impl::G3::softmax_out
23+
kernel_name: cadence::impl::G3::_softmax_out
2424

2525
- op: add.out
2626
kernels:
@@ -71,7 +71,7 @@
7171
kernels:
7272
- arg_meta: null
7373
kernel_name: cadence::impl::G3::mul_out
74-
74+
7575
- op: mul.Scalar_out
7676
kernels:
7777
- arg_meta: null
@@ -111,8 +111,21 @@
111111
kernels:
112112
- arg_meta: null
113113
kernel_name: torch::executor::where_out
114-
114+
115115
- op: native_layer_norm.out
116116
kernels:
117117
- arg_meta: null
118-
kernel_name: cadence::impl::G3::native_layer_norm_out
118+
kernel_name: cadence::impl::G3::native_layer_norm_out
119+
120+
# custom ops
121+
- func: cadence::quantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
122+
variants: function
123+
kernels:
124+
- arg_meta: null
125+
kernel_name: cadence::impl::G3::native::quantize_per_tensor_out
126+
127+
- func: cadence::dequantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
128+
variants: function
129+
kernels:
130+
- arg_meta: null
131+
kernel_name: cadence::impl::G3::native::dequantize_per_tensor_out
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
load("targets.bzl", "define_common_targets")
2+
3+
oncall("odai_jarvis")
4+
5+
define_common_targets()

backends/cadence/fusion_g3/operators/op_dequantize.cpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ void check_dequantize_per_tensor_args(
8383
} // namespace
8484

8585
/* Local function which calls the kernels based on the input datatype */
86-
void Dequantize_impl(
86+
void dequantize_impl(
8787
Tensor& out,
8888
const Tensor& input,
8989
float* scale_data,
@@ -502,7 +502,7 @@ Tensor& dequantize_per_tensor_out(
502502
float scale_data = (float)scale;
503503
int zero_point_data = (int)zero_point;
504504

505-
Dequantize_impl(out, input, &scale_data, &zero_point_data, NULL, out_dtype);
505+
dequantize_impl(out, input, &scale_data, &zero_point_data, NULL, out_dtype);
506506

507507
return out;
508508
}
@@ -620,7 +620,7 @@ Tensor& dequantize_per_channel_out(
620620
for (int i = 0; i < scale.numel(); i++) {
621621
scale_data[i] = (float)scale_dt[i];
622622
}
623-
Dequantize_impl(out, input, scale_data, zero_point_ptr, axis_ptr, out_dtype);
623+
dequantize_impl(out, input, scale_data, zero_point_ptr, axis_ptr, out_dtype);
624624

625625
return out;
626626
}
@@ -661,13 +661,19 @@ Tensor& dequantize_per_tensor_out(
661661
int64_t quant_min,
662662
int64_t quant_max,
663663
ScalarType dtype,
664-
exec_aten::optional<ScalarType> out_dtype,
665664
Tensor& out) {
666665
// TODO(larryliu): Add a context arg to the real op function and remove this
667666
// wrapper
668667
(void)context;
669668
return dequantize_per_tensor_out(
670-
input, scale, zero_point, quant_min, quant_max, dtype, out_dtype, out);
669+
input,
670+
scale,
671+
zero_point,
672+
quant_min,
673+
quant_max,
674+
dtype,
675+
out.scalar_type(),
676+
out);
671677
}
672678

673679
Tensor& dequantize_per_tensor_tensor_args_out(
@@ -764,4 +770,4 @@ Tensor& dequantize_per_token_out(
764770
} // namespace native
765771
} // namespace G3
766772
} // namespace impl
767-
} // namespace cadence
773+
} // namespace cadence

backends/cadence/fusion_g3/operators/op_quantize.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ enum datatype { Ushort = 20, Bits4u = 21, Bits4 = 22 };
3131
*/
3232
namespace cadence {
3333
namespace impl {
34-
namespace FusionG3 {
34+
namespace G3 {
3535
namespace native {
3636

3737
namespace {
@@ -802,6 +802,6 @@ Tensor& quantize_per_token_out(
802802
}
803803

804804
} // namespace native
805-
} // namespace FusionG3
805+
} // namespace G3
806806
} // namespace impl
807-
} // namespace cadence
807+
} // namespace cadence

backends/cadence/fusion_g3/operators/op_softmax.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ namespace impl {
2424
namespace G3 {
2525
namespace native {
2626

27-
Tensor& softmax_out(
27+
Tensor& _softmax_out(
2828
KernelRuntimeContext& ctx,
2929
const Tensor& in,
3030
int64_t dim,
@@ -112,4 +112,4 @@ Tensor& softmax_out(
112112
} // namespace native
113113
} // namespace G3
114114
} // namespace impl
115-
} // namespace cadence
115+
} // namespace cadence
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
load("@fbsource//tools/build_defs:platform_defs.bzl", "CXX")
2+
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
3+
4+
def define_common_targets():
5+
"""Defines targets that should be shared between fbcode and xplat.
6+
7+
The directory containing this targets.bzl file should also contain both
8+
TARGETS and BUCK files that call this function.
9+
"""
10+
11+
# Define build targets for all operators registered in the tables above.
12+
13+
runtime.cxx_library(
14+
name = "cadence_g3_ops",
15+
srcs = glob([
16+
"*.cpp",
17+
]),
18+
platforms = CXX,
19+
deps = [
20+
"//executorch/kernels/portable/cpu/util:all_deps",
21+
"//executorch/kernels/portable/cpu/pattern:all_deps",
22+
"//executorch/runtime/kernel:kernel_includes",
23+
"//executorch/kernels/portable/cpu:scalar_utils",
24+
"fbsource//third-party/nnlib-FusionG3/xa_nnlib:libxa_nnlib_common",
25+
"fbsource//third-party/nnlib-FusionG3/xa_nnlib:libxa_nnlib",
26+
],
27+
visibility = [
28+
"//executorch/backends/cadence/...",
29+
"@EXECUTORCH_CLIENTS",
30+
],
31+
exported_deps = [
32+
"fbsource//third-party/nnlib-FusionG3/xa_nnlib:libxa_nnlib_common",
33+
"fbsource//third-party/nnlib-FusionG3/xa_nnlib:libxa_nnlib",
34+
],
35+
)

0 commit comments

Comments (0)