
Commit 1bbb2aa

Merge remote-tracking branch 'origin/main' into tiktoken
2 parents (f12edc8 + f9efb05) · commit 1bbb2aa

File tree: 27 files changed (+717, -34 lines)


backends/arm/arm_backend.py

Lines changed: 7 additions & 3 deletions
@@ -17,8 +17,8 @@
 from executorch.backends.arm.arm_vela import vela_compile
 from executorch.backends.arm.operators.node_visitor import get_node_visitors
 from executorch.backends.arm.operators.op_placeholder import process_placeholder
-from executorch.backends.arm.tosa_mapping import TosaArg
-from executorch.backends.arm.tosa_quant_utils import is_quant_node
+from executorch.backends.arm.tosa_mapping import map_dtype, TosaArg
+from executorch.backends.arm.tosa_quant_utils import get_quant_node_dtype, is_quant_node
 from executorch.backends.arm.tosa_utils import (
     dbg_fail,
     dbg_tosa_dump,
@@ -280,7 +280,11 @@ def preprocess( # noqa: C901
                 if is_permute_node_before_addmm(node)
                 else output.shape
             ),
-            ts.DType.INT8 if is_quant_node(node) else output.dtype,
+            (
+                map_dtype(get_quant_node_dtype(node))
+                if is_quant_node(node)
+                else output.dtype
+            ),
         )

         # Visiting each Node
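This hunk stops hard-coding INT8 for quantized nodes and instead derives the TOSA dtype from the node's quantization parameters. A minimal sketch of the selection logic, assuming map_dtype translates a torch dtype (e.g. torch.int8) into the matching ts.DType value; select_output_dtype is a hypothetical helper, not part of the patch:

# Sketch only: how the output dtype is chosen for a node after this change.
def select_output_dtype(node, output):
    if is_quant_node(node):
        # Quantized node: use the dtype recorded on its quantize/dequantize op.
        return map_dtype(get_quant_node_dtype(node))
    # Non-quantized node: keep the original output dtype.
    return output.dtype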

backends/arm/operators/op_placeholder.py

Lines changed: 2 additions & 1 deletion
@@ -8,6 +8,7 @@
 import torch
 from executorch.backends.arm.tosa_mapping import TosaArg
 from executorch.backends.arm.tosa_quant_utils import (
+    get_quant_arg_dtype,
     get_quant_node_args,
     is_quant_arg,
     q_op,
@@ -166,7 +167,7 @@ def process_placeholder(
     tensor = ts.TosaSerializerTensor(
         inputs[0].name,
         input_shape,
-        ts.DType.INT8 if is_quant_arg(node) else inputs[0].dtype,
+        get_quant_arg_dtype(node) if is_quant_arg(node) else inputs[0].dtype,
         data=None,
         placeholderFilename=inputs[0].name + ".npy",
     )

backends/arm/test/runner_utils.py

Lines changed: 29 additions & 8 deletions
@@ -23,13 +23,24 @@


 class QuantizationParams:
-    __slots__ = ["node_name", "zp", "scale"]
+    __slots__ = ["node_name", "zp", "scale", "qmin", "qmax", "dtype"]

     # todo: zps and scales can be per tensors or per channel => a list??
-    def __init__(self, node_name: str, zp: int, scale: float):
+    def __init__(
+        self,
+        node_name: str,
+        zp: int,
+        scale: float,
+        qmin: int,
+        qmax: int,
+        dtype: torch.dtype,
+    ):
         self.node_name = node_name  # not need I think, but good for error check
         self.zp = zp
         self.scale = scale
+        self.qmin = qmin
+        self.qmax = qmax
+        self.dtype = dtype


 def _get_input_names(program: ExportedProgram) -> list[str]:
@@ -74,7 +85,12 @@ def _get_input_quantization_params(
             and node.args[0].name in input_names
         ):
             qp = QuantizationParams(
-                node_name=node.args[0].name, scale=node.args[1], zp=node.args[2]
+                node_name=node.args[0].name,
+                scale=node.args[1],
+                zp=node.args[2],
+                qmin=node.args[3],
+                qmax=node.args[4],
+                dtype=node.args[5],
             )
             quant_params.append(qp)
         if (
@@ -122,7 +138,12 @@ def _get_output_quantization_params(
             and node == output_node.args[0][0]
         ):
             quant_params = QuantizationParams(
-                node_name=node.args[0].name, scale=node.args[1], zp=node.args[2]
+                node_name=node.args[0].name,
+                scale=node.args[1],
+                zp=node.args[2],
+                qmin=node.args[3],
+                qmax=node.args[4],
+                dtype=node.args[5],
             )
             break  # break early, there's only one output node
     if quant_params is None:
@@ -376,13 +397,13 @@ def prep_data_for_save(
     assert (
         quant_param.node_name == input_name
     ), "These quantization params do not match the input tensor name"
-    int8_max = np.iinfo(np.int8).max
-    int8_min = np.iinfo(np.int8).min
     data_np = (
         ((data_np / np.float32(quant_param.scale)) + quant_param.zp)
         .round()
-        .clip(int8_min, int8_max)
-        .astype(np.int8)
+        .clip(quant_param.qmin, quant_param.qmax)
+        .astype(
+            f"{quant_param.dtype}".replace("torch.", "")
+        )  # Use string format of dtype to convert to numpy dtype
     )
     return data_np

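QuantizationParams now carries a torch.dtype, while numpy's astype expects a numpy dtype or its name; the .astype call above bridges the two through the string form of the torch dtype. A small self-contained illustration of that conversion (values are arbitrary):

import numpy as np
import torch

dtype = torch.int8
np_name = f"{dtype}".replace("torch.", "")  # "torch.int8" -> "int8"
arr = np.zeros(4).astype(np_name)           # numpy accepts the dtype name string
assert arr.dtype == np.int8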

backends/arm/tosa_quant_utils.py

Lines changed: 31 additions & 1 deletion
@@ -10,7 +10,7 @@

 import serializer.tosa_serializer as ts
 import torch.fx
-from executorch.backends.arm.tosa_mapping import TosaArg
+from executorch.backends.arm.tosa_mapping import map_dtype, TosaArg
 from executorch.exir.dialects._ops import ops as exir_ops
 from serializer.tosa_serializer import TosaOp, TosaSerializerTensor

@@ -45,11 +45,41 @@ def is_quant_node(node: torch.fx.Node):
     )


+def get_quant_node_dtype(node: torch.fx.Node):
+    if "tosa" in node.target.__name__:
+        return node.meta["val"].dtype
+
+    if node.target in dq_q_ops:
+        return node.args[5]
+
+    # if not a tosa node, nor a q/dq op, walk the graph until we find a q op
+    consumer_node = list(node.users)[0]
+    while True:
+        if consumer_node.target in dq_q_ops:
+            return consumer_node.args[5]
+
+        # Try to move on to the next node
+        if len(consumer_node.users) == 0:
+            raise RuntimeError("No quantized node found in graph")
+        consumer_node = list(consumer_node.users)[0]
+
+
 def is_quant_arg(arg):
     consumer_node = list(arg.users)[0]
     return consumer_node.target == q_op


+def get_quant_arg_dtype(node: torch.fx.Node):
+    consumer_node = list(node.users)[0]
+
+    # Get type of quant node, args differ from per_tensor and per_channel.
+    if consumer_node.target == q_op:
+        if is_quant_arg(node):
+            return map_dtype(consumer_node.args[5])
+        else:
+            raise RuntimeError("Quantization argument not found")
+
+
 def get_quant_node_args(node: torch.fx.Node):
     """
     Get the quantization parameters from a quant node.
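Both new helpers read the dtype from a quantize/dequantize node's argument list. For the decomposed q/dq ops handled here, the arguments are ordered (input, scale, zero_point, quant_min, quant_max, dtype), which is why args[5] is returned; runner_utils.py above relies on the same layout. A rough illustration with a hand-written args tuple (the values are hypothetical, not taken from a real graph):

import torch

# quantize_per_tensor(input, scale, zero_point, quant_min, quant_max, dtype)
example_args = ("x", 0.02, 128, 0, 255, torch.uint8)
dtype = example_args[5]  # what get_quant_node_dtype returns for a q/dq node
assert dtype == torch.uint8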

backends/transforms/TARGETS

Lines changed: 1 addition & 0 deletions
@@ -120,6 +120,7 @@ runtime.python_library(
         "//executorch/backends/...",
         "//executorch/examples/...",
         "//executorch/extension/llm/...",
+        "@EXECUTORCH_CLIENTS",
     ],
     deps = [
         "//caffe2:torch",

backends/vulkan/partitioner/supported_ops.py

Lines changed: 1 addition & 0 deletions
@@ -45,6 +45,7 @@ def __contains__(self, op):
 BINARY_OPS = [
     exir_ops.edge.aten.add.Tensor,
     exir_ops.edge.aten.sub.Tensor,
+    exir_ops.edge.aten.minimum.default,
     exir_ops.edge.aten.mul.Tensor,
     exir_ops.edge.aten.div.Tensor,
     exir_ops.edge.aten.div.Tensor_mode,

backends/vulkan/runtime/gen_vulkan_spv.py

Lines changed: 23 additions & 2 deletions
@@ -525,8 +525,29 @@ def generateVariantCombinations(
             if param_name not in exclude_params:
                 param_values = []
                 for value in value_list:
-                    suffix = value.get("SUFFIX", value["VALUE"])
-                    param_values.append((param_name, suffix, value["VALUE"]))
+                    if "RANGE" in value:
+                        value_range = value["RANGE"]
+                        suffix = value.get("SUFFIX", "")
+                        if isinstance(value_range, list) and len(value_range) == 2:
+                            for i in range(value_range[0], value_range[1] + 1):
+                                curr_suffix = (
+                                    suffix + "_" + str(i) if suffix else str(i)
+                                )
+                                param_values.append((param_name, curr_suffix, str(i)))
+                        else:
+                            raise ValueError(
+                                f"{value['RANGE']} is not a valid range. Must be in format [start, end] (inclusive)."
+                            )
+
+                    elif "VALUE" in value:
+                        suffix = value.get("SUFFIX", value["VALUE"])
+                        param_values.append((param_name, suffix, value["VALUE"]))
+
+                    else:
+                        raise KeyError(
+                            "Parameter must be 'VALUE: string' or 'RANGE: [a, b]'"
+                        )
+
                 all_iterated_params.append(param_values)

         return list(product(*all_iterated_params))
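The new branch lets a shader parameter expand over an integer range instead of listing every value. A minimal sketch of the expansion for one parameter entry, assuming it has been parsed from YAML into a dict; the parameter name and suffix are illustrative only:

# Illustration: how a RANGE entry expands into (name, suffix, value) tuples.
value = {"RANGE": [1, 4], "SUFFIX": "tile"}  # hypothetical YAML entry
param_name = "TILE_SIZE"                      # hypothetical parameter name

param_values = []
lo, hi = value["RANGE"]
for i in range(lo, hi + 1):  # inclusive range, matching the code above
    suffix = f'{value["SUFFIX"]}_{i}' if value.get("SUFFIX") else str(i)
    param_values.append((param_name, suffix, str(i)))

# param_values == [("TILE_SIZE", "tile_1", "1"), ..., ("TILE_SIZE", "tile_4", "4")]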

backends/vulkan/runtime/graph/ops/glsl/binary_op.yaml

Lines changed: 2 additions & 0 deletions
@@ -28,3 +28,5 @@ binary_op:
       OPERATOR: pow(X, Y)
     - NAME: binary_floor_divide
       OPERATOR: floor(X / Y)
+    - NAME: binary_minimum
+      OPERATOR: min(X, Y)

backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp

Lines changed: 2 additions & 0 deletions
@@ -118,6 +118,7 @@ DEFINE_BINARY_OP_WITH_ALPHA_FN(floor_divide);
 DEFINE_BINARY_OP_FN(mul);
 DEFINE_BINARY_OP_FN(div);
 DEFINE_BINARY_OP_FN(pow);
+DEFINE_BINARY_OP_FN(minimum);

 REGISTER_OPERATORS {
   VK_REGISTER_OP(aten.add.Tensor, add);
@@ -126,6 +127,7 @@ REGISTER_OPERATORS {
   VK_REGISTER_OP(aten.div.Tensor, div);
   VK_REGISTER_OP(aten.div.Tensor_mode, floor_divide);
   VK_REGISTER_OP(aten.pow.Tensor_Tensor, pow);
+  VK_REGISTER_OP(aten.minimum.default, minimum);
 }

 } // namespace vkcompute

backends/vulkan/runtime/vk_api/QueryPool.cpp

Lines changed: 17 additions & 0 deletions
@@ -248,5 +248,22 @@ unsigned long QueryPool::get_total_shader_ns(std::string kernel_name) {
   }
   return 0;
 }
+
+unsigned long QueryPool::get_mean_shader_ns(std::string kernel_name) {
+  uint64_t total_ns = 0;
+  uint32_t count = 0;
+  for (ShaderDuration& entry : shader_durations_) {
+    if (entry.kernel_name == kernel_name) {
+      std::chrono::duration<size_t, std::nano> exec_duration_ns(
+          entry.execution_duration_ns);
+      total_ns += exec_duration_ns.count();
+      count++;
+    }
+  }
+  if (count == 0) {
+    return 0;
+  }
+  return total_ns / count;
+}
 } // namespace vkapi
 } // namespace vkcompute

backends/vulkan/runtime/vk_api/QueryPool.h

Lines changed: 1 addition & 0 deletions
@@ -102,6 +102,7 @@ class QueryPool final {
   std::string generate_string_report();
   void print_results();
   unsigned long get_total_shader_ns(std::string kernel_name);
+  unsigned long get_mean_shader_ns(std::string kernel_name);

   operator bool() const {
     return querypool_ != VK_NULL_HANDLE;

backends/vulkan/test/op_tests/cases.py

Lines changed: 16 additions & 0 deletions
@@ -1022,3 +1022,19 @@ def get_constant_pad_nd_inputs():
         ]
     )
     return test_suite
+
+
+@register_test_suite("aten.minimum.default")
+def get_minimum_inputs():
+    test_suite = VkTestSuite(
+        [
+            ((M1, M2), (M2)),
+            ((M1, M2), (M1, M2)),
+            ((M1, M2, M), (M2, M)),
+            ((M1, M1, S1, S2), (M1, M1, S1, S2)),
+            ((S1, S1, S2, S), (S1, S2, S)),
+            ((M1, S1, S2), (L, M1, S1, S2)),
+            ((S1, S2), (L, M1, S1, S2)),
+        ]
+    )
+    return test_suite
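The shape pairs above exercise broadcasting for aten.minimum (M*, S*, and L are size constants defined elsewhere in cases.py). A tiny standalone check of the same behavior with concrete, illustrative sizes:

import torch

a = torch.rand(3, 5, 6, 4)
b = torch.rand(6, 4)
out = torch.minimum(a, b)  # b broadcasts over the leading dimensions
assert out.shape == (3, 5, 6, 4)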

backends/vulkan/test/test_vulkan_delegate.py

Lines changed: 19 additions & 0 deletions
@@ -1072,6 +1072,25 @@ def forward(self, x):
             memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )

+    def test_vulkan_backend_minimum(self):
+        class MinimumModule(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+
+            def forward(self, x, y):
+                return torch.minimum(x, y)
+
+        sample_inputs = (
+            torch.rand(size=(3, 5, 6, 4), dtype=torch.float32),
+            torch.rand(size=(6, 4), dtype=torch.float32),
+        )
+
+        self.lower_module_and_test_output(
+            MinimumModule(),
+            sample_inputs,
+            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
+        )
+
     def test_vulkan_backend_reshape(self):
         class ReshapeModule(torch.nn.Module):
             def __init__(self):

backends/vulkan/tools/gpuinfo/TARGETS

Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
+load("@fbcode_macros//build_defs:native_rules.bzl", "buck_filegroup")
+load("@fbsource//tools/build_defs:fb_xplat_cxx_binary.bzl", "fb_xplat_cxx_binary")
+load(
+    "@fbsource//tools/build_defs:platform_defs.bzl",
+    "ANDROID",
+)
+load(
+    "@fbsource//xplat/executorch/backends/vulkan:targets.bzl",
+    "vulkan_spv_shader_lib",
+)
+
+oncall("executorch")
+
+buck_filegroup(
+    name = "gpuinfo_shaders",
+    srcs = glob([
+        "glsl/*",
+    ]),
+    visibility = [
+        "PUBLIC",
+    ],
+)
+
+vulkan_spv_shader_lib(
+    name = "gpuinfo_shader_lib",
+    spv_filegroups = {
+        ":gpuinfo_shaders": "glsl",
+    },
+)
+
+fb_xplat_cxx_binary(
+    name = "vulkan_gpuinfo",
+    srcs = glob([
+        "**/*.cpp",
+    ]),
+    headers = glob([
+        "**/*.h",
+    ]),
+    header_namespace = "/include",
+    include_directories = ["/include"],
+    platforms = ANDROID,
+    raw_headers = glob([
+        "**/*.h",
+    ]),
+    deps = [
+        ":gpuinfo_shader_lib",
+        "//executorch/backends/vulkan:vulkan_graph_runtime",
+    ],
+)
