pytorch · Esteb37 · Jul 11, 2024 · Jul 11, 2024 · Jul 11, 2024 · Jul 11, 2024
@@ -525,8 +525,29 @@ def generateVariantCombinations(
             if param_name not in exclude_params:
                 param_values = []
                 for value in value_list:
-                    suffix = value.get("SUFFIX", value["VALUE"])
-                    param_values.append((param_name, suffix, value["VALUE"]))
+                    if "RANGE" in value:
+                        value_range = value["RANGE"]
+                        suffix = value.get("SUFFIX", "")
+                        if isinstance(value_range, list) and len(value_range) == 2:
+                            for i in range(value_range[0], value_range[1] + 1):
+                                curr_suffix = (
+                                    suffix + "_" + str(i) if suffix else str(i)
+                                )
+                                param_values.append((param_name, curr_suffix, str(i)))
+                        else:
+                            raise ValueError(
+                                f"{value['RANGE']} is not a valid range. Must be in format [start, end] (inclusive)."
+                            )
+
+                    elif "VALUE" in value:
+                        suffix = value.get("SUFFIX", value["VALUE"])
+                        param_values.append((param_name, suffix, value["VALUE"]))
+
+                    else:
+                        raise KeyError(
+                            "Parameter must be 'VALUE: string' or 'RANGE: [a, b]'"
+                        )
+
                 all_iterated_params.append(param_values)
 
         return list(product(*all_iterated_params))

@@ -248,5 +248,22 @@ unsigned long QueryPool::get_total_shader_ns(std::string kernel_name) {
   }
   return 0;
 }
+
+// Computes the mean recorded execution time, in nanoseconds, over every entry
+// of shader_durations_ whose kernel_name equals `kernel_name`. Yields 0 when
+// no entry matches. The mean is an integer division, so any fractional
+// nanosecond is truncated.
+unsigned long QueryPool::get_mean_shader_ns(std::string kernel_name) {
+  uint64_t accumulated_ns = 0;
+  uint32_t num_matches = 0;
+  for (ShaderDuration& record : shader_durations_) {
+    // Only entries recorded for the requested kernel contribute.
+    if (!(record.kernel_name == kernel_name)) {
+      continue;
+    }
+    std::chrono::duration<size_t, std::nano> elapsed_ns(
+        record.execution_duration_ns);
+    accumulated_ns += elapsed_ns.count();
+    ++num_matches;
+  }
+  // Guard the division: no matching entry means there is no mean to report.
+  if (num_matches == 0) {
+    return 0;
+  }
+  return accumulated_ns / num_matches;
+}
 } // namespace vkapi
 } // namespace vkcompute
@@ -102,6 +102,7 @@ class QueryPool final {
   std::string generate_string_report();
   void print_results();
   unsigned long get_total_shader_ns(std::string kernel_name);
+  unsigned long get_mean_shader_ns(std::string kernel_name);
 
   operator bool() const {
     return querypool_ != VK_NULL_HANDLE;

@@ -0,0 +1,49 @@
+load("@fbcode_macros//build_defs:native_rules.bzl", "buck_filegroup")
+load("@fbsource//tools/build_defs:fb_xplat_cxx_binary.bzl", "fb_xplat_cxx_binary")
+load(
+    "@fbsource//tools/build_defs:platform_defs.bzl",
+    "ANDROID",
+)
+load(
+    "@fbsource//xplat/executorch/backends/vulkan:targets.bzl",
+    "vulkan_spv_shader_lib",
+)
+
+oncall("executorch")
+
+# Groups every file directly under glsl/ into a publicly visible filegroup so
+# that the shader library below can reference it.
+buck_filegroup(
+    name = "gpuinfo_shaders",
+    srcs = glob([
+        "glsl/*",
+    ]),
+    visibility = [
+        "PUBLIC",
+    ],
+)
+
+# Shader library built from the filegroup above via the vulkan_spv_shader_lib
+# macro. The "glsl" value is presumably the sub-directory of the filegroup that
+# holds the sources -- confirm against targets.bzl.
+vulkan_spv_shader_lib(
+    name = "gpuinfo_shader_lib",
+    spv_filegroups = {
+        ":gpuinfo_shaders": "glsl",
+    },
+)
+
+# Binary target, restricted to the ANDROID platform, built from every .cpp file
+# under this directory and linked against the shader library and the Vulkan
+# graph runtime.
+# NOTE(review): the same "**/*.h" glob is passed to both `headers` and
+# `raw_headers`; confirm that both are intended.
+fb_xplat_cxx_binary(
+    name = "vulkan_gpuinfo",
+    srcs = glob([
+        "**/*.cpp",
+    ]),
+    headers = glob([
+        "**/*.h",
+    ]),
+    header_namespace = "/include",
+    include_directories = ["/include"],
+    platforms = ANDROID,
+    raw_headers = glob([
+        "**/*.h",
+    ]),
+    deps = [
+        ":gpuinfo_shader_lib",
+        "//executorch/backends/vulkan:vulkan_graph_runtime",
+    ],
+)
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#version 450 core
+
+// PRECISION and NREG are substituted by the shader code generator before this
+// template is compiled; one variant is produced per NREG value.
+#define PRECISION ${PRECISION}
+
+layout(std430) buffer;
+
+// Write-only output buffer of floats (set 0, binding 0).
+layout(set = 0, binding = 0) buffer PRECISION restrict writeonly Buffer {
+  float data[];
+}
+out_buff;
+
+// Workgroup size comes from specialization constants 0, 1 and 2.
+layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
+
+// Number of loop iterations; specialization constant 3, defaulting to 1.
+layout(constant_id = 3) const int NITER = 1;
+
+// Declares NREG float variables, runs NITER rounds in which every variable is
+// multiplied by its predecessor (variable 0 by the last one), then stores all
+// of them to the output buffer.
+void main() {
+
+  $for k in range(int(NREG)):
+     float reg_data${k} = float(NITER) + ${k};
+
+  // Each variable depends on the previous one, so the NREG values form a
+  // dependency chain that is carried across iterations.
+  int i = 0;
+  for (; i < NITER; ++i) {
+    reg_data0 *= reg_data${int(NREG)-1};
+    $for k in range(1, int(NREG)):
+      reg_data${k} *= reg_data${k-1};
+  }
+  // i is non-negative here, so the shift yields 0 and every store below lands
+  // on data[0]. Presumably the stores exist only to keep the compiler from
+  // eliminating the computation above -- TODO confirm.
+  i = i >> 31;
+
+  $for k in range(int(NREG)):
+    out_buff.data[${k} * i] = reg_data${k};
+}
@@ -0,0 +1,16 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Code generation parameters for the reg_count shader template.
+reg_count:
+  parameter_names_with_default_values:
+    DTYPE: float
+    STORAGE: buffer
+  generate_variant_forall:
+    NREG:
+      # RANGE is expanded by generateVariantCombinations into one entry per
+      # integer from start to end inclusive (here 1 through 512); with no
+      # SUFFIX given, the integer itself is used as the suffix.
+      - RANGE: [1, 512]
+
+  shader_variants:
+    - NAME: reg_count
@@ -0,0 +1,133 @@
+/*
+ * Portions (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+/*
+ * Code sourced from
+ * https://github.com/microsoft/ArchProbe/blob/main/include/stats.hpp with the
+ * following MIT license
+ *
+ * MIT License
+ *
+ * Copyright (c) Microsoft Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE
+ */
+
+#pragma once
+#include <array>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+
+// Running arithmetic mean: keeps the sum and the count of every pushed sample
+// and converts implicitly to `sum / count`. Callers should check has_value()
+// before converting, since the conversion divides by the sample count.
+template <typename T>
+class AvgStats {
+ public:
+  using value_t = T;
+
+  // Folds one more sample into the running sum.
+  void push(T value) {
+    sum_ += value;
+    ++n_;
+  }
+
+  // True once at least one sample has been pushed.
+  bool has_value() const {
+    return n_ > 0;
+  }
+
+  // Mean of all samples pushed so far.
+  operator T() const {
+    return sum_ / n_;
+  }
+
+ private:
+  T sum_ = 0;
+  uint64_t n_ = 0;
+};
+
+// Moving average over the last NTap pushed samples, stored in a ring buffer.
+// has_value() becomes true once NTap samples have been pushed; from then on
+// the conversion operator yields the mean of the NTap most recent samples.
+template <typename T, size_t NTap>
+class NTapAvgStats {
+  static_assert(NTap > 0, "NTapAvgStats needs at least one tap");
+
+  // All state is explicitly initialized so that a default-initialized instance
+  // (e.g. a plain local variable) starts empty instead of reading
+  // indeterminate values for the write index and the ready flag.
+  std::array<double, NTap> hist_{};
+  size_t cur_idx_ = 0;
+  bool ready_ = false;
+
+ public:
+  typedef T value_t;
+
+  // Stores `value` in the next slot, overwriting the oldest sample once the
+  // buffer has wrapped around.
+  void push(T value) {
+    hist_[cur_idx_++] = value;
+    if (cur_idx_ >= NTap) {
+      cur_idx_ = 0;
+      ready_ = true;
+    }
+  }
+
+  // True once the buffer has been filled at least once.
+  inline bool has_value() const {
+    return ready_;
+  }
+
+  // Mean of all NTap slots. Only meaningful when has_value() is true; before
+  // that, unfilled slots contribute zeros.
+  operator T() const {
+    double out = 0.0;
+    for (double x : hist_) {
+      out += x;
+    }
+    out /= NTap;
+    return out;
+  }
+};
+
+// Detects a sudden jump in a stream of timings. Each new sample's distance
+// from the moving average of the last NTap accepted samples (its "delta time")
+// is compared against the running mean of all previously accepted delta times.
+template <uint32_t NTap>
+struct DtJumpFinder {
+ private:
+  // Moving average of the last NTap accepted timings.
+  NTapAvgStats<double, NTap> time_avg_;
+  // Running mean of the delta times of all accepted samples.
+  AvgStats<double> dtime_avg_;
+  double compensation_;
+  double threshold_;
+
+ public:
+  // Compensation is a small additive term, proportional to the current timing
+  // average, that is added to every delta time so that the algorithm works
+  // smoothly when a sequence of identical timings is ingested, which is
+  // pretty common in our tests. Threshold is how many times the mean delta
+  // time a new delta has to deviate by to be recognized as a jump.
+  DtJumpFinder(double compensation = 0.01, double threshold = 10)
+      : time_avg_(),
+        dtime_avg_(),
+        compensation_(compensation),
+        threshold_(threshold) {}
+
+  // Returns true when a jump is detected, i.e. the new sample's delta time
+  // differs from the mean delta time by more than threshold_ times that mean.
+  // In that case the sample is NOT folded into either average. Returns false
+  // otherwise, after recording the sample; this includes the warm-up phase in
+  // which the NTap history is not yet filled and no comparison is possible.
+  bool push(double time) {
+    if (time_avg_.has_value()) {
+      double dtime = std::abs(time - time_avg_) + (compensation_ * time_avg_);
+      if (dtime_avg_.has_value()) {
+        double ddtime = std::abs(dtime - dtime_avg_);
+        if (ddtime > threshold_ * dtime_avg_) {
+          return true;
+        }
+      }
+      dtime_avg_.push(dtime);
+    }
+    time_avg_.push(time);
+    return false;
+  }
+
+  // Mean delta time over all accepted samples.
+  double dtime_avg() const {
+    return dtime_avg_;
+  }
+  // Compensation term currently added to each delta time.
+  double compensate_time() const {
+    return compensation_ * time_avg_;
+  }
+};
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <executorch/backends/vulkan/runtime/api/api.h>
+
+using namespace vkcompute;
+using namespace api;
+
+#define QP context()->querypool()
+
+// Invokes `encode_kernel` `niter` times, submits the recorded work to the GPU
+// and waits for it to finish, then returns the mean execution time reported by
+// the query pool for `shader_id`, converted from nanoseconds to microseconds.
+// The query pool state is reset and the context flushed before returning.
+//
+// Declared inline because it is defined in a header: without it, every
+// translation unit including this file would emit its own external definition.
+inline auto benchmark_on_gpu(
+    std::string shader_id,
+    uint32_t niter,
+    std::function<void()> encode_kernel) {
+  auto fence = context()->fences().get_fence();
+
+  // Unsigned counter matches the type of niter, avoiding a signed/unsigned
+  // comparison and signed overflow for very large iteration counts.
+  for (uint32_t i = 0; i < niter; ++i) {
+    encode_kernel();
+  }
+
+  context()->submit_cmd_to_gpu(fence.get_submit_handle());
+  fence.wait();
+  QP.extract_results();
+  const uint64_t mean_ns = QP.get_mean_shader_ns(shader_id);
+  QP.reset_state();
+  context()->flush();
+
+  // ns -> us
+  return mean_ns / 1000.f;
+}
+
+// Calibrates `niter` so that one call to `run` takes at least roughly
+// `min_time_us`. Starts from 100 iterations; after each measurement that is
+// too short, rescales `niter` linearly by min_time_us / measured time. Stops
+// as soon as a measurement exceeds 99% of the target, or after 100 attempts.
+//
+// `run` must return the measured time for the current value of `niter`, in the
+// same unit as `min_time_us`.
+//
+// If a measurement cannot be extrapolated (zero, negative or NaN), calibration
+// stops and `niter` keeps its last valid value. If the rescaled count does not
+// fit in uint32_t it saturates at UINT32_MAX. Both cases previously converted
+// an out-of-range double to uint32_t, which is undefined behaviour.
+//
+// Declared inline because it is defined in a header.
+inline void ensure_min_niter(
+    double min_time_us,
+    uint32_t& niter,
+    std::function<double()> run) {
+  constexpr uint32_t kDefaultNiter = 100;
+  constexpr uint32_t kMaxAttempts = 100;
+  constexpr double kMaxNiter = static_cast<double>(UINT32_MAX);
+
+  niter = kDefaultNiter;
+  for (uint32_t attempt = 0; attempt < kMaxAttempts; ++attempt) {
+    const double t = run();
+    if (t > min_time_us * 0.99) {
+      return;
+    }
+    // Written as a negated comparison so that NaN is rejected as well.
+    if (!(t > 0.0)) {
+      return;
+    }
+    const double scaled = niter * min_time_us / t;
+    // NaN or negative results can only come from non-finite inputs.
+    if (!(scaled >= 0.0)) {
+      return;
+    }
+    niter = scaled < kMaxNiter ? static_cast<uint32_t>(scaled) : UINT32_MAX;
+  }
+}