14
14
15
15
16
16
class RUNTIMES(Enum):
    """Runtime backends a benchmark can target.

    The enum value is the suffix embedded in compute-benchmarks binary
    names (e.g. ``api_overhead_benchmark_sycl``).
    """

    # Preview SYCL backend; filtered out of enabled_runtimes() by default
    # and only enabled per benchmark via extra_runtimes.
    SYCL_PREVIEW = "syclpreview"
    SYCL = "sycl"
    LEVEL_ZERO = "l0"
    UR = "ur"
20
21
21
22
22
23
def runtime_to_name (runtime : RUNTIMES ) -> str :
23
24
return {
25
+ RUNTIMES .SYCL_PREVIEW : "SYCL Preview" ,
24
26
RUNTIMES .SYCL : "SYCL" ,
25
27
RUNTIMES .LEVEL_ZERO : "Level Zero" ,
26
28
RUNTIMES .UR : "Unified Runtime" ,
@@ -29,6 +31,7 @@ def runtime_to_name(runtime: RUNTIMES) -> str:
29
31
30
32
def runtime_to_tag_name (runtime : RUNTIMES ) -> str :
31
33
return {
34
+ RUNTIMES .SYCL_PREVIEW : "SYCL" ,
32
35
RUNTIMES .SYCL : "SYCL" ,
33
36
RUNTIMES .LEVEL_ZERO : "L0" ,
34
37
RUNTIMES .UR : "UR" ,
@@ -46,7 +49,7 @@ def git_url(self) -> str:
46
49
return "https://github.com/intel/compute-benchmarks.git"
47
50
48
51
def git_hash(self) -> str:
    # Pinned commit of the compute-benchmarks repository (see git_url)
    # so results stay reproducible against a known benchmark revision.
    return "9c1ed6fd59a7a40f8829251df4b5c0d847591183"
50
53
51
54
def setup (self ):
52
55
if options .sycl is None :
@@ -107,10 +110,16 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]:
107
110
),
108
111
}
109
112
110
- def enabled_runtimes (self , supported_runtimes = None ):
113
+ def enabled_runtimes (self , supported_runtimes = None , extra_runtimes = None ):
111
114
# all runtimes in the RUNTIMES enum
112
115
runtimes = supported_runtimes or list (RUNTIMES )
113
116
117
+ # filter out SYCL_PREVIEW which is not supported by default in all benchmarks
118
+ runtimes = [r for r in runtimes if r != RUNTIMES .SYCL_PREVIEW ]
119
+
120
+ if extra_runtimes is not None :
121
+ runtimes .extend (extra_runtimes )
122
+
114
123
# Filter out UR if not available
115
124
if options .ur is None :
116
125
runtimes = [r for r in runtimes if r != RUNTIMES .UR ]
@@ -131,21 +140,17 @@ def benchmarks(self) -> list[Benchmark]:
131
140
benches = []
132
141
133
142
# Add SubmitKernel benchmarks using loops
134
- for runtime in self .enabled_runtimes ():
143
+ for runtime in self .enabled_runtimes (extra_runtimes = [ RUNTIMES . SYCL_PREVIEW ] ):
135
144
for in_order_queue in [0 , 1 ]:
136
145
for measure_completion in [0 , 1 ]:
137
- for enqueue_functions in [0 , 1 ]:
138
- # only SYCL backend supports enqueue functions
139
- if enqueue_functions == 1 and runtime != RUNTIMES .SYCL :
140
- continue
141
-
146
+ for use_events in [0 , 1 ]:
142
147
benches .append (
143
148
SubmitKernel (
144
149
self ,
145
150
runtime ,
146
151
in_order_queue ,
147
152
measure_completion ,
148
- enqueue_functions ,
153
+ use_events ,
149
154
)
150
155
)
151
156
@@ -305,13 +310,11 @@ def teardown(self):
305
310
306
311
307
312
class SubmitKernel (ComputeBenchmark ):
308
- def __init__ (
309
- self , bench , runtime : RUNTIMES , ioq , MeasureCompletion = 0 , EnqueueFunctions = 0
310
- ):
313
def __init__(self, bench, runtime: RUNTIMES, ioq, MeasureCompletion=0, UseEvents=0):
    """One SubmitKernel benchmark configuration.

    bench: owning suite, forwarded to the ComputeBenchmark base.
    runtime: RUNTIMES backend whose binary variant is run.
    ioq: 1 for an in-order queue, 0 for out-of-order.
    MeasureCompletion: 1 to include kernel completion time in the measurement.
    UseEvents: 1 to submit with events; 0 selects the eventless path.
    """
    self.ioq = ioq
    self.runtime = runtime
    self.MeasureCompletion = MeasureCompletion
    self.UseEvents = UseEvents
    # The benchmark binary name embeds the runtime suffix,
    # e.g. api_overhead_benchmark_sycl.
    super().__init__(
        bench, f"api_overhead_benchmark_{runtime.value}", "SubmitKernel"
    )
@@ -322,25 +325,30 @@ def get_tags(self):
322
325
def name(self):
    """Full, human-readable benchmark name for this configuration."""
    order = "in order" if self.ioq else "out of order"
    suffix = ""
    if self.MeasureCompletion:
        suffix += " with measure completion"
    # Inverted on purpose: using events is the default (empty suffix)
    # so names keep matching already-stored results.
    if not self.UseEvents:
        suffix += " not using events"
    return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{suffix}"
332
334
335
def explicit_group(self):
    """Group label used to chart related SubmitKernel variants together."""
    pieces = ["SubmitKernel"]
    pieces.append("In Order" if self.ioq else "Out Of Order")
    if self.MeasureCompletion:
        pieces.append("With Completion")
    # Inverted on purpose: using events is the default (no suffix)
    # so group names keep matching already-stored results.
    if not self.UseEvents:
        pieces.append("not using events")
    return " ".join(pieces)
336
344
337
345
def description (self ) -> str :
338
346
order = "in-order" if self .ioq else "out-of-order"
339
347
runtime_name = runtime_to_name (self .runtime )
340
348
341
- completion_desc = ""
342
- if self .runtime == RUNTIMES . UR :
343
- completion_desc = f", { 'including' if self . MeasureCompletion else 'excluding' } kernel completion time"
349
+ completion_desc = completion_desc = (
350
+ f", { 'including' if self .MeasureCompletion else 'excluding' } kernel completion time"
351
+ )
344
352
345
353
return (
346
354
f"Measures CPU time overhead of submitting { order } kernels through { runtime_name } API{ completion_desc } . "
@@ -353,13 +361,12 @@ def range(self) -> tuple[float, float]:
353
361
def bin_args(self) -> list[str]:
    """Command-line flags passed to the api_overhead_benchmark binary."""
    # Flag name -> value, in the order the harness has always emitted them.
    flags = {
        "Ioq": self.ioq,
        "MeasureCompletion": self.MeasureCompletion,
        "iterations": 100000,
        "Profiling": 0,
        "NumKernels": 10,
        "KernelExecTime": 1,
        "UseEvents": self.UseEvents,
    }
    return [f"--{key}={value}" for key, value in flags.items()]
364
371
365
372
@@ -620,6 +627,9 @@ def bin_args(self) -> list[str]:
620
627
]
621
628
622
629
630
+ # TODO: once L0 SubmitGraph exists, this needs to be cleaned up split benchmarks into more groups,
631
+ # set all the parameters (NoEvents 0/1, which should get inverted into UseEvents) and
632
+ # unify the benchmark naming scheme with SubmitKernel.
623
633
class GraphApiSubmitGraph (ComputeBenchmark ):
624
634
def __init__ (
625
635
self , bench , runtime : RUNTIMES , inOrderQueue , numKernels , measureCompletionTime
@@ -659,6 +669,7 @@ def bin_args(self) -> list[str]:
659
669
f"--InOrderQueue={ self .inOrderQueue } " ,
660
670
"--Profiling=0" ,
661
671
"--KernelExecutionTime=1" ,
672
+ "--NoEvents=1" , # not all implementations support NoEvents=0
662
673
]
663
674
664
675
0 commit comments