14
14
15
15
16
16
class RUNTIMES(Enum):
    """Runtime backends a benchmark can target.

    The enum value is the suffix embedded in compute-benchmarks binary
    names (e.g. ``api_overhead_benchmark_sycl``).
    """

    # Preview SYCL backend; filtered out of enabled_runtimes() by default
    # and only enabled per benchmark via extra_runtimes.
    SYCL_PREVIEW = "syclpreview"
    SYCL = "sycl"
    LEVEL_ZERO = "l0"
    UR = "ur"
20
21
21
22
22
23
def runtime_to_name (runtime : RUNTIMES ) -> str :
23
24
return {
25
+ RUNTIMES .SYCL_PREVIEW : "SYCL Preview" ,
24
26
RUNTIMES .SYCL : "SYCL" ,
25
27
RUNTIMES .LEVEL_ZERO : "Level Zero" ,
26
28
RUNTIMES .UR : "Unified Runtime" ,
@@ -29,6 +31,7 @@ def runtime_to_name(runtime: RUNTIMES) -> str:
29
31
30
32
def runtime_to_tag_name (runtime : RUNTIMES ) -> str :
31
33
return {
34
+ RUNTIMES .SYCL_PREVIEW : "SYCL" ,
32
35
RUNTIMES .SYCL : "SYCL" ,
33
36
RUNTIMES .LEVEL_ZERO : "L0" ,
34
37
RUNTIMES .UR : "UR" ,
@@ -46,7 +49,7 @@ def git_url(self) -> str:
46
49
return "https://github.com/intel/compute-benchmarks.git"
47
50
48
51
def git_hash(self) -> str:
    # Pinned commit of the compute-benchmarks repository (see git_url)
    # so results stay reproducible against a known benchmark revision.
    return "9c1ed6fd59a7a40f8829251df4b5c0d847591183"
50
53
51
54
def setup (self ):
52
55
if options .sycl is None :
@@ -107,10 +110,16 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]:
107
110
),
108
111
}
109
112
110
- def enabled_runtimes (self , supported_runtimes = None ):
113
+ def enabled_runtimes (self , supported_runtimes = None , extra_runtimes = None ):
111
114
# all runtimes in the RUNTIMES enum
112
115
runtimes = supported_runtimes or list (RUNTIMES )
113
116
117
+ # filter out SYCL_PREVIEW which is not supported by default in all benchmarks
118
+ runtimes = [r for r in runtimes if r != RUNTIMES .SYCL_PREVIEW ]
119
+
120
+ if extra_runtimes is not None :
121
+ runtimes .extend (extra_runtimes )
122
+
114
123
# Filter out UR if not available
115
124
if options .ur is None :
116
125
runtimes = [r for r in runtimes if r != RUNTIMES .UR ]
@@ -131,21 +140,17 @@ def benchmarks(self) -> list[Benchmark]:
131
140
benches = []
132
141
133
142
# Add SubmitKernel benchmarks using loops
134
- for runtime in self .enabled_runtimes ():
143
+ for runtime in self .enabled_runtimes (extra_runtimes = [ RUNTIMES . SYCL_PREVIEW ] ):
135
144
for in_order_queue in [0 , 1 ]:
136
145
for measure_completion in [0 , 1 ]:
137
- for enqueue_functions in [0 , 1 ]:
138
- # only SYCL backend supports enqueue functions
139
- if enqueue_functions == 1 and runtime != RUNTIMES .SYCL :
140
- continue
141
-
146
+ for use_events in [0 , 1 ]:
142
147
benches .append (
143
148
SubmitKernel (
144
149
self ,
145
150
runtime ,
146
151
in_order_queue ,
147
152
measure_completion ,
148
- enqueue_functions ,
153
+ use_events ,
149
154
)
150
155
)
151
156
@@ -305,13 +310,11 @@ def teardown(self):
305
310
306
311
307
312
class SubmitKernel (ComputeBenchmark ):
308
- def __init__ (
309
- self , bench , runtime : RUNTIMES , ioq , MeasureCompletion = 0 , EnqueueFunctions = 0
310
- ):
313
def __init__(self, bench, runtime: RUNTIMES, ioq, MeasureCompletion=0, UseEvents=0):
    """One SubmitKernel benchmark configuration.

    bench: owning suite, forwarded to the ComputeBenchmark base.
    runtime: RUNTIMES backend whose binary variant is run.
    ioq: 1 for an in-order queue, 0 for out-of-order.
    MeasureCompletion: 1 to include kernel completion time in the measurement.
    UseEvents: 1 to submit with events; 0 selects the eventless path.
    """
    self.ioq = ioq
    self.runtime = runtime
    self.MeasureCompletion = MeasureCompletion
    self.UseEvents = UseEvents
    # The benchmark binary name embeds the runtime suffix,
    # e.g. api_overhead_benchmark_sycl.
    super().__init__(
        bench, f"api_overhead_benchmark_{runtime.value}", "SubmitKernel"
    )
@@ -322,25 +325,30 @@ def get_tags(self):
322
325
def name(self):
    """Full, human-readable benchmark name for this configuration."""
    order = "in order" if self.ioq else "out of order"
    suffix = ""
    if self.MeasureCompletion:
        suffix += " with measure completion"
    # Inverted on purpose: using events is the default (empty suffix)
    # so names keep matching already-stored results.
    if not self.UseEvents:
        suffix += " not using events"
    return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{suffix}"
332
334
335
def explicit_group(self):
    """Group label used to chart related SubmitKernel variants together."""
    pieces = ["SubmitKernel"]
    pieces.append("In Order" if self.ioq else "Out Of Order")
    if self.MeasureCompletion:
        pieces.append("With Completion")
    # Inverted on purpose: using events is the default (no suffix)
    # so group names keep matching already-stored results.
    if not self.UseEvents:
        pieces.append("not using events")
    return " ".join(pieces)
336
344
337
345
def description (self ) -> str :
338
346
order = "in-order" if self .ioq else "out-of-order"
339
347
runtime_name = runtime_to_name (self .runtime )
340
348
341
- completion_desc = ""
342
- if self .runtime == RUNTIMES . UR :
343
- completion_desc = f", { 'including' if self . MeasureCompletion else 'excluding' } kernel completion time"
349
+ completion_desc = completion_desc = (
350
+ f", { 'including' if self .MeasureCompletion else 'excluding' } kernel completion time"
351
+ )
344
352
345
353
return (
346
354
f"Measures CPU time overhead of submitting { order } kernels through { runtime_name } API{ completion_desc } . "
@@ -353,13 +361,12 @@ def range(self) -> tuple[float, float]:
353
361
def bin_args(self) -> list[str]:
    """Command-line flags passed to the api_overhead_benchmark binary."""
    # Flag name -> value, in the order the harness has always emitted them.
    flags = {
        "Ioq": self.ioq,
        "MeasureCompletion": self.MeasureCompletion,
        "iterations": 100000,
        "Profiling": 0,
        "NumKernels": 10,
        "KernelExecTime": 1,
        "UseEvents": self.UseEvents,
    }
    return [f"--{key}={value}" for key, value in flags.items()]
364
371
365
372
@@ -620,6 +627,9 @@ def bin_args(self) -> list[str]:
620
627
]
621
628
622
629
630
+ # TODO: once L0 SubmitGraph exists, this needs to be cleaned up split benchmarks into more groups,
631
+ # set all the parameters (NoEvents 0/1, which should get inverted into UseEvents) and
632
+ # unify the benchmark naming scheme with SubmitKernel.
623
633
class GraphApiSubmitGraph (ComputeBenchmark ):
624
634
def __init__ (
625
635
self , bench , runtime : RUNTIMES , inOrderQueue , numKernels , measureCompletionTime
@@ -659,6 +669,7 @@ def bin_args(self) -> list[str]:
659
669
f"--InOrderQueue={ self .inOrderQueue } " ,
660
670
"--Profiling=0" ,
661
671
"--KernelExecutionTime=1" ,
672
+ "--NoEvents=1" , # not all implementations support NoEvents=0
662
673
]
663
674
664
675
0 commit comments