8
8
import io
9
9
from utils .utils import run , git_clone , create_build_path
10
10
from .base import Benchmark , Suite
11
- from .result import Result
11
+ from utils .result import Result
12
12
from options import options
13
13
from enum import Enum
14
14
15
+
15
16
class ComputeBench (Suite ):
16
17
def __init__ (self , directory ):
17
18
self .directory = directory
@@ -47,9 +48,8 @@ def setup(self):
47
48
f"-Dunified-runtime_DIR={ options .ur } /lib/cmake/unified-runtime" ,
48
49
]
49
50
50
- print (f"{ self .__class__ .__name__ } : Run { configure_command } " )
51
51
run (configure_command , add_sycl = True )
52
- print ( f" { self . __class__ . __name__ } : Run cmake --build { build_path } -j" )
52
+
53
53
run (f"cmake --build { build_path } -j" , add_sycl = True )
54
54
55
55
self .built = True
@@ -73,16 +73,6 @@ def benchmarks(self) -> list[Benchmark]:
73
73
ExecImmediateCopyQueue (self , 0 , 1 , "Device" , "Device" , 1024 ),
74
74
ExecImmediateCopyQueue (self , 1 , 1 , "Device" , "Host" , 1024 ),
75
75
VectorSum (self ),
76
- MemcpyExecute (self , 400 , 1 , 102400 , 10 , 1 , 1 , 1 ),
77
- MemcpyExecute (self , 100 , 8 , 102400 , 10 , 1 , 1 , 1 ),
78
- MemcpyExecute (self , 400 , 8 , 1024 , 1000 , 1 , 1 , 1 ),
79
- MemcpyExecute (self , 10 , 16 , 1024 , 10000 , 1 , 1 , 1 ),
80
- MemcpyExecute (self , 400 , 1 , 102400 , 10 , 0 , 1 , 1 ),
81
- MemcpyExecute (self , 100 , 8 , 102400 , 10 , 0 , 1 , 1 ),
82
- MemcpyExecute (self , 400 , 8 , 1024 , 1000 , 0 , 1 , 1 ),
83
- MemcpyExecute (self , 10 , 16 , 1024 , 10000 , 0 , 1 , 1 ),
84
- MemcpyExecute (self , 4096 , 1 , 1024 , 10 , 0 , 1 , 0 ),
85
- MemcpyExecute (self , 4096 , 4 , 1024 , 10 , 0 , 1 , 0 ),
86
76
GraphApiSinKernelGraph (self , RUNTIMES .SYCL , 0 , 5 ),
87
77
GraphApiSinKernelGraph (self , RUNTIMES .SYCL , 1 , 5 ),
88
78
GraphApiSinKernelGraph (self , RUNTIMES .SYCL , 0 , 100 ),
@@ -98,6 +88,16 @@ def benchmarks(self) -> list[Benchmark]:
98
88
SubmitKernelUR (self , 0 , 0 ),
99
89
SubmitKernelUR (self , 1 , 0 ),
100
90
SubmitKernelUR (self , 1 , 1 ),
91
+ MemcpyExecute (self , 400 , 1 , 102400 , 10 , 1 , 1 , 1 ),
92
+ MemcpyExecute (self , 100 , 8 , 102400 , 10 , 1 , 1 , 1 ),
93
+ MemcpyExecute (self , 400 , 8 , 1024 , 1000 , 1 , 1 , 1 ),
94
+ MemcpyExecute (self , 10 , 16 , 1024 , 10000 , 1 , 1 , 1 ),
95
+ MemcpyExecute (self , 400 , 1 , 102400 , 10 , 0 , 1 , 1 ),
96
+ MemcpyExecute (self , 100 , 8 , 102400 , 10 , 0 , 1 , 1 ),
97
+ MemcpyExecute (self , 400 , 8 , 1024 , 1000 , 0 , 1 , 1 ),
98
+ MemcpyExecute (self , 10 , 16 , 1024 , 10000 , 0 , 1 , 1 ),
99
+ MemcpyExecute (self , 4096 , 1 , 1024 , 10 , 0 , 1 , 0 ),
100
+ MemcpyExecute (self , 4096 , 4 , 1024 , 10 , 0 , 1 , 0 ),
101
101
GraphApiSinKernelGraph (self , RUNTIMES .UR , 0 , 5 ),
102
102
GraphApiSinKernelGraph (self , RUNTIMES .UR , 1 , 5 ),
103
103
GraphApiSinKernelGraph (self , RUNTIMES .UR , 0 , 100 ),
@@ -136,6 +136,9 @@ def setup(self):
136
136
def explicit_group(self):
    """Return the explicit group label for this benchmark: the empty string."""
    group_label = ""
    return group_label
138
138
139
def description(self) -> str:
    """Return the human-readable description; this implementation has none."""
    empty_description = ""
    return empty_description
141
+
139
142
def run (self , env_vars ) -> list [Result ]:
140
143
command = [
141
144
f"{ self .benchmark_bin } " ,
@@ -167,6 +170,7 @@ def run(self, env_vars) -> list[Result]:
167
170
env = env_vars ,
168
171
stdout = result ,
169
172
unit = parse_unit_type (unit ),
173
+ description = self .description ()
170
174
)
171
175
)
172
176
return ret
@@ -221,6 +225,13 @@ def bin_args(self) -> list[str]:
221
225
"--KernelExecTime=1" ,
222
226
]
223
227
228
def description(self) -> str:
    """Describe the SYCL kernel-submission overhead benchmark.

    Reads ``self.ioq``: a truthy value yields "in-order", otherwise
    "out-of-order".

    Returns:
        A two-sentence description string.
    """
    order = "in-order" if self.ioq else "out-of-order"
    # Adjacent literals are concatenated verbatim, so the first sentence must
    # carry its own trailing space; without it the text read "SYCL API.Uses".
    return (
        f"Measures CPU time overhead of submitting {order} kernels through SYCL API. "
        "Uses 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time."
    )
234
+
224
235
225
236
class SubmitKernelUR (ComputeBenchmark ):
226
237
def __init__ (self , bench , ioq , measureCompletion ):
@@ -237,6 +248,15 @@ def name(self):
237
248
def explicit_group(self):
    """Return the explicit group label for this benchmark: "SubmitKernel"."""
    group_label = "SubmitKernel"
    return group_label
239
250
251
def description(self) -> str:
    """Describe the Unified Runtime kernel-submission overhead benchmark.

    Reads ``self.ioq`` (truthy -> "in-order", else "out-of-order") and
    ``self.measureCompletion`` (truthy -> "including", else "excluding").
    """
    if self.ioq:
        queue_order = "in-order"
    else:
        queue_order = "out-of-order"

    if self.measureCompletion:
        completion_mode = "including"
    else:
        completion_mode = "excluding"

    sentences = [
        f"Measures CPU time overhead of submitting {queue_order} kernels through Unified Runtime API, ",
        f"{completion_mode} kernel completion time. Uses 10 simple kernels with minimal execution time ",
        "to isolate API overhead.",
    ]
    return "".join(sentences)
259
+
240
260
def bin_args (self ) -> list [str ]:
241
261
return [
242
262
f"--Ioq={ self .ioq } " ,
@@ -261,6 +281,14 @@ def name(self):
261
281
def explicit_group(self):
    """Return the explicit group label for this benchmark: "SubmitKernel"."""
    group_label = "SubmitKernel"
    return group_label
263
283
284
def description(self) -> str:
    """Describe the Level Zero kernel-submission overhead benchmark.

    Reads ``self.ioq``: a truthy value yields "in-order", otherwise
    "out-of-order".
    """
    if self.ioq:
        queue_order = "in-order"
    else:
        queue_order = "out-of-order"

    first = f"Measures CPU time overhead of submitting {queue_order} kernels through Level Zero API. "
    second = "Uses immediate command lists with 10 minimal kernels to isolate submission overhead "
    third = "from execution time."
    return first + second + third
291
+
264
292
def bin_args (self ) -> list [str ]:
265
293
return [
266
294
f"--Ioq={ self .ioq } " ,
@@ -286,6 +314,14 @@ def name(self):
286
314
order = "in order" if self .ioq else "out of order"
287
315
return f"api_overhead_benchmark_sycl ExecImmediateCopyQueue { order } from { self .source } to { self .destination } , size { self .size } "
288
316
317
def description(self) -> str:
    """Describe the immediate copy-queue overhead benchmark.

    Reads ``self.ioq`` (queue ordering wording), ``self.isCopyOnly``
    (operation wording) and interpolates ``self.source``,
    ``self.destination`` and ``self.size`` into the text.
    """
    if self.ioq:
        queue_order = "in-order"
    else:
        queue_order = "out-of-order"

    if self.isCopyOnly:
        workload = "copy-only"
    else:
        workload = "copy and command submission"

    pieces = (
        f"Measures SYCL {queue_order} queue overhead for {workload} from {self.source} to ",
        f"{self.destination} memory with {self.size} bytes. Tests immediate execution overheads.",
    )
    return "".join(pieces)
324
+
289
325
def bin_args (self ) -> list [str ]:
290
326
return [
291
327
"--iterations=100000" ,
@@ -309,6 +345,13 @@ def __init__(self, bench, isCopyOnly, source, destination, size):
309
345
def name(self):
    """Build the benchmark name from ``self.source``, ``self.destination`` and ``self.size``."""
    # NOTE(review): the literal text, including the space before the comma and
    # the trailing space, is reproduced exactly as it appears in the reviewed
    # source; that spacing may be an extraction artifact -- confirm upstream.
    src, dst, nbytes = self.source, self.destination, self.size
    return f"memory_benchmark_sycl QueueInOrderMemcpy from {src} to {dst} , size {nbytes} "
311
347
348
def description(self) -> str:
    """Describe the in-order queue memcpy benchmark.

    Reads ``self.isCopyOnly`` (operation wording) and interpolates
    ``self.source``, ``self.destination`` and ``self.size``.
    """
    if self.isCopyOnly:
        workload = "copy-only"
    else:
        workload = "copy and command submission"

    head = f"Measures SYCL in-order queue memory copy performance for {workload} from "
    tail = f"{self.source} to {self.destination} with {self.size} bytes, executed 100 times per iteration."
    return head + tail
354
+
312
355
def bin_args (self ) -> list [str ]:
313
356
return [
314
357
"--iterations=10000" ,
@@ -330,6 +373,12 @@ def __init__(self, bench, source, destination, size):
330
373
def name(self):
    """Build the benchmark name from ``self.source``, ``self.destination`` and ``self.size``."""
    # NOTE(review): the literal text, including the space before the comma and
    # the trailing space, is reproduced exactly as it appears in the reviewed
    # source; that spacing may be an extraction artifact -- confirm upstream.
    src, dst, nbytes = self.source, self.destination, self.size
    return f"memory_benchmark_sycl QueueMemcpy from {src} to {dst} , size {nbytes} "
332
375
376
def description(self) -> str:
    """Describe the queue memcpy benchmark using ``self.source``, ``self.destination`` and ``self.size``."""
    head = f"Measures general SYCL queue memory copy performance from {self.source} to "
    tail = f"{self.destination} with {self.size} bytes per operation."
    return head + tail
381
+
333
382
def bin_args (self ) -> list [str ]:
334
383
return [
335
384
"--iterations=10000" ,
@@ -349,6 +398,12 @@ def __init__(self, bench, type, size, placement):
349
398
def name(self):
    """Build the benchmark name from ``self.placement``, ``self.type`` and ``self.size``."""
    # NOTE(review): the literal text, including the spaces before the commas
    # and the trailing space, is reproduced exactly as it appears in the
    # reviewed source; that spacing may be an extraction artifact -- confirm
    # upstream.
    where, pattern, nbytes = self.placement, self.type, self.size
    return f"memory_benchmark_sycl StreamMemory, placement {where} , type {pattern} , size {nbytes} "
351
400
401
def description(self) -> str:
    """Describe the stream-memory benchmark using ``self.placement``, ``self.type`` and ``self.size``."""
    head = f"Measures {self.placement} memory bandwidth using {self.type} pattern with "
    tail = f"{self.size} bytes. Higher values (GB/s) indicate better performance."
    return head + tail
406
+
352
407
# The measurement is reported in GB/s, so a lower value is not a better result.
def lower_is_better(self):
    """Return False: for this benchmark lower values are not better."""
    lower_wins = False
    return lower_wins
@@ -362,6 +417,7 @@ def bin_args(self) -> list[str]:
362
417
"--useEvents=0" ,
363
418
"--contents=Zeros" ,
364
419
"--multiplier=1" ,
420
+ "--vectorSize=1" ,
365
421
]
366
422
367
423
@@ -372,6 +428,12 @@ def __init__(self, bench):
372
428
def name(self):
    """Return the fixed benchmark name for the vector-sum benchmark."""
    # The name has no interpolated fields, so a plain string literal suffices.
    return "miscellaneous_benchmark_sycl VectorSum"
374
430
431
def description(self) -> str:
    """Return the fixed description of the vector-sum benchmark."""
    fragments = [
        "Measures performance of vector addition across 3D grid (512x256x256 elements) ",
        "using SYCL.",
    ]
    return "".join(fragments)
436
+
375
437
def bin_args (self ) -> list [str ]:
376
438
return [
377
439
"--iterations=1000" ,
@@ -408,6 +470,16 @@ def name(self):
408
470
+ (" without events" if not self .useEvents else "" )
409
471
)
410
472
473
def description(self) -> str:
    """Describe the multithreaded memcpy benchmark.

    ``self.srcUSM`` / ``self.dstUSM`` equal to 1 are worded as "device",
    anything else as "host"; a truthy ``self.useEvents`` is worded as
    "with" events, otherwise "without". ``self.numThreads``,
    ``self.numOpsPerThread`` and ``self.allocSize`` are interpolated as-is.
    """

    def _memory_kind(usm_flag):
        # Only the exact value 1 is worded as device memory.
        if usm_flag == 1:
            return "device"
        return "host"

    source_kind = _memory_kind(self.srcUSM)
    target_kind = _memory_kind(self.dstUSM)
    if self.useEvents:
        event_mode = "with"
    else:
        event_mode = "without"

    pieces = (
        f"Measures multithreaded memory copy performance with {self.numThreads} threads ",
        f"each performing {self.numOpsPerThread} operations on {self.allocSize} bytes ",
        f"from {source_kind} to {target_kind} memory {event_mode} events.",
    )
    return "".join(pieces)
482
+
411
483
def bin_args (self ) -> list [str ]:
412
484
return [
413
485
"--Ioq=1" ,
@@ -441,6 +513,13 @@ def __init__(self, bench, runtime: RUNTIMES, withGraphs, numKernels):
441
513
def explicit_group(self):
    """Return the explicit group label, which embeds ``self.numKernels``."""
    # NOTE(review): the trailing space inside the literal is reproduced exactly
    # as it appears in the reviewed source; it may be an extraction artifact --
    # confirm upstream.
    kernel_count = self.numKernels
    return f"SinKernelGraph {kernel_count} "
443
515
516
def description(self) -> str:
    """Describe the sin-kernel graph benchmark.

    Uses the upper-cased ``self.runtime.value``, ``self.numKernels`` and
    ``self.withGraphs`` (truthy -> "using graphs", else "without graphs").
    """
    if self.withGraphs:
        execution_mode = "using graphs"
    else:
        execution_mode = "without graphs"

    runtime_label = self.runtime.value.upper()
    # NOTE(review): the space before the period after the execution mode is
    # reproduced exactly as it appears in the reviewed source; it may be an
    # extraction artifact -- confirm upstream.
    head = f"Measures {runtime_label} performance when executing {self.numKernels} "
    tail = f"sin kernels {execution_mode} . Tests overhead and benefits of graph-based execution."
    return head + tail
522
+
444
523
def name(self):
    """Build the benchmark name from ``self.runtime.value``, ``self.withGraphs`` and ``self.numKernels``."""
    # NOTE(review): the literal text, including the spaces after the runtime
    # value and before the comma and the trailing space, is reproduced exactly
    # as it appears in the reviewed source; that spacing may be an extraction
    # artifact -- confirm upstream.
    runtime_tag = self.runtime.value
    graphs_flag = self.withGraphs
    kernel_count = self.numKernels
    return f"graph_api_benchmark_{runtime_tag} SinKernelGraph graphs:{graphs_flag} , numKernels:{kernel_count} "
446
525
@@ -452,28 +531,3 @@ def bin_args(self) -> list[str]:
452
531
"--withCopyOffload=1" ,
453
532
"--immediateAppendCmdList=0" ,
454
533
]
455
-
456
-
457
- class GraphApiSubmitExecGraph (ComputeBenchmark ):
458
- def __init__ (self , bench , ioq , submit , numKernels ):
459
- self .ioq = ioq
460
- self .submit = submit
461
- self .numKernels = numKernels
462
- super ().__init__ (bench , "graph_api_benchmark_sycl" , "SubmitExecGraph" )
463
-
464
- def name (self ):
465
- return f"graph_api_benchmark_sycl SubmitExecGraph ioq:{ self .ioq } , submit:{ self .submit } , numKernels:{ self .numKernels } "
466
-
467
- def explicit_group (self ):
468
- if self .submit :
469
- return "SubmitGraph"
470
- else :
471
- return "ExecGraph"
472
-
473
- def bin_args (self ) -> list [str ]:
474
- return [
475
- "--iterations=100" ,
476
- f"--measureSubmit={ self .submit } " ,
477
- f"--ioq={ self .ioq } " ,
478
- f"--numKernels={ self .numKernels } " ,
479
- ]
0 commit comments