6
6
import os
7
7
import csv
8
8
import io
9
+ import copy
9
10
from utils .utils import run , git_clone , create_build_path
10
11
from .base import Benchmark , Suite
11
12
from utils .result import BenchmarkMetadata , Result
@@ -336,6 +337,7 @@ def __init__(self, bench, runtime: RUNTIMES, ioq, MeasureCompletion=0, UseEvents
336
337
self .runtime = runtime
337
338
self .MeasureCompletion = MeasureCompletion
338
339
self .UseEvents = UseEvents
340
+ self .NumKernels = 10
339
341
super ().__init__ (
340
342
bench , f"api_overhead_benchmark_{ runtime .value } " , "SubmitKernel"
341
343
)
@@ -353,6 +355,16 @@ def name(self):
353
355
354
356
return f"api_overhead_benchmark_{ self .runtime .value } SubmitKernel { order } { completion_str } { events_str } "
355
357
358
+ def display_name (self ) -> str :
359
+ order = "in order" if self .ioq else "out of order"
360
+ info = []
361
+ if self .MeasureCompletion :
362
+ info .append ("with measure completion" )
363
+ if self .UseEvents :
364
+ info .append ("using events" )
365
+ additional_info = f" { ' ' .join (info )} " if info else ""
366
+ return f"{ self .runtime .value .upper ()} SubmitKernel { order } { additional_info } , NumKernels { self .NumKernels } "
367
+
356
368
def explicit_group (self ):
357
369
order = "In Order" if self .ioq else "Out Of Order"
358
370
completion_str = " With Completion" if self .MeasureCompletion else ""
@@ -373,7 +385,7 @@ def description(self) -> str:
373
385
374
386
return (
375
387
f"Measures CPU time overhead of submitting { order } kernels through { runtime_name } API{ completion_desc } . "
376
- f"Runs 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time."
388
+ f"Runs { self . NumKernels } simple kernels with minimal execution time to isolate API overhead from kernel execution time."
377
389
)
378
390
379
391
def range (self ) -> tuple [float , float ]:
@@ -385,11 +397,23 @@ def bin_args(self) -> list[str]:
385
397
f"--MeasureCompletion={ self .MeasureCompletion } " ,
386
398
"--iterations=100000" ,
387
399
"--Profiling=0" ,
388
- "--NumKernels=10 " ,
400
+ f "--NumKernels={ self . NumKernels } " ,
389
401
"--KernelExecTime=1" ,
390
402
f"--UseEvents={ self .UseEvents } " ,
391
403
]
392
404
405
+ def get_metadata (self ) -> dict [str , BenchmarkMetadata ]:
406
+ metadata_dict = super ().get_metadata ()
407
+
408
+ # Create CPU count variant with modified display name
409
+ cpu_count_name = self .name () + " CPU count"
410
+ cpu_count_metadata = copy .deepcopy (metadata_dict [self .name ()])
411
+ cpu_count_display_name = self .display_name () + ", CPU count"
412
+ cpu_count_metadata .display_name = cpu_count_display_name
413
+ metadata_dict [cpu_count_name ] = cpu_count_metadata
414
+
415
+ return metadata_dict
416
+
393
417
394
418
class ExecImmediateCopyQueue (ComputeBenchmark ):
395
419
def __init__ (self , bench , ioq , isCopyOnly , source , destination , size ):
@@ -404,6 +428,10 @@ def name(self):
404
428
order = "in order" if self .ioq else "out of order"
405
429
return f"api_overhead_benchmark_sycl ExecImmediateCopyQueue { order } from { self .source } to { self .destination } , size { self .size } "
406
430
431
+ def display_name (self ) -> str :
432
+ order = "in order" if self .ioq else "out of order"
433
+ return f"SYCL ExecImmediateCopyQueue { order } from { self .source } to { self .destination } , size { self .size } "
434
+
407
435
def description (self ) -> str :
408
436
order = "in-order" if self .ioq else "out-of-order"
409
437
operation = "copy-only" if self .isCopyOnly else "copy and command submission"
@@ -438,6 +466,9 @@ def __init__(self, bench, isCopyOnly, source, destination, size):
438
466
def name (self ):
439
467
return f"memory_benchmark_sycl QueueInOrderMemcpy from { self .source } to { self .destination } , size { self .size } "
440
468
469
+ def display_name (self ) -> str :
470
+ return f"SYCL QueueInOrderMemcpy from { self .source } to { self .destination } , size { self .size } "
471
+
441
472
def description (self ) -> str :
442
473
operation = "copy-only" if self .isCopyOnly else "copy and command submission"
443
474
return (
@@ -469,6 +500,9 @@ def __init__(self, bench, source, destination, size):
469
500
def name (self ):
470
501
return f"memory_benchmark_sycl QueueMemcpy from { self .source } to { self .destination } , size { self .size } "
471
502
503
+ def display_name (self ) -> str :
504
+ return f"SYCL QueueMemcpy from { self .source } to { self .destination } , size { self .size } "
505
+
472
506
def description (self ) -> str :
473
507
return (
474
508
f"Measures general SYCL queue memory copy performance from { self .source } to "
@@ -497,6 +531,9 @@ def __init__(self, bench, type, size, placement):
497
531
def name (self ):
498
532
return f"memory_benchmark_sycl StreamMemory, placement { self .placement } , type { self .type } , size { self .size } "
499
533
534
+ def display_name (self ) -> str :
535
+ return f"SYCL StreamMemory, placement { self .placement } , type { self .type } , size { self .size } "
536
+
500
537
def description (self ) -> str :
501
538
return (
502
539
f"Measures { self .placement } memory bandwidth using { self .type } pattern with "
@@ -530,6 +567,9 @@ def __init__(self, bench):
530
567
def name (self ):
531
568
return f"miscellaneous_benchmark_sycl VectorSum"
532
569
570
+ def display_name (self ) -> str :
571
+ return f"SYCL VectorSum"
572
+
533
573
def description (self ) -> str :
534
574
return (
535
575
"Measures performance of vector addition across 3D grid (512x256x256 elements) "
@@ -591,6 +631,19 @@ def name(self):
591
631
+ (" with barrier" if self .useBarrier else "" )
592
632
)
593
633
634
+ def display_name (self ) -> str :
635
+ info = []
636
+ if not self .useEvents :
637
+ info .append ("without events" )
638
+ if not self .useCopyOffload :
639
+ info .append ("without copy offload" )
640
+ additional_info = f", { ' ' .join (info )} " if info else ""
641
+ return (
642
+ f"UR MemcpyExecute, opsPerThread { self .numOpsPerThread } , "
643
+ f"numThreads { self .numThreads } , allocSize { self .allocSize } , srcUSM { self .srcUSM } , "
644
+ f"dstUSM { self .dstUSM } { additional_info } "
645
+ )
646
+
594
647
def explicit_group (self ):
595
648
return (
596
649
"MemcpyExecute opsPerThread: "
@@ -655,6 +708,9 @@ def description(self) -> str:
655
708
def name (self ):
656
709
return f"graph_api_benchmark_{ self .runtime .value } SinKernelGraph graphs:{ self .withGraphs } , numKernels:{ self .numKernels } "
657
710
711
+ def display_name (self ) -> str :
712
+ return f"{ self .runtime .value .upper ()} SinKernelGraph, graphs { self .withGraphs } , numKernels { self .numKernels } "
713
+
658
714
def unstable (self ) -> str :
659
715
return "This benchmark combines both eager and graph execution, and may not be representative of real use cases."
660
716
@@ -703,6 +759,9 @@ def description(self) -> str:
703
759
def name (self ):
704
760
return f"graph_api_benchmark_{ self .runtime .value } SubmitGraph numKernels:{ self .numKernels } ioq { self .inOrderQueue } measureCompletion { self .measureCompletionTime } "
705
761
762
+ def display_name (self ) -> str :
763
+ return f"{ self .runtime .value .upper ()} SubmitGraph, numKernels { self .numKernels } , ioq { self .inOrderQueue } , measureCompletion { self .measureCompletionTime } "
764
+
706
765
def get_tags (self ):
707
766
return [
708
767
"graph" ,
@@ -741,6 +800,11 @@ def description(self) -> str:
741
800
def name (self ):
742
801
return f"ulls_benchmark_{ self .runtime .value } EmptyKernel wgc:{ self .wgc } , wgs:{ self .wgs } "
743
802
803
+ def display_name (self ) -> str :
804
+ return (
805
+ f"{ self .runtime .value .upper ()} EmptyKernel, wgc { self .wgc } , wgs { self .wgs } "
806
+ )
807
+
744
808
def get_tags (self ):
745
809
return [runtime_to_tag_name (self .runtime ), "micro" , "latency" , "submit" ]
746
810
@@ -782,6 +846,9 @@ def description(self) -> str:
782
846
def name (self ):
783
847
return f"ulls_benchmark_{ self .runtime .value } KernelSwitch count { self .count } kernelTime { self .kernelTime } "
784
848
849
+ def display_name (self ) -> str :
850
+ return f"{ self .runtime .value .upper ()} KernelSwitch, count { self .count } , kernelTime { self .kernelTime } "
851
+
785
852
def get_tags (self ):
786
853
return [runtime_to_tag_name (self .runtime ), "micro" , "latency" , "submit" ]
787
854
@@ -818,6 +885,12 @@ def name(self):
818
885
f"usmMemoryPlacement:{ self .usm_memory_placement } size:{ self .size } measureMode:{ self .measure_mode } "
819
886
)
820
887
888
+ def display_name (self ) -> str :
889
+ return (
890
+ f"{ self .runtime .value .upper ()} UsmMemoryAllocation, "
891
+ f"usmMemoryPlacement { self .usm_memory_placement } , size { self .size } , measureMode { self .measure_mode } "
892
+ )
893
+
821
894
def explicit_group (self ):
822
895
return f"UsmMemoryAllocation"
823
896
@@ -870,6 +943,12 @@ def name(self):
870
943
f"usmMemoryPlacement:{ self .usm_memory_placement } allocationCount:{ self .allocation_count } size:{ self .size } measureMode:{ self .measure_mode } "
871
944
)
872
945
946
+ def display_name (self ) -> str :
947
+ return (
948
+ f"{ self .runtime .value .upper ()} UsmBatchMemoryAllocation, "
949
+ f"usmMemoryPlacement { self .usm_memory_placement } , allocationCount { self .allocation_count } , size { self .size } , measureMode { self .measure_mode } "
950
+ )
951
+
873
952
def explicit_group (self ):
874
953
return f"UsmBatchMemoryAllocation"
875
954
0 commit comments