@@ -59,14 +59,16 @@ def benchmarks(self) -> list[Benchmark]:
59
59
ExecImmediateCopyQueue (self , 0 , 1 , 'Device' , 'Device' , 1024 ),
60
60
ExecImmediateCopyQueue (self , 1 , 1 , 'Device' , 'Host' , 1024 ),
61
61
VectorSum (self ),
62
- MemcpyExecute (self , 400 , 1 , 102400 , 10 , 1 , 1 ),
63
- MemcpyExecute (self , 100 , 8 , 102400 , 10 , 1 , 1 ),
64
- MemcpyExecute (self , 400 , 8 , 1024 , 1000 , 1 , 1 ),
65
- MemcpyExecute (self , 10 , 16 , 1024 , 10000 , 1 , 1 ),
66
- MemcpyExecute (self , 400 , 1 , 102400 , 10 , 0 , 1 ),
67
- MemcpyExecute (self , 100 , 8 , 102400 , 10 , 0 , 1 ),
68
- MemcpyExecute (self , 400 , 8 , 1024 , 1000 , 0 , 1 ),
69
- MemcpyExecute (self , 10 , 16 , 1024 , 10000 , 0 , 1 ),
62
+ MemcpyExecute (self , 400 , 1 , 102400 , 10 , 1 , 1 , 1 ),
63
+ MemcpyExecute (self , 100 , 8 , 102400 , 10 , 1 , 1 , 1 ),
64
+ MemcpyExecute (self , 400 , 8 , 1024 , 1000 , 1 , 1 , 1 ),
65
+ MemcpyExecute (self , 10 , 16 , 1024 , 10000 , 1 , 1 , 1 ),
66
+ MemcpyExecute (self , 400 , 1 , 102400 , 10 , 0 , 1 , 1 ),
67
+ MemcpyExecute (self , 100 , 8 , 102400 , 10 , 0 , 1 , 1 ),
68
+ MemcpyExecute (self , 400 , 8 , 1024 , 1000 , 0 , 1 , 1 ),
69
+ MemcpyExecute (self , 10 , 16 , 1024 , 10000 , 0 , 1 , 1 ),
70
+ MemcpyExecute (self , 4096 , 1 , 1024 , 10 , 0 , 1 , 0 ),
71
+ MemcpyExecute (self , 4096 , 4 , 1024 , 10 , 0 , 1 , 0 ),
70
72
]
71
73
72
74
if options .ur is not None :
@@ -282,22 +284,23 @@ def bin_args(self) -> list[str]:
282
284
]
283
285
284
286
class MemcpyExecute (ComputeBenchmark ):
285
def __init__(self, bench, numOpsPerThread, numThreads, allocSize, iterations, srcUSM, dstUSM, useEvent=1):
    """Record the parameters of one MemcpyExecute benchmark configuration.

    Args:
        bench: Passed through unchanged to the ComputeBenchmark constructor.
        numOpsPerThread: Stored on the instance; reported as ``opsPerThread``
            by ``name()``.
        numThreads: Stored on the instance; reported by ``name()``.
        allocSize: Stored on the instance; reported by ``name()`` and passed
            to the binary as ``--AllocSize``.
        iterations: Stored on the instance.
        srcUSM: Stored on the instance; reported by ``name()``.
        dstUSM: Stored on the instance; reported by ``name()``.
        useEvent: Stored as ``self.useEvents`` and passed to the binary as
            ``--UseEvents=<value>``. A falsy value makes ``name()`` append
            " without events". Defaults to 1, the value that was hard-coded
            before this flag became configurable, so callers that still pass
            only the first six configuration arguments keep their behaviour.
    """
    self.numOpsPerThread = numOpsPerThread
    self.numThreads = numThreads
    self.allocSize = allocSize
    self.iterations = iterations
    self.srcUSM = srcUSM
    self.dstUSM = dstUSM
    # NOTE: the attribute is plural ("useEvents") while the parameter is
    # singular; name() and bin_args() read the plural attribute.
    self.useEvents = useEvent
    # The base constructor is called last, after every attribute is set.
    super().__init__(bench, "multithread_benchmark_ur", "MemcpyExecute")
293
296
294
297
def name(self):
    """Return the display name identifying this benchmark configuration.

    The name lists opsPerThread, numThreads, allocSize, srcUSM and dstUSM,
    and ends with " without events" when ``self.useEvents`` is falsy.
    """
    base = f"multithread_benchmark_ur MemcpyExecute opsPerThread:{self.numOpsPerThread} , numThreads:{self.numThreads} , allocSize:{self.allocSize} srcUSM:{self.srcUSM} dstUSM:{self.dstUSM} "
    if self.useEvents:
        return base
    return base + " without events"
296
299
297
300
def bin_args (self ) -> list [str ]:
298
301
return [
299
302
"--Ioq=1" ,
300
- "--UseEvents=1 " ,
303
+ f "--UseEvents={ self . useEvents } " ,
301
304
"--MeasureCompletion=1" ,
302
305
"--UseQueuePerThread=1" ,
303
306
f"--AllocSize={ self .allocSize } " ,
0 commit comments