Skip to content

Commit 726ee40

Browse files
committed
[OpenMP] Move the recording code to account for KernelLaunchEnvironment
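We need to record the kernel input info late, in GenericKernelTy::launch rather than in GenericDeviceTy::launchKernel, so that the recording accounts for the kernel launch environment argument as well as potential changes to the block and thread counts.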
1 parent b6f2597 commit 726ee40

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -504,6 +504,12 @@ Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
504504
printLaunchInfo(GenericDevice, KernelArgs, NumThreads, NumBlocks))
505505
return Err;
506506

507+
if (RecordReplay.isRecording())
508+
RecordReplay.saveKernelInputInfo(
509+
getName(), getImage(), ArgPtrs, ArgOffsets,
510+
KernelArgs.NumArgs - /* KernelLaunchEnvironment */ 1, NumBlocks,
511+
NumThreads, KernelArgs.Tripcount);
512+
507513
return launchImpl(GenericDevice, NumThreads, NumBlocks, KernelArgs,
508514
KernelArgsPtr, AsyncInfoWrapper);
509515
}
@@ -1405,12 +1411,6 @@ Error GenericDeviceTy::launchKernel(void *EntryPtr, void **ArgPtrs,
14051411
GenericKernelTy &GenericKernel =
14061412
*reinterpret_cast<GenericKernelTy *>(EntryPtr);
14071413

1408-
if (RecordReplay.isRecording())
1409-
RecordReplay.saveKernelInputInfo(
1410-
GenericKernel.getName(), GenericKernel.getImage(), ArgPtrs, ArgOffsets,
1411-
KernelArgs.NumArgs, KernelArgs.NumTeams[0], KernelArgs.ThreadLimit[0],
1412-
KernelArgs.Tripcount);
1413-
14141414
if (RecordReplay.isRecording())
14151415
RecordReplay.saveImage(GenericKernel.getName(), GenericKernel.getImage());
14161416

0 commit comments

Comments
 (0)