@@ -231,10 +231,9 @@ struct RecordReplayTy {
231
231
OS.close ();
232
232
}
233
233
234
- void saveKernelInputInfo (const char *Name, DeviceImageTy &Image,
235
- void **ArgPtrs, ptrdiff_t *ArgOffsets,
236
- int32_t NumArgs, uint64_t NumTeamsClause,
237
- uint32_t ThreadLimitClause, uint64_t LoopTripCount) {
234
+ void saveKernelDescr (const char *Name, void **ArgPtrs, ptrdiff_t *ArgOffsets,
235
+ int32_t NumArgs, uint64_t NumTeamsClause,
236
+ uint32_t ThreadLimitClause, uint64_t LoopTripCount) {
238
237
json::Object JsonKernelInfo;
239
238
JsonKernelInfo[" Name" ] = Name;
240
239
JsonKernelInfo[" NumArgs" ] = NumArgs;
@@ -255,12 +254,6 @@ struct RecordReplayTy {
255
254
JsonArgOffsets.push_back (ArgOffsets[I]);
256
255
JsonKernelInfo[" ArgOffsets" ] = json::Value (std::move (JsonArgOffsets));
257
256
258
- SmallString<128 > MemoryFilename = {Name, " .memory" };
259
- dumpDeviceMemory (MemoryFilename);
260
-
261
- SmallString<128 > GlobalsFilename = {Name, " .globals" };
262
- dumpGlobals (GlobalsFilename, Image);
263
-
264
257
SmallString<128 > JsonFilename = {Name, " .json" };
265
258
std::error_code EC;
266
259
raw_fd_ostream JsonOS (JsonFilename.str (), EC);
@@ -271,6 +264,14 @@ struct RecordReplayTy {
271
264
JsonOS.close ();
272
265
}
273
266
267
// saveKernelInput: produces two per-kernel dump files whose names are built
// from Name plus a fixed suffix. The globals dump receives the device image;
// the memory dump receives only the file name. dumpGlobals and
// dumpDeviceMemory are defined outside this view, so the exact contents they
// write are not shown here.
// In this change it is called from GenericDeviceTy::launchKernel, inside the
// isRecording() branch, before GenericKernel.launch(...) is invoked.
//   Name  - kernel name used as the file-name prefix.
//   Image - device image forwarded to dumpGlobals.
+ void saveKernelInput (const char *Name, DeviceImageTy &Image) {
268
+ SmallString<128 > GlobalsFilename = {Name, " .globals" };
269
+ dumpGlobals (GlobalsFilename, Image); // globals dump is issued first
270
+
271
+ SmallString<128 > MemoryFilename = {Name, " .memory" };
272
+ dumpDeviceMemory (MemoryFilename); // then the device-memory dump
273
+ }
274
+
274
275
void saveKernelOutputInfo (const char *Name) {
275
276
SmallString<128 > OutputFilename = {
276
277
Name, (isRecording () ? " .original.output" : " .replay.output" )};
@@ -504,12 +505,6 @@ Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
504
505
printLaunchInfo (GenericDevice, KernelArgs, NumThreads, NumBlocks))
505
506
return Err;
506
507
507
- if (RecordReplay.isRecording ())
508
- RecordReplay.saveKernelInputInfo (
509
- getName (), getImage (), ArgPtrs, ArgOffsets,
510
- KernelArgs.NumArgs - /* KernelLaunchEnvironment */ 1 , NumBlocks,
511
- NumThreads, KernelArgs.Tripcount );
512
-
513
508
return launchImpl (GenericDevice, NumThreads, NumBlocks, KernelArgs,
514
509
KernelArgsPtr, AsyncInfoWrapper);
515
510
}
@@ -1411,12 +1406,21 @@ Error GenericDeviceTy::launchKernel(void *EntryPtr, void **ArgPtrs,
1411
1406
GenericKernelTy &GenericKernel =
1412
1407
*reinterpret_cast <GenericKernelTy *>(EntryPtr);
1413
1408
1414
- if (RecordReplay.isRecording ())
1409
+ if (RecordReplay.isRecording ()) {
1415
1410
RecordReplay.saveImage (GenericKernel.getName (), GenericKernel.getImage ());
1411
+ RecordReplay.saveKernelInput (GenericKernel.getName (),
1412
+ GenericKernel.getImage ());
1413
+ }
1416
1414
1417
1415
auto Err = GenericKernel.launch (*this , ArgPtrs, ArgOffsets, KernelArgs,
1418
1416
AsyncInfoWrapper);
1419
1417
1418
+ if (RecordReplay.isRecording ())
1419
+ RecordReplay.saveKernelDescr (GenericKernel.getName (), ArgPtrs, ArgOffsets,
1420
+ KernelArgs.NumArgs , KernelArgs.NumTeams [0 ],
1421
+ KernelArgs.ThreadLimit [0 ],
1422
+ KernelArgs.Tripcount );
1423
+
1420
1424
// 'finalize' here to guarantee next record-replay actions are in-sync
1421
1425
AsyncInfoWrapper.finalize (Err);
1422
1426
@@ -1845,7 +1849,8 @@ int32_t __tgt_rtl_data_exchange(int32_t SrcDeviceId, void *SrcPtr,
1845
1849
int32_t DstDeviceId, void *DstPtr,
1846
1850
int64_t Size) {
1847
1851
return __tgt_rtl_data_exchange_async (SrcDeviceId, SrcPtr, DstDeviceId, DstPtr,
1848
- Size, /* AsyncInfoPtr */ nullptr );
1852
+ Size,
1853
+ /* AsyncInfoPtr */ nullptr );
1849
1854
}
1850
1855
1851
1856
int32_t __tgt_rtl_data_exchange_async (int32_t SrcDeviceId, void *SrcPtr,
0 commit comments