Skip to content

[OpenMP] Fix record-replay allocation order for kernel environment #71863

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -231,10 +231,9 @@ struct RecordReplayTy {
OS.close();
}

void saveKernelInputInfo(const char *Name, DeviceImageTy &Image,
void **ArgPtrs, ptrdiff_t *ArgOffsets,
int32_t NumArgs, uint64_t NumTeamsClause,
uint32_t ThreadLimitClause, uint64_t LoopTripCount) {
void saveKernelDescr(const char *Name, void **ArgPtrs, ptrdiff_t *ArgOffsets,
int32_t NumArgs, uint64_t NumTeamsClause,
uint32_t ThreadLimitClause, uint64_t LoopTripCount) {
json::Object JsonKernelInfo;
JsonKernelInfo["Name"] = Name;
JsonKernelInfo["NumArgs"] = NumArgs;
Expand All @@ -255,12 +254,6 @@ struct RecordReplayTy {
JsonArgOffsets.push_back(ArgOffsets[I]);
JsonKernelInfo["ArgOffsets"] = json::Value(std::move(JsonArgOffsets));

SmallString<128> MemoryFilename = {Name, ".memory"};
dumpDeviceMemory(MemoryFilename);

SmallString<128> GlobalsFilename = {Name, ".globals"};
dumpGlobals(GlobalsFilename, Image);

SmallString<128> JsonFilename = {Name, ".json"};
std::error_code EC;
raw_fd_ostream JsonOS(JsonFilename.str(), EC);
Expand All @@ -271,6 +264,14 @@ struct RecordReplayTy {
JsonOS.close();
}

void saveKernelInput(const char *Name, DeviceImageTy &Image) {
SmallString<128> GlobalsFilename = {Name, ".globals"};
dumpGlobals(GlobalsFilename, Image);

SmallString<128> MemoryFilename = {Name, ".memory"};
dumpDeviceMemory(MemoryFilename);
}

void saveKernelOutputInfo(const char *Name) {
SmallString<128> OutputFilename = {
Name, (isRecording() ? ".original.output" : ".replay.output")};
Expand Down Expand Up @@ -504,12 +505,6 @@ Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
printLaunchInfo(GenericDevice, KernelArgs, NumThreads, NumBlocks))
return Err;

if (RecordReplay.isRecording())
RecordReplay.saveKernelInputInfo(
getName(), getImage(), ArgPtrs, ArgOffsets,
KernelArgs.NumArgs - /* KernelLaunchEnvironment */ 1, NumBlocks,
NumThreads, KernelArgs.Tripcount);

return launchImpl(GenericDevice, NumThreads, NumBlocks, KernelArgs,
KernelArgsPtr, AsyncInfoWrapper);
}
Expand Down Expand Up @@ -1411,12 +1406,21 @@ Error GenericDeviceTy::launchKernel(void *EntryPtr, void **ArgPtrs,
GenericKernelTy &GenericKernel =
*reinterpret_cast<GenericKernelTy *>(EntryPtr);

if (RecordReplay.isRecording())
if (RecordReplay.isRecording()) {
RecordReplay.saveImage(GenericKernel.getName(), GenericKernel.getImage());
RecordReplay.saveKernelInput(GenericKernel.getName(),
GenericKernel.getImage());
}

auto Err = GenericKernel.launch(*this, ArgPtrs, ArgOffsets, KernelArgs,
AsyncInfoWrapper);

if (RecordReplay.isRecording())
RecordReplay.saveKernelDescr(GenericKernel.getName(), ArgPtrs, ArgOffsets,
KernelArgs.NumArgs, KernelArgs.NumTeams[0],
KernelArgs.ThreadLimit[0],
KernelArgs.Tripcount);

// 'finalize' here to guarantee next record-replay actions are in-sync
AsyncInfoWrapper.finalize(Err);

Expand Down Expand Up @@ -1845,7 +1849,8 @@ int32_t __tgt_rtl_data_exchange(int32_t SrcDeviceId, void *SrcPtr,
int32_t DstDeviceId, void *DstPtr,
int64_t Size) {
return __tgt_rtl_data_exchange_async(SrcDeviceId, SrcPtr, DstDeviceId, DstPtr,
Size, /* AsyncInfoPtr */ nullptr);
Size,
/* AsyncInfoPtr */ nullptr);
}

int32_t __tgt_rtl_data_exchange_async(int32_t SrcDeviceId, void *SrcPtr,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ int main(int argc, char **argv) {
uint8_t *recored_data = new uint8_t[DeviceMemoryMB.get()->getBufferSize()];
std::memcpy(recored_data,
const_cast<char *>(DeviceMemoryMB.get()->getBuffer().data()),
DeviceMemorySizeJson.value() * sizeof(uint8_t));
DeviceMemoryMB.get()->getBufferSize());

__tgt_target_kernel_replay(
/* Loc */ nullptr, DeviceId, KernelEntry.addr, (char *)recored_data,
Expand Down