Skip to content

Commit f48c4d8

Browse files
committed
[OpenMP] Be more forgiving during record and replay
When we record and replay kernels, we should not error out early if there is a chance the program might still run fine. This patch will: 1) Fall back to the allocation heuristic if the VAMap doesn't work. 2) Adjust the memory start to match the required address if possible. 3) Adjust the (guessed) pointer arguments if the memory start adjustment is impossible. This allows kernels without indirect accesses to work, while kernels with indirect accesses will most likely fail.
1 parent 41566fb commit f48c4d8

File tree

8 files changed

+80
-33
lines changed

8 files changed

+80
-33
lines changed

openmp/libomptarget/include/omptarget.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -450,7 +450,8 @@ void __tgt_set_info_flag(uint32_t);
450450
int __tgt_print_device_info(int64_t DeviceId);
451451

452452
int __tgt_activate_record_replay(int64_t DeviceId, uint64_t MemorySize,
453-
void *VAddr, bool IsRecord, bool SaveOutput);
453+
void *VAddr, bool IsRecord, bool SaveOutput,
454+
uint64_t &ReqPtrArgOffset);
454455

455456
#ifdef __cplusplus
456457
}

openmp/libomptarget/include/rtl.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
#include "omptarget.h"
2222

23+
#include <cstdint>
2324
#include <list>
2425
#include <map>
2526
#include <mutex>
@@ -74,7 +75,7 @@ struct RTLInfoTy {
7475
typedef int32_t(data_notify_unmapped_ty)(int32_t, void *);
7576
typedef int32_t(set_device_offset_ty)(int32_t);
7677
typedef int32_t(activate_record_replay_ty)(int32_t, uint64_t, void *, bool,
77-
bool);
78+
bool, uint64_t &);
7879

7980
int32_t Idx = -1; // RTL index, index is the number of devices
8081
// of other RTLs that were registered before,

openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp

Lines changed: 46 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ struct RecordReplayTy {
5555

5656
RRStatusTy Status;
5757
bool ReplaySaveOutput;
58+
bool UsedVAMap = false;
59+
uintptr_t MemoryOffset = 0;
5860

5961
void *suggestAddress(uint64_t MaxMemoryAllocation) {
6062
// Get a valid pointer address for this system
@@ -89,10 +91,12 @@ struct RecordReplayTy {
8991
MemoryPtr = MemoryStart;
9092
MemorySize = 0;
9193
TotalSize = ASize;
94+
UsedVAMap = true;
9295
return Plugin::success();
9396
}
9497

95-
Error preAllocateHeuristic(uint64_t MaxMemoryAllocation, void *VAddr) {
98+
Error preAllocateHeuristic(uint64_t MaxMemoryAllocation,
99+
uint64_t RequiredMemoryAllocation, void *VAddr) {
96100
const size_t MAX_MEMORY_ALLOCATION = MaxMemoryAllocation;
97101
constexpr size_t STEP = 1024 * 1024 * 1024ULL;
98102
MemoryStart = nullptr;
@@ -102,32 +106,55 @@ struct RecordReplayTy {
102106
if (MemoryStart)
103107
break;
104108
}
105-
106-
INFO(OMP_INFOTYPE_PLUGIN_KERNEL, Device->getDeviceId(),
107-
"Allocated %" PRIu64 " bytes at %p for replay.\n", TotalSize,
108-
MemoryStart);
109-
110109
if (!MemoryStart)
111110
return Plugin::error("Allocating record/replay memory");
112111

113112
if (VAddr && VAddr != MemoryStart)
114-
return Plugin::error("Cannot allocate recorded address");
113+
MemoryOffset = uintptr_t(VAddr) - uintptr_t(MemoryStart);
115114

116115
MemoryPtr = MemoryStart;
117116
MemorySize = 0;
118117

118+
// Check if we need adjustment.
119+
if (MemoryOffset > 0 &&
120+
TotalSize >= RequiredMemoryAllocation + MemoryOffset) {
121+
// If we are off but "before" the required address and with enough space,
122+
// we just "allocate" the offset to match the required address.
123+
MemoryPtr = (char *)MemoryPtr + MemoryOffset;
124+
MemorySize += MemoryOffset;
125+
MemoryOffset = 0;
126+
assert(MemoryPtr == VAddr && "Expected offset adjustment to work");
127+
} else if (MemoryOffset) {
128+
// If we are off and in a situation we cannot just "waste" memory to force
129+
// a match, we hope adjusting the arguments is sufficient.
130+
REPORT(
131+
"WARNING Failed to allocate replay memory at required location %p, "
132+
"got %p, trying to offset argument pointers by %" PRIi64 "\n",
133+
VAddr, MemoryStart, MemoryOffset);
134+
}
135+
136+
INFO(OMP_INFOTYPE_PLUGIN_KERNEL, Device->getDeviceId(),
137+
"Allocated %" PRIu64 " bytes at %p for replay.\n", TotalSize,
138+
MemoryStart);
139+
119140
return Plugin::success();
120141
}
121142

122143
Error preallocateDeviceMemory(uint64_t DeviceMemorySize, void *ReqVAddr) {
123-
if (Device->supportVAManagement())
124-
return preAllocateVAMemory(DeviceMemorySize, ReqVAddr);
144+
if (Device->supportVAManagement()) {
145+
auto Err = preAllocateVAMemory(DeviceMemorySize, ReqVAddr);
146+
if (Err) {
147+
REPORT("WARNING VA mapping failed, fallback to heuristic: "
148+
"(Error: %s)\n",
149+
toString(std::move(Err)).data());
150+
}
151+
}
125152

126153
uint64_t DevMemSize;
127154
if (Device->getDeviceMemorySize(DevMemSize))
128155
return Plugin::error("Cannot determine Device Memory Size");
129156

130-
return preAllocateHeuristic(DevMemSize, ReqVAddr);
157+
return preAllocateHeuristic(DevMemSize, DeviceMemorySize, ReqVAddr);
131158
}
132159

133160
void dumpDeviceMemory(StringRef Filename) {
@@ -293,7 +320,7 @@ struct RecordReplayTy {
293320
}
294321

295322
Error init(GenericDeviceTy *Device, uint64_t MemSize, void *VAddr,
296-
RRStatusTy Status, bool SaveOutput) {
323+
RRStatusTy Status, bool SaveOutput, uint64_t &ReqPtrArgOffset) {
297324
this->Device = Device;
298325
this->Status = Status;
299326
this->ReplaySaveOutput = SaveOutput;
@@ -308,11 +335,14 @@ struct RecordReplayTy {
308335
MemoryStart, TotalSize,
309336
Status == RRStatusTy::RRRecording ? "Recording" : "Replaying");
310337

338+
// Tell the user to offset pointer arguments as the memory allocation does
339+
// not match.
340+
ReqPtrArgOffset = MemoryOffset;
311341
return Plugin::success();
312342
}
313343

314344
void deinit() {
315-
if (Device->supportVAManagement()) {
345+
if (UsedVAMap) {
316346
if (auto Err = Device->memoryVAUnMap(MemoryStart, TotalSize))
317347
report_fatal_error("Error on releasing virtual memory space");
318348
} else {
@@ -1694,15 +1724,16 @@ int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDeviceId,
16941724

16951725
int32_t __tgt_rtl_initialize_record_replay(int32_t DeviceId, int64_t MemorySize,
16961726
void *VAddr, bool isRecord,
1697-
bool SaveOutput) {
1727+
bool SaveOutput,
1728+
uint64_t &ReqPtrArgOffset) {
16981729
GenericPluginTy &Plugin = Plugin::get();
16991730
GenericDeviceTy &Device = Plugin.getDevice(DeviceId);
17001731
RecordReplayTy::RRStatusTy Status =
17011732
isRecord ? RecordReplayTy::RRStatusTy::RRRecording
17021733
: RecordReplayTy::RRStatusTy::RRReplaying;
17031734

1704-
if (auto Err =
1705-
RecordReplay.init(&Device, MemorySize, VAddr, Status, SaveOutput)) {
1735+
if (auto Err = RecordReplay.init(&Device, MemorySize, VAddr, Status,
1736+
SaveOutput, ReqPtrArgOffset)) {
17061737
REPORT("WARNING RR did not intialize RR-properly with %lu bytes"
17071738
"(Error: %s)\n",
17081739
MemorySize, toString(std::move(Err)).data());

openmp/libomptarget/src/device.cpp

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -539,15 +539,10 @@ void DeviceTy::init() {
539539
// Enables saving the device memory kernel output post execution if set.
540540
llvm::omp::target::BoolEnvar OMPX_ReplaySaveOutput(
541541
"LIBOMPTARGET_RR_SAVE_OUTPUT", false);
542-
// Sets the maximum to pre-allocate device memory.
543-
llvm::omp::target::UInt64Envar OMPX_DeviceMemorySize(
544-
"LIBOMPTARGET_RR_DEVMEM_SIZE", 16);
545-
DP("Activating Record-Replay for Device %d with %lu GB memory\n",
546-
RTLDeviceID, OMPX_DeviceMemorySize.get());
547-
548-
RTL->activate_record_replay(RTLDeviceID,
549-
OMPX_DeviceMemorySize * 1024 * 1024 * 1024,
550-
nullptr, true, OMPX_ReplaySaveOutput);
542+
543+
uint64_t ReqPtrArgOffset;
544+
RTL->activate_record_replay(RTLDeviceID, 0, nullptr, true,
545+
OMPX_ReplaySaveOutput, ReqPtrArgOffset);
551546
}
552547

553548
IsInit = true;

openmp/libomptarget/src/interface.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "Utilities.h"
2222

2323
#include <cassert>
24+
#include <cstdint>
2425
#include <cstdio>
2526
#include <cstdlib>
2627
#include <mutex>
@@ -347,15 +348,16 @@ EXTERN int __tgt_target_kernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
347348
/// execution on persistent storage
348349
EXTERN int __tgt_activate_record_replay(int64_t DeviceId, uint64_t MemorySize,
349350
void *VAddr, bool IsRecord,
350-
bool SaveOutput) {
351+
bool SaveOutput,
352+
uint64_t &ReqPtrArgOffset) {
351353
if (!deviceIsReady(DeviceId)) {
352354
DP("Device %" PRId64 " is not ready\n", DeviceId);
353355
return OMP_TGT_FAIL;
354356
}
355357

356358
DeviceTy &Device = *PM->Devices[DeviceId];
357-
[[maybe_unused]] int Rc =
358-
target_activate_rr(Device, MemorySize, VAddr, IsRecord, SaveOutput);
359+
[[maybe_unused]] int Rc = target_activate_rr(
360+
Device, MemorySize, VAddr, IsRecord, SaveOutput, ReqPtrArgOffset);
359361
assert(Rc == OFFLOAD_SUCCESS &&
360362
"__tgt_activate_record_replay unexpected failure!");
361363
return OMP_TGT_SUCCESS;

openmp/libomptarget/src/omptarget.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1725,9 +1725,11 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
17251725
/// and informing the record-replayer of whether to store the output
17261726
/// in some file.
17271727
int target_activate_rr(DeviceTy &Device, uint64_t MemorySize, void *VAddr,
1728-
bool isRecord, bool SaveOutput) {
1728+
bool isRecord, bool SaveOutput,
1729+
uint64_t &ReqPtrArgOffset) {
17291730
return Device.RTL->activate_record_replay(Device.DeviceID, MemorySize, VAddr,
1730-
isRecord, SaveOutput);
1731+
isRecord, SaveOutput,
1732+
ReqPtrArgOffset);
17311733
}
17321734

17331735
/// Executes a kernel using pre-recorded information for loading to

openmp/libomptarget/src/private.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ extern int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
4242
KernelArgsTy &KernelArgs, AsyncInfoTy &AsyncInfo);
4343

4444
extern int target_activate_rr(DeviceTy &Device, uint64_t MemorySize,
45-
void *ReqAddr, bool isRecord, bool SaveOutput);
45+
void *ReqAddr, bool isRecord, bool SaveOutput,
46+
uint64_t &ReqPtrArgOffset);
4647

4748
extern int target_replay(ident_t *Loc, DeviceTy &Device, void *HostPtr,
4849
void *DeviceMemory, int64_t DeviceMemorySize,

openmp/libomptarget/tools/kernelreplay/llvm-omp-kernel-replay.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "llvm/Support/CommandLine.h"
1717
#include "llvm/Support/JSON.h"
1818
#include "llvm/Support/MemoryBuffer.h"
19+
#include <cstdint>
1920
#include <cstdlib>
2021

2122
using namespace llvm;
@@ -128,8 +129,9 @@ int main(int argc, char **argv) {
128129

129130
__tgt_register_lib(&Desc);
130131

132+
uint64_t ReqPtrArgOffset = 0;
131133
int Rc = __tgt_activate_record_replay(DeviceId, DeviceMemorySize, BAllocStart,
132-
false, VerifyOpt);
134+
false, VerifyOpt, ReqPtrArgOffset);
133135

134136
if (Rc != OMP_TGT_SUCCESS) {
135137
report_fatal_error("Cannot activate record replay\n");
@@ -149,6 +151,18 @@ int main(int argc, char **argv) {
149151
const_cast<char *>(DeviceMemoryMB.get()->getBuffer().data()),
150152
DeviceMemoryMB.get()->getBufferSize());
151153

154+
// If necessary, adjust pointer arguments.
155+
if (ReqPtrArgOffset) {
156+
for (auto *&Arg : TgtArgs) {
157+
auto ArgInt = uintptr_t(Arg);
158+
// Try to find pointer arguments.
159+
if (ArgInt < uintptr_t(BAllocStart) ||
160+
ArgInt >= uintptr_t(BAllocStart) + DeviceMemorySize)
161+
continue;
162+
Arg = reinterpret_cast<void *>(ArgInt - ReqPtrArgOffset);
163+
}
164+
}
165+
152166
__tgt_target_kernel_replay(
153167
/* Loc */ nullptr, DeviceId, KernelEntry.addr, (char *)recored_data,
154168
DeviceMemoryMB.get()->getBufferSize(), TgtArgs.data(),

0 commit comments

Comments
 (0)