Skip to content

Commit c3a3bab

Browse files
committed
[Offload] Implement double free (and other allocation error) reporting
As a first step towards a GPU sanitizer we now can track allocations and deallocations in order to report double frees, and other problems during deallocation.
1 parent d0c8e26 commit c3a3bab

File tree

9 files changed

+449
-1
lines changed

9 files changed

+449
-1
lines changed
Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
//===- ErrorReporting.h - Helper to provide nice error messages -*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
//===----------------------------------------------------------------------===//
10+
11+
#ifndef OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_ERROR_REPORTING_H
12+
#define OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_ERROR_REPORTING_H
13+
14+
#include "PluginInterface.h"
15+
16+
#include "llvm/ADT/SmallString.h"
17+
#include "llvm/ADT/StringRef.h"
18+
#include "llvm/Support/ErrorHandling.h"
19+
20+
#include <cstdio>
21+
#include <cstdlib>
22+
#include <string>
23+
24+
namespace llvm {
25+
namespace omp {
26+
namespace target {
27+
namespace plugin {
28+
29+
class ErrorReporter {
30+
/// The banner printed at the beginning of an error report.
31+
static constexpr auto ErrorBanner = "OFFLOAD ERROR: ";
32+
33+
/// Terminal color codes
34+
///
35+
/// TODO: determine if the terminal supports colors.
36+
///@{
37+
static constexpr auto Green = []() { return "\033[1m\033[32m"; };
38+
static constexpr auto Blue = []() { return "\033[1m\033[34m"; };
39+
static constexpr auto Red = []() { return "\033[1m\033[31m"; };
40+
static constexpr auto Magenta = []() { return "\033[1m\033[35m"; };
41+
static constexpr auto Cyan = []() { return "\033[1m\033[36m"; };
42+
static constexpr auto Default = []() { return "\033[1m\033[0m"; };
43+
///@}
44+
45+
/// The size of the getBuffer() buffer.
46+
static constexpr unsigned BufferSize = 1024;
47+
48+
/// Return a buffer of size BufferSize that can be used for formatting.
49+
static char *getBuffer() {
50+
static char *Buffer = nullptr;
51+
if (!Buffer)
52+
Buffer = reinterpret_cast<char *>(malloc(BufferSize));
53+
return Buffer;
54+
}
55+
56+
/// Return the device id as string, or n/a if not available.
57+
static std::string getDeviceIdStr(GenericDeviceTy *Device) {
58+
return Device ? std::to_string(Device->getDeviceId()) : "n/a";
59+
}
60+
61+
/// Return a nice name for an TargetAllocTy.
62+
static std::string getAllocTyName(TargetAllocTy Kind) {
63+
switch (Kind) {
64+
case TARGET_ALLOC_DEVICE_NON_BLOCKING:
65+
case TARGET_ALLOC_DEFAULT:
66+
case TARGET_ALLOC_DEVICE:
67+
return "device memory";
68+
case TARGET_ALLOC_HOST:
69+
return "pinned host memory";
70+
case TARGET_ALLOC_SHARED:
71+
return "managed memory";
72+
break;
73+
}
74+
llvm_unreachable("Unknown target alloc kind");
75+
}
76+
77+
/// Return a C string after \p Format has been instantiated with \p Args.
78+
template <typename... ArgsTy>
79+
static const char *getCString(const char *Format, ArgsTy &&...Args) {
80+
std::snprintf(getBuffer(), BufferSize, Format,
81+
std::forward<ArgsTy>(Args)...);
82+
return getBuffer();
83+
}
84+
85+
/// Print \p Format, instantiated with \p Args to stderr.
86+
/// TODO: Allow redirection into a file stream.
87+
template <typename... ArgsTy>
88+
static void print(const char *Format, ArgsTy &&...Args) {
89+
fprintf(stderr, Format, std::forward<ArgsTy>(Args)...);
90+
}
91+
92+
/// Pretty print a stack trace.
93+
static void reportStackTrace(StringRef StackTrace) {
94+
if (StackTrace.empty())
95+
return;
96+
97+
SmallVector<StringRef> Lines, Parts;
98+
StackTrace.split(Lines, "\n", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
99+
int Start = Lines.empty() || !Lines[0].contains("PrintStackTrace") ? 0 : 1;
100+
for (int I = Start, E = Lines.size(); I < E; ++I) {
101+
auto Line = Lines[I];
102+
Parts.clear();
103+
Line.split(Parts, " ", /*MaxSplit=*/2);
104+
if (Parts.size() != 3 || Parts[0].size() < 2 || Parts[0][0] != '#') {
105+
print("%s\n", Line.str().c_str());
106+
continue;
107+
}
108+
unsigned FrameIdx = std::stoi(Parts[0].drop_front(1).str());
109+
if (Start)
110+
FrameIdx -= 1;
111+
print(" %s%s%s%u %s%s%s %s\n", Magenta(),
112+
Parts[0].take_front().str().c_str(), Green(), FrameIdx, Blue(),
113+
Parts[1].str().c_str(), Default(), Parts[2].str().c_str());
114+
}
115+
116+
printf("\n");
117+
}
118+
119+
/// Report an error.
120+
static void reportError(const char *Message, StringRef StackTrace) {
121+
print("%s%s%s\n%s", Red(), ErrorBanner, Message, Default());
122+
reportStackTrace(StackTrace);
123+
}
124+
125+
/// Report information about an allocation associated with \p ATI.
126+
static void reportAllocationInfo(AllocationTraceInfoTy *ATI) {
127+
if (!ATI)
128+
return;
129+
130+
if (!ATI->DeallocationTrace.empty()) {
131+
print("%s%s%s\n%s", Cyan(), "Last deallocation:", Default());
132+
reportStackTrace(ATI->DeallocationTrace);
133+
}
134+
135+
if (ATI->HostPtr)
136+
print("%sLast allocation of size %lu for host pointer %p:\n%s", Cyan(),
137+
ATI->Size, ATI->HostPtr, Default());
138+
else
139+
print("%sLast allocation of size %lu:\n%s", Cyan(), ATI->Size, Default());
140+
reportStackTrace(ATI->AllocationTrace);
141+
if (!ATI->LastAllocationInfo)
142+
return;
143+
144+
unsigned I = 0;
145+
print("%sPrior allocations with the same base pointer:", Cyan());
146+
while (ATI->LastAllocationInfo) {
147+
print("\n%s", Default());
148+
ATI = ATI->LastAllocationInfo;
149+
print("%s #%u Prior deallocation of size %lu:\n%s", Cyan(), I, ATI->Size,
150+
Default());
151+
reportStackTrace(ATI->DeallocationTrace);
152+
if (ATI->HostPtr)
153+
print("%s #%u Prior allocation for host pointer %p:\n%s", Cyan(), I,
154+
ATI->HostPtr, Default());
155+
else
156+
print("%s #%u Prior allocation:\n%s", Cyan(), I, Default());
157+
reportStackTrace(ATI->AllocationTrace);
158+
++I;
159+
}
160+
}
161+
162+
public:
163+
/// Check if the deallocation of \p DevicePtr is valid given \p ATI. Stores \p
164+
/// StackTrace to \p ATI->DeallocationTrace if there was no error.
165+
static void checkDeallocation(GenericDeviceTy *Device, void *DevicePtr,
166+
TargetAllocTy Kind, AllocationTraceInfoTy *ATI,
167+
std::string &StackTrace) {
168+
#define DEALLOCATION_ERROR(Format, ...) \
169+
reportError(getCString(Format, __VA_ARGS__), StackTrace); \
170+
reportAllocationInfo(ATI); \
171+
abort();
172+
173+
if (!ATI) {
174+
DEALLOCATION_ERROR("deallocation of non-allocated %s: %p",
175+
getAllocTyName(Kind).c_str(), DevicePtr);
176+
}
177+
178+
if (!ATI->DeallocationTrace.empty()) {
179+
DEALLOCATION_ERROR("double-free of %s: %p", getAllocTyName(Kind).c_str(),
180+
DevicePtr);
181+
}
182+
183+
if (ATI->Kind != Kind) {
184+
DEALLOCATION_ERROR("deallocation requires %s but allocation was %s: %p",
185+
getAllocTyName(Kind).c_str(),
186+
getAllocTyName(ATI->Kind).c_str(), DevicePtr);
187+
}
188+
189+
ATI->DeallocationTrace = StackTrace;
190+
191+
#undef DEALLOCATION_ERROR
192+
}
193+
};
194+
195+
} // namespace plugin
196+
} // namespace target
197+
} // namespace omp
198+
} // namespace llvm
199+
200+
#endif // OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_ERROR_REPORTING_H

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <shared_mutex>
2020
#include <vector>
2121

22+
#include "ExclusiveAccess.h"
2223
#include "Shared/APITypes.h"
2324
#include "Shared/Debug.h"
2425
#include "Shared/Environment.h"
@@ -382,6 +383,32 @@ struct GenericKernelTy {
382383
bool IsBareKernel = false;
383384
};
384385

386+
/// Information about an allocation, when it has been allocated, and when/if it
387+
/// has been deallocated, for error reporting purposes.
388+
struct AllocationTraceInfoTy {
389+
390+
/// The stack trace of the allocation itself.
391+
std::string AllocationTrace;
392+
393+
/// The stack trace of the deallocation, or empty.
394+
std::string DeallocationTrace;
395+
396+
/// The allocated device pointer.
397+
void *DevicePtr = nullptr;
398+
399+
/// The corresponding host pointer (can be null).
400+
void *HostPtr = nullptr;
401+
402+
/// The size of the allocation.
403+
uint64_t Size = 0;
404+
405+
/// The kind of the allocation.
406+
TargetAllocTy Kind = TargetAllocTy::TARGET_ALLOC_DEFAULT;
407+
408+
/// Information about the last allocation at this address, if any.
409+
AllocationTraceInfoTy *LastAllocationInfo = nullptr;
410+
};
411+
385412
/// Class representing a map of host pinned allocations. We track these pinned
386413
/// allocations, so memory transfers involving these buffers can be optimized.
387414
class PinnedAllocationMapTy {
@@ -866,6 +893,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
866893
/// Reference to the underlying plugin that created this device.
867894
GenericPluginTy &Plugin;
868895

896+
/// Map to record when allocations have been performed, and when they have
897+
/// been deallocated, both for error reporting purposes.
898+
ProtectedObj<DenseMap<void *, AllocationTraceInfoTy *>> AllocationTraces;
899+
869900
private:
870901
/// Get and set the stack size and heap size for the device. If not used, the
871902
/// plugin can implement the setters as no-op and setting the output
@@ -916,6 +947,11 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
916947
UInt32Envar OMPX_InitialNumStreams;
917948
UInt32Envar OMPX_InitialNumEvents;
918949

950+
/// Environment variable to determine if stack traces for allocations and
951+
/// deallocations are tracked.
952+
BoolEnvar OMPX_TrackAllocationTraces =
953+
BoolEnvar("OFFLOAD_TRACK_ALLOCATION_TRACES", false);
954+
919955
/// Array of images loaded into the device. Images are automatically
920956
/// deallocated by the allocator.
921957
llvm::SmallVector<DeviceImageTy *> LoadedImages;

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "Shared/Debug.h"
1515
#include "Shared/Environment.h"
1616

17+
#include "ErrorReporting.h"
1718
#include "GlobalHandler.h"
1819
#include "JIT.h"
1920
#include "Utils/ELF.h"
@@ -30,6 +31,8 @@
3031
#include "llvm/Support/JSON.h"
3132
#include "llvm/Support/MathExtras.h"
3233
#include "llvm/Support/MemoryBuffer.h"
34+
#include "llvm/Support/Signals.h"
35+
#include "llvm/Support/raw_ostream.h"
3336

3437
#include <cstdint>
3538
#include <limits>
@@ -1337,6 +1340,25 @@ Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr,
13371340
if (auto Err = PinnedAllocs.registerHostBuffer(Alloc, Alloc, Size))
13381341
return std::move(Err);
13391342

1343+
// Keep track of the allocation stack if we track allocation traces.
1344+
if (OMPX_TrackAllocationTraces) {
1345+
std::string StackTrace;
1346+
llvm::raw_string_ostream OS(StackTrace);
1347+
llvm::sys::PrintStackTrace(OS);
1348+
1349+
AllocationTraceInfoTy *ATI = new AllocationTraceInfoTy();
1350+
ATI->AllocationTrace = std::move(StackTrace);
1351+
ATI->DevicePtr = Alloc;
1352+
ATI->HostPtr = HostPtr;
1353+
ATI->Size = Size;
1354+
ATI->Kind = Kind;
1355+
1356+
auto AllocationTraceMap = AllocationTraces.getExclusiveAccessor();
1357+
auto *&MapATI = (*AllocationTraceMap)[Alloc];
1358+
ATI->LastAllocationInfo = MapATI;
1359+
MapATI = ATI;
1360+
}
1361+
13401362
return Alloc;
13411363
}
13421364

@@ -1345,6 +1367,21 @@ Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) {
13451367
if (Plugin.getRecordReplay().isRecordingOrReplaying())
13461368
return Plugin::success();
13471369

1370+
// Keep track of the deallocation stack if we track allocation traces.
1371+
if (OMPX_TrackAllocationTraces) {
1372+
AllocationTraceInfoTy *ATI = nullptr;
1373+
{
1374+
auto AllocationTraceMap = AllocationTraces.getExclusiveAccessor();
1375+
ATI = (*AllocationTraceMap)[TgtPtr];
1376+
}
1377+
1378+
std::string StackTrace;
1379+
llvm::raw_string_ostream OS(StackTrace);
1380+
llvm::sys::PrintStackTrace(OS);
1381+
1382+
ErrorReporter::checkDeallocation(this, TgtPtr, Kind, ATI, StackTrace);
1383+
}
1384+
13481385
int Res;
13491386
switch (Kind) {
13501387
case TARGET_ALLOC_DEFAULT:

offload/src/omptarget.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,9 @@ void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
462462
FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());
463463

464464
if (DeviceOrErr->deleteData(DevicePtr, Kind) == OFFLOAD_FAIL)
465-
FATAL_MESSAGE(DeviceNum, "%s", "Failed to deallocate device ptr");
465+
FATAL_MESSAGE(DeviceNum, "%s",
466+
"Failed to deallocate device ptr. Set "
467+
"OFFLOAD_TRACK_ALLOCATION_TRACES=1 to track allocations.");
466468

467469
DP("omp_target_free deallocated device ptr\n");
468470
}

0 commit comments

Comments
 (0)