Skip to content

Commit e44480e

Browse files
committed
[Offload] Implement double free (and other allocation error) reporting
As a first step towards a GPU sanitizer, we can now track allocations and deallocations in order to report double frees and other problems during deallocation.
1 parent d0c8e26 commit e44480e

File tree

9 files changed

+483
-1
lines changed

9 files changed

+483
-1
lines changed
Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
//===- ErrorReporting.h - Helper to provide nice error messages -*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
//===----------------------------------------------------------------------===//
10+
11+
#ifndef OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_ERROR_REPORTING_H
12+
#define OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_ERROR_REPORTING_H
13+
14+
#include "PluginInterface.h"
15+
#include "Shared/EnvironmentVar.h"
16+
17+
#include "llvm/ADT/SmallString.h"
18+
#include "llvm/ADT/StringRef.h"
19+
#include "llvm/Support/ErrorHandling.h"
20+
#include "llvm/Support/WithColor.h"
21+
#include "llvm/Support/raw_ostream.h"
22+
23+
#include <cstdint>
24+
#include <cstdio>
25+
#include <cstdlib>
26+
#include <functional>
27+
#include <optional>
28+
#include <string>
29+
#include <unistd.h>
30+
31+
namespace llvm {
32+
namespace omp {
33+
namespace target {
34+
namespace plugin {
35+
36+
class ErrorReporter {
37+
38+
enum ColorTy {
39+
Yellow = int(HighlightColor::Address),
40+
Green = int(HighlightColor::String),
41+
DarkBlue = int(HighlightColor::Tag),
42+
Cyan = int(HighlightColor::Attribute),
43+
DarkPurple = int(HighlightColor::Enumerator),
44+
DarkRed = int(HighlightColor::Macro),
45+
BoldRed = int(HighlightColor::Error),
46+
BoldLightPurple = int(HighlightColor::Warning),
47+
BoldDarkGrey = int(HighlightColor::Note),
48+
BoldLightBlue = int(HighlightColor::Remark),
49+
};
50+
51+
/// The banner printed at the beginning of an error report.
52+
static constexpr auto ErrorBanner = "OFFLOAD ERROR: ";
53+
54+
/// The size of the getBuffer() buffer.
55+
static constexpr unsigned BufferSize = 1024;
56+
57+
/// Return a buffer of size BufferSize that can be used for formatting.
58+
static char *getBuffer() {
59+
static char *Buffer = nullptr;
60+
if (!Buffer)
61+
Buffer = reinterpret_cast<char *>(malloc(BufferSize));
62+
return Buffer;
63+
}
64+
65+
/// Return the device id as string, or n/a if not available.
66+
static std::string getDeviceIdStr(GenericDeviceTy *Device) {
67+
return Device ? std::to_string(Device->getDeviceId()) : "n/a";
68+
}
69+
70+
/// Return a nice name for an TargetAllocTy.
71+
static std::string getAllocTyName(TargetAllocTy Kind) {
72+
switch (Kind) {
73+
case TARGET_ALLOC_DEVICE_NON_BLOCKING:
74+
case TARGET_ALLOC_DEFAULT:
75+
case TARGET_ALLOC_DEVICE:
76+
return "device memory";
77+
case TARGET_ALLOC_HOST:
78+
return "pinned host memory";
79+
case TARGET_ALLOC_SHARED:
80+
return "managed memory";
81+
break;
82+
}
83+
llvm_unreachable("Unknown target alloc kind");
84+
}
85+
86+
#pragma clang diagnostic push
87+
#pragma clang diagnostic ignored "-Wgcc-compat"
88+
#pragma clang diagnostic ignored "-Wformat-security"
89+
/// Print \p Format, instantiated with \p Args to stderr.
90+
/// TODO: Allow redirection into a file stream.
91+
template <typename... ArgsTy>
92+
[[gnu::format(__printf__, 1, 2)]] static void print(const char *Format,
93+
ArgsTy &&...Args) {
94+
raw_fd_ostream OS(STDERR_FILENO, false);
95+
OS << llvm::format(Format, Args...);
96+
}
97+
98+
/// Print \p Format, instantiated with \p Args to stderr, but colored.
99+
/// TODO: Allow redirection into a file stream.
100+
template <typename... ArgsTy>
101+
[[gnu::format(__printf__, 2, 3)]] static void
102+
print(ColorTy Color, const char *Format, ArgsTy &&...Args) {
103+
raw_fd_ostream OS(STDERR_FILENO, false);
104+
WithColor(OS, HighlightColor(Color)) << llvm::format(Format, Args...);
105+
}
106+
107+
/// Print \p Format, instantiated with \p Args to stderr, but colored and with
108+
/// a banner.
109+
/// TODO: Allow redirection into a file stream.
110+
template <typename... ArgsTy>
111+
[[gnu::format(__printf__, 1, 2)]] static void reportError(const char *Format,
112+
ArgsTy &&...Args) {
113+
raw_fd_ostream OS(STDERR_FILENO, false);
114+
WithColor(OS, HighlightColor::Error)
115+
<< ErrorBanner << llvm::format(Format, Args...) << "\n";
116+
}
117+
#pragma clang diagnostic pop
118+
119+
static void reportError(const char *Str) { reportError("%s", Str); }
120+
static void print(const char *Str) { print("%s", Str); }
121+
static void print(StringRef Str) { print("%s", Str.str().c_str()); }
122+
static void print(ColorTy Color, const char *Str) { print(Color, "%s", Str); }
123+
static void print(ColorTy Color, StringRef Str) {
124+
print(Color, "%s", Str.str().c_str());
125+
}
126+
127+
/// Pretty print a stack trace.
128+
static void reportStackTrace(StringRef StackTrace) {
129+
if (StackTrace.empty())
130+
return;
131+
132+
SmallVector<StringRef> Lines, Parts;
133+
StackTrace.split(Lines, "\n", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
134+
int Start = Lines.empty() || !Lines[0].contains("PrintStackTrace") ? 0 : 1;
135+
unsigned NumDigits =
136+
(int)(floor(log10(Lines.size() - Start - /*0*/ 1)) + 1);
137+
for (int I = Start, E = Lines.size(); I < E; ++I) {
138+
auto Line = Lines[I];
139+
Parts.clear();
140+
Line = Line.drop_while([](char C) { return std::isspace(C); });
141+
Line.split(Parts, " ", /*MaxSplit=*/2);
142+
if (Parts.size() != 3 || Parts[0].size() < 2 || Parts[0][0] != '#') {
143+
print("%s\n", Line.str().c_str());
144+
continue;
145+
}
146+
unsigned FrameIdx = std::stoi(Parts[0].drop_front(1).str());
147+
if (Start)
148+
FrameIdx -= 1;
149+
print(DarkPurple, " %s", Parts[0].take_front().str().c_str());
150+
print(Green, "%*u", NumDigits, FrameIdx);
151+
print(BoldLightBlue, " %s", Parts[1].str().c_str());
152+
print(" %s\n", Parts[2].str().c_str());
153+
}
154+
print("\n");
155+
}
156+
157+
/// Report information about an allocation associated with \p ATI.
158+
static void reportAllocationInfo(AllocationTraceInfoTy *ATI) {
159+
if (!ATI)
160+
return;
161+
162+
if (!ATI->DeallocationTrace.empty()) {
163+
print(BoldLightPurple, "Last deallocation:\n");
164+
reportStackTrace(ATI->DeallocationTrace);
165+
}
166+
167+
if (ATI->HostPtr)
168+
print(BoldLightPurple,
169+
"Last allocation of size %lu for host pointer %p:\n", ATI->Size,
170+
ATI->HostPtr);
171+
else
172+
print(BoldLightPurple, "Last allocation of size %lu:\n", ATI->Size);
173+
reportStackTrace(ATI->AllocationTrace);
174+
if (!ATI->LastAllocationInfo)
175+
return;
176+
177+
unsigned I = 0;
178+
print(BoldLightPurple, "Prior allocations with the same base pointer:");
179+
while (ATI->LastAllocationInfo) {
180+
print("\n");
181+
ATI = ATI->LastAllocationInfo;
182+
print(BoldLightPurple, " #%u Prior deallocation of size %lu:\n", I,
183+
ATI->Size);
184+
reportStackTrace(ATI->DeallocationTrace);
185+
if (ATI->HostPtr)
186+
print(BoldLightPurple, " #%u Prior allocation for host pointer %p:\n",
187+
I, ATI->HostPtr);
188+
else
189+
print(BoldLightPurple, " #%u Prior allocation:\n", I);
190+
reportStackTrace(ATI->AllocationTrace);
191+
++I;
192+
}
193+
}
194+
195+
public:
196+
/// Check if the deallocation of \p DevicePtr is valid given \p ATI. Stores \p
197+
/// StackTrace to \p ATI->DeallocationTrace if there was no error.
198+
static void checkDeallocation(GenericDeviceTy *Device, void *DevicePtr,
199+
TargetAllocTy Kind, AllocationTraceInfoTy *ATI,
200+
std::string &StackTrace) {
201+
#define DEALLOCATION_ERROR(Format, ...) \
202+
reportError(Format, __VA_ARGS__); \
203+
reportStackTrace(StackTrace); \
204+
reportAllocationInfo(ATI); \
205+
abort();
206+
207+
if (!ATI) {
208+
DEALLOCATION_ERROR("deallocation of non-allocated %s: %p",
209+
getAllocTyName(Kind).c_str(), DevicePtr);
210+
}
211+
212+
if (!ATI->DeallocationTrace.empty()) {
213+
DEALLOCATION_ERROR("double-free of %s: %p", getAllocTyName(Kind).c_str(),
214+
DevicePtr);
215+
}
216+
217+
if (ATI->Kind != Kind) {
218+
DEALLOCATION_ERROR("deallocation requires %s but allocation was %s: %p",
219+
getAllocTyName(Kind).c_str(),
220+
getAllocTyName(ATI->Kind).c_str(), DevicePtr);
221+
}
222+
223+
ATI->DeallocationTrace = StackTrace;
224+
225+
#undef DEALLOCATION_ERROR
226+
}
227+
};
228+
229+
} // namespace plugin
230+
} // namespace target
231+
} // namespace omp
232+
} // namespace llvm
233+
234+
#endif // OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_ERROR_REPORTING_H

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <shared_mutex>
2020
#include <vector>
2121

22+
#include "ExclusiveAccess.h"
2223
#include "Shared/APITypes.h"
2324
#include "Shared/Debug.h"
2425
#include "Shared/Environment.h"
@@ -382,6 +383,32 @@ struct GenericKernelTy {
382383
bool IsBareKernel = false;
383384
};
384385

386+
/// Information about an allocation, when it has been allocated, and when/if it
387+
/// has been deallocated, for error reporting purposes.
388+
struct AllocationTraceInfoTy {
389+
390+
/// The stack trace of the allocation itself.
391+
std::string AllocationTrace;
392+
393+
/// The stack trace of the deallocation, or empty.
394+
std::string DeallocationTrace;
395+
396+
/// The allocated device pointer.
397+
void *DevicePtr = nullptr;
398+
399+
/// The corresponding host pointer (can be null).
400+
void *HostPtr = nullptr;
401+
402+
/// The size of the allocation.
403+
uint64_t Size = 0;
404+
405+
/// The kind of the allocation.
406+
TargetAllocTy Kind = TargetAllocTy::TARGET_ALLOC_DEFAULT;
407+
408+
/// Information about the last allocation at this address, if any.
409+
AllocationTraceInfoTy *LastAllocationInfo = nullptr;
410+
};
411+
385412
/// Class representing a map of host pinned allocations. We track these pinned
386413
/// allocations, so memory transfers involving these buffers can be optimized.
387414
class PinnedAllocationMapTy {
@@ -866,6 +893,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
866893
/// Reference to the underlying plugin that created this device.
867894
GenericPluginTy &Plugin;
868895

896+
/// Map to record when allocations have been performed, and when they have
897+
/// been deallocated, both for error reporting purposes.
898+
ProtectedObj<DenseMap<void *, AllocationTraceInfoTy *>> AllocationTraces;
899+
869900
private:
870901
/// Get and set the stack size and heap size for the device. If not used, the
871902
/// plugin can implement the setters as no-op and setting the output
@@ -916,6 +947,11 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
916947
UInt32Envar OMPX_InitialNumStreams;
917948
UInt32Envar OMPX_InitialNumEvents;
918949

950+
/// Environment variable to determine if stack traces for allocations and
951+
/// deallocations are tracked.
952+
BoolEnvar OMPX_TrackAllocationTraces =
953+
BoolEnvar("OFFLOAD_TRACK_ALLOCATION_TRACES", false);
954+
919955
/// Array of images loaded into the device. Images are automatically
920956
/// deallocated by the allocator.
921957
llvm::SmallVector<DeviceImageTy *> LoadedImages;

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "Shared/Debug.h"
1515
#include "Shared/Environment.h"
1616

17+
#include "ErrorReporting.h"
1718
#include "GlobalHandler.h"
1819
#include "JIT.h"
1920
#include "Utils/ELF.h"
@@ -30,6 +31,8 @@
3031
#include "llvm/Support/JSON.h"
3132
#include "llvm/Support/MathExtras.h"
3233
#include "llvm/Support/MemoryBuffer.h"
34+
#include "llvm/Support/Signals.h"
35+
#include "llvm/Support/raw_ostream.h"
3336

3437
#include <cstdint>
3538
#include <limits>
@@ -1337,6 +1340,25 @@ Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr,
13371340
if (auto Err = PinnedAllocs.registerHostBuffer(Alloc, Alloc, Size))
13381341
return std::move(Err);
13391342

1343+
// Keep track of the allocation stack if we track allocation traces.
1344+
if (OMPX_TrackAllocationTraces) {
1345+
std::string StackTrace;
1346+
llvm::raw_string_ostream OS(StackTrace);
1347+
llvm::sys::PrintStackTrace(OS);
1348+
1349+
AllocationTraceInfoTy *ATI = new AllocationTraceInfoTy();
1350+
ATI->AllocationTrace = std::move(StackTrace);
1351+
ATI->DevicePtr = Alloc;
1352+
ATI->HostPtr = HostPtr;
1353+
ATI->Size = Size;
1354+
ATI->Kind = Kind;
1355+
1356+
auto AllocationTraceMap = AllocationTraces.getExclusiveAccessor();
1357+
auto *&MapATI = (*AllocationTraceMap)[Alloc];
1358+
ATI->LastAllocationInfo = MapATI;
1359+
MapATI = ATI;
1360+
}
1361+
13401362
return Alloc;
13411363
}
13421364

@@ -1345,6 +1367,21 @@ Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) {
13451367
if (Plugin.getRecordReplay().isRecordingOrReplaying())
13461368
return Plugin::success();
13471369

1370+
// Keep track of the deallocation stack if we track allocation traces.
1371+
if (OMPX_TrackAllocationTraces) {
1372+
AllocationTraceInfoTy *ATI = nullptr;
1373+
{
1374+
auto AllocationTraceMap = AllocationTraces.getExclusiveAccessor();
1375+
ATI = (*AllocationTraceMap)[TgtPtr];
1376+
}
1377+
1378+
std::string StackTrace;
1379+
llvm::raw_string_ostream OS(StackTrace);
1380+
llvm::sys::PrintStackTrace(OS);
1381+
1382+
ErrorReporter::checkDeallocation(this, TgtPtr, Kind, ATI, StackTrace);
1383+
}
1384+
13481385
int Res;
13491386
switch (Kind) {
13501387
case TARGET_ALLOC_DEFAULT:

offload/src/omptarget.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,9 @@ void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
462462
FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());
463463

464464
if (DeviceOrErr->deleteData(DevicePtr, Kind) == OFFLOAD_FAIL)
465-
FATAL_MESSAGE(DeviceNum, "%s", "Failed to deallocate device ptr");
465+
FATAL_MESSAGE(DeviceNum, "%s",
466+
"Failed to deallocate device ptr. Set "
467+
"OFFLOAD_TRACK_ALLOCATION_TRACES=1 to track allocations.");
466468

467469
DP("omp_target_free deallocated device ptr\n");
468470
}

0 commit comments

Comments
 (0)