Skip to content

[Offload] Implement double free (and other allocation error) reporting #100261

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
226 changes: 226 additions & 0 deletions offload/plugins-nextgen/common/include/ErrorReporting.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
//===- ErrorReporting.h - Helper to provide nice error messages ----- c++ -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#ifndef OFFLOAD_PLUGINS_NEXTGEN_COMMON_ERROR_REPORTING_H
#define OFFLOAD_PLUGINS_NEXTGEN_COMMON_ERROR_REPORTING_H

#include "PluginInterface.h"
#include "Shared/EnvironmentVar.h"

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <functional>
#include <optional>
#include <string>
#include <unistd.h>

namespace llvm {
namespace omp {
namespace target {
namespace plugin {

class ErrorReporter {

enum ColorTy {
Yellow = int(HighlightColor::Address),
Green = int(HighlightColor::String),
DarkBlue = int(HighlightColor::Tag),
Cyan = int(HighlightColor::Attribute),
DarkPurple = int(HighlightColor::Enumerator),
DarkRed = int(HighlightColor::Macro),
BoldRed = int(HighlightColor::Error),
BoldLightPurple = int(HighlightColor::Warning),
BoldDarkGrey = int(HighlightColor::Note),
BoldLightBlue = int(HighlightColor::Remark),
};

/// Prefix emitted in front of every error report.
static constexpr const char *ErrorBanner = "OFFLOAD ERROR: ";

/// Render the id of \p Device as a string. A null \p Device yields "n/a".
static std::string getDeviceIdStr(GenericDeviceTy *Device) {
  if (!Device)
    return "n/a";
  return std::to_string(Device->getDeviceId());
}

/// Return a nice name for a TargetAllocTy.
///
/// Every result refers to a string literal, so callers in this file may pass
/// the returned StringRef's data() to a "%s" conversion.
static StringRef getAllocTyName(TargetAllocTy Kind) {
  // No default label: every enumerator handled here returns directly, and
  // anything else falls through to the llvm_unreachable below.
  switch (Kind) {
  case TARGET_ALLOC_DEVICE_NON_BLOCKING:
  case TARGET_ALLOC_DEFAULT:
  case TARGET_ALLOC_DEVICE:
    return "device memory";
  case TARGET_ALLOC_HOST:
    return "pinned host memory";
  case TARGET_ALLOC_SHARED:
    return "managed memory";
  }
  llvm_unreachable("Unknown target alloc kind");
}

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wgcc-compat"
#pragma clang diagnostic ignored "-Wformat-security"
/// Print \p Format, instantiated with \p Args to stderr.
/// TODO: Allow redirection into a file stream.
template <typename... ArgsTy>
[[gnu::format(__printf__, 1, 2)]] static void print(const char *Format,
ArgsTy &&...Args) {
raw_fd_ostream OS(STDERR_FILENO, false);
OS << llvm::format(Format, Args...);
}

/// Write the printf-style \p Format, expanded with \p Args, to stderr using
/// the highlight color that \p Color maps to.
/// TODO: Allow redirection into a file stream.
template <typename... ArgsTy>
[[gnu::format(__printf__, 2, 3)]] static void
print(ColorTy Color, const char *Format, ArgsTy &&...Args) {
  raw_fd_ostream ErrStream(STDERR_FILENO, false);
  // The color wrapper is declared after the stream, so it is destroyed first.
  WithColor Colored(ErrStream, HighlightColor(Color));
  Colored << llvm::format(Format, Args...);
}

/// Emit an error line on stderr: the error banner followed by the printf-style
/// \p Format expanded with \p Args (both in bold red), then a newline.
/// TODO: Allow redirection into a file stream.
template <typename... ArgsTy>
[[gnu::format(__printf__, 1, 2)]] static void reportError(const char *Format,
                                                          ArgsTy &&...Args) {
  // The plain-string overload forwards the banner through a "%s" format.
  print(BoldRed, ErrorBanner);
  print(BoldRed, Format, Args...);
  // The terminating newline is printed without color.
  print("\n");
}
#pragma clang diagnostic pop

/// Report \p Str verbatim as an error; it is passed as a "%s" argument so any
/// '%' characters in it are not interpreted.
static void reportError(const char *Str) {
  reportError("%s", Str);
}

/// Print \p Str verbatim to stderr.
static void print(const char *Str) {
  print("%s", Str);
}

/// Print \p Str verbatim to stderr. The StringRef is copied into a std::string
/// to obtain a null-terminated buffer for the "%s" conversion.
static void print(StringRef Str) {
  const std::string Owned = Str.str();
  print("%s", Owned.c_str());
}

/// Print \p Str verbatim to stderr in \p Color.
static void print(ColorTy Color, const char *Str) {
  print(Color, "%s", Str);
}

/// Print \p Str verbatim to stderr in \p Color. The StringRef is copied into
/// a std::string to obtain a null-terminated buffer for the "%s" conversion.
static void print(ColorTy Color, StringRef Str) {
  const std::string Owned = Str.str();
  print(Color, "%s", Owned.c_str());
}

/// Pretty print a stack trace.
///
/// \p StackTrace is split into lines. A line of the shape
/// "#<index> <field> <rest>" is treated as a frame and printed with colors and
/// a right-aligned index; every other line is printed verbatim. An empty
/// \p StackTrace prints nothing.
static void reportStackTrace(StringRef StackTrace) {
  // Note from the review discussion of this change: LLVM is used to obtain
  // the stack trace as a string. From there, this function 1) removes the
  // first line (nobody wants to know we are printing the stack trace), 2) adds
  // color, and 3) decides where to print it to (for now stderr).
  if (StackTrace.empty())
    return;

  SmallVector<StringRef> Lines, Parts;
  StackTrace.split(Lines, "\n", /*MaxSplit=*/-1, /*KeepEmpty=*/false);

  // Skip the first line if it is the frame of the trace printer itself.
  const size_t Start =
      (!Lines.empty() && Lines[0].contains("PrintStackTrace")) ? 1 : 0;

  // Width of the index column: the number of decimal digits of the largest
  // index printed. Integer arithmetic keeps this well defined when zero or one
  // line remains (a floating point log10 would be evaluated at 0 or at a
  // wrapped-around size). Start never exceeds Lines.size() by construction.
  const size_t NumFrames = Lines.size() - Start;
  int NumDigits = 1;
  for (size_t MaxIdx = NumFrames ? NumFrames - 1 : 0; MaxIdx >= 10;
       MaxIdx /= 10)
    ++NumDigits;

  for (size_t I = Start, E = Lines.size(); I < E; ++I) {
    StringRef Line = Lines[I].ltrim();
    Parts.clear();
    Line.split(Parts, " ", /*MaxSplit=*/2);

    // A frame line has exactly three parts and starts with '#' followed by a
    // decimal index. getAsInteger returns true on failure, so malformed tags
    // fall back to verbatim printing.
    unsigned FrameIdx = 0;
    const bool IsFrame = Parts.size() == 3 && Parts[0].size() >= 2 &&
                         Parts[0].front() == '#' &&
                         !Parts[0].drop_front(1).getAsInteger(10, FrameIdx);
    if (!IsFrame) {
      print("%s\n", Line.str().c_str());
      continue;
    }

    // Dropping the first line shifts the remaining indices down by one; never
    // decrement below zero since FrameIdx is unsigned.
    if (Start && FrameIdx > 0)
      --FrameIdx;

    print(DarkPurple, " %s", Parts[0].take_front().str().c_str());
    // The '*' field width consumes an int argument.
    print(Green, "%*u", NumDigits, FrameIdx);
    print(BoldLightBlue, " %s", Parts[1].str().c_str());
    print(" %s\n", Parts[2].str().c_str());
  }
  print("\n");
}

/// Report information about an allocation associated with \p ATI.
///
/// Prints, in order: the last deallocation trace (if one was recorded), the
/// last allocation with its size, host pointer (if set) and trace, and then
/// the chain of prior allocations reachable through LastAllocationInfo. Does
/// nothing if \p ATI is null.
static void reportAllocationInfo(AllocationTraceInfoTy *ATI) {
  if (!ATI)
    return;

  if (!ATI->DeallocationTrace.empty()) {
    print(BoldLightPurple, "Last deallocation:\n");
    reportStackTrace(ATI->DeallocationTrace);
  }

  // Sizes are passed as unsigned long long with a matching %llu conversion so
  // the format is correct regardless of how uint64_t is defined.
  if (ATI->HostPtr)
    print(BoldLightPurple,
          "Last allocation of size %llu for host pointer %p:\n",
          static_cast<unsigned long long>(ATI->Size), ATI->HostPtr);
  else
    print(BoldLightPurple, "Last allocation of size %llu:\n",
          static_cast<unsigned long long>(ATI->Size));
  reportStackTrace(ATI->AllocationTrace);

  // Only print the "prior allocations" header if there is at least one.
  if (!ATI->LastAllocationInfo)
    return;

  print(BoldLightPurple, "Prior allocations with the same base pointer:");
  // Walk the chain of earlier records; I numbers them starting at 0.
  for (unsigned I = 0; ATI->LastAllocationInfo; ++I) {
    print("\n");
    ATI = ATI->LastAllocationInfo;
    print(BoldLightPurple, " #%u Prior deallocation of size %llu:\n", I,
          static_cast<unsigned long long>(ATI->Size));
    reportStackTrace(ATI->DeallocationTrace);
    if (ATI->HostPtr)
      print(BoldLightPurple, " #%u Prior allocation for host pointer %p:\n",
            I, ATI->HostPtr);
    else
      print(BoldLightPurple, " #%u Prior allocation:\n", I);
    reportStackTrace(ATI->AllocationTrace);
  }
}

/// Terminate the program by calling abort.
static void abortExecution() {
  std::abort();
}

public:
// Report a deallocation error and end execution: prints the error message,
// the current stack trace, and the recorded allocation information, then
// aborts. Expects `StackTrace` and `ATI` to be in scope at the expansion site.
// The body is wrapped in do { } while (false) so the expansion is a single
// statement, even when used under an unbraced `if`.
#define DEALLOCATION_ERROR(Format, ...)                                        \
  do {                                                                         \
    reportError(Format, __VA_ARGS__);                                          \
    reportStackTrace(StackTrace);                                              \
    reportAllocationInfo(ATI);                                                 \
    abortExecution();                                                          \
  } while (false)

/// Report that \p DevicePtr was passed to a deallocation of kind \p Kind
/// without being known as an allocation, then end execution. \p StackTrace is
/// the trace of the offending deallocation.
static void reportDeallocationOfNonAllocatedPtr(void *DevicePtr,
                                                TargetAllocTy Kind,
                                                AllocationTraceInfoTy *ATI,
                                                std::string &StackTrace) {
  const char *KindName = getAllocTyName(Kind).data();
  DEALLOCATION_ERROR("deallocation of non-allocated %s: %p", KindName,
                     DevicePtr);
}

/// Report a double free of \p DevicePtr (allocation kind \p Kind), then end
/// execution. \p ATI describes the allocation and \p StackTrace is the trace
/// of the offending deallocation.
static void reportDeallocationOfDeallocatedPtr(void *DevicePtr,
                                               TargetAllocTy Kind,
                                               AllocationTraceInfoTy *ATI,
                                               std::string &StackTrace) {
  const char *KindName = getAllocTyName(Kind).data();
  DEALLOCATION_ERROR("double-free of %s: %p", KindName, DevicePtr);
}

/// Report that \p DevicePtr is being deallocated as kind \p Kind although the
/// allocation recorded in \p ATI has a different kind, then end execution.
/// \p ATI is dereferenced and therefore must not be null.
static void reportDeallocationOfWrongPtrKind(void *DevicePtr,
                                             TargetAllocTy Kind,
                                             AllocationTraceInfoTy *ATI,
                                             std::string &StackTrace) {
  const char *RequestedKind = getAllocTyName(Kind).data();
  const char *RecordedKind = getAllocTyName(ATI->Kind).data();
  DEALLOCATION_ERROR("deallocation requires %s but allocation was %s: %p",
                     RequestedKind, RecordedKind, DevicePtr);
}
// The helper macro is only meant for the reporting functions above.
#undef DEALLOCATION_ERROR
};

} // namespace plugin
} // namespace target
} // namespace omp
} // namespace llvm

#endif // OFFLOAD_PLUGINS_NEXTGEN_COMMON_ERROR_REPORTING_H
39 changes: 39 additions & 0 deletions offload/plugins-nextgen/common/include/PluginInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <shared_mutex>
#include <vector>

#include "ExclusiveAccess.h"
#include "Shared/APITypes.h"
#include "Shared/Debug.h"
#include "Shared/Environment.h"
Expand Down Expand Up @@ -382,6 +383,35 @@ struct GenericKernelTy {
bool IsBareKernel = false;
};

/// Record describing one allocation for error reporting purposes: where it
/// was allocated and, if it has been deallocated, where that happened.
struct AllocationTraceInfoTy {

  /// Stack trace captured at the allocation.
  std::string AllocationTrace;

  /// Stack trace captured at the deallocation; empty if there is none.
  std::string DeallocationTrace;

  /// The device pointer that was allocated.
  void *DevicePtr{nullptr};

  /// The host pointer associated with the allocation (may be null).
  void *HostPtr{nullptr};

  /// Size of the allocation.
  uint64_t Size{0};

  /// Kind of the allocation.
  TargetAllocTy Kind{TargetAllocTy::TARGET_ALLOC_DEFAULT};

  /// Record of the previous allocation at this address, or null if none.
  AllocationTraceInfoTy *LastAllocationInfo{nullptr};

  /// Lock to keep accesses race free.
  std::mutex Lock;
};

/// Class representing a map of host pinned allocations. We track these pinned
/// allocations, so memory transfers involving these buffers can be optimized.
class PinnedAllocationMapTy {
Expand Down Expand Up @@ -866,6 +896,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// Reference to the underlying plugin that created this device.
GenericPluginTy &Plugin;

/// Map to record when allocations have been performed, and when they have
/// been deallocated, both for error reporting purposes.
ProtectedObj<DenseMap<void *, AllocationTraceInfoTy *>> AllocationTraces;

private:
/// Get and set the stack size and heap size for the device. If not used, the
/// plugin can implement the setters as no-op and setting the output
Expand Down Expand Up @@ -916,6 +950,11 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
UInt32Envar OMPX_InitialNumStreams;
UInt32Envar OMPX_InitialNumEvents;

/// Environment variable to determine if stack traces for allocations and
/// deallocations are tracked.
BoolEnvar OMPX_TrackAllocationTraces =
BoolEnvar("OFFLOAD_TRACK_ALLOCATION_TRACES", false);

/// Array of images loaded into the device. Images are automatically
/// deallocated by the allocator.
llvm::SmallVector<DeviceImageTy *> LoadedImages;
Expand Down
53 changes: 53 additions & 0 deletions offload/plugins-nextgen/common/src/PluginInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "Shared/Debug.h"
#include "Shared/Environment.h"

#include "ErrorReporting.h"
#include "GlobalHandler.h"
#include "JIT.h"
#include "Utils/ELF.h"
Expand All @@ -30,6 +31,8 @@
#include "llvm/Support/JSON.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/raw_ostream.h"

#include <cstdint>
#include <limits>
Expand Down Expand Up @@ -1337,6 +1340,25 @@ Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr,
if (auto Err = PinnedAllocs.registerHostBuffer(Alloc, Alloc, Size))
return std::move(Err);

// Keep track of the allocation stack if we track allocation traces.
if (OMPX_TrackAllocationTraces) {
std::string StackTrace;
llvm::raw_string_ostream OS(StackTrace);
llvm::sys::PrintStackTrace(OS);

AllocationTraceInfoTy *ATI = new AllocationTraceInfoTy();
ATI->AllocationTrace = std::move(StackTrace);
ATI->DevicePtr = Alloc;
ATI->HostPtr = HostPtr;
ATI->Size = Size;
ATI->Kind = Kind;

auto AllocationTraceMap = AllocationTraces.getExclusiveAccessor();
auto *&MapATI = (*AllocationTraceMap)[Alloc];
ATI->LastAllocationInfo = MapATI;
MapATI = ATI;
}

return Alloc;
}

Expand All @@ -1345,6 +1367,37 @@ Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) {
if (Plugin.getRecordReplay().isRecordingOrReplaying())
return Plugin::success();

// Keep track of the deallocation stack if we track allocation traces.
if (OMPX_TrackAllocationTraces) {
AllocationTraceInfoTy *ATI = nullptr;
{
auto AllocationTraceMap = AllocationTraces.getExclusiveAccessor();
ATI = (*AllocationTraceMap)[TgtPtr];
}

std::string StackTrace;
llvm::raw_string_ostream OS(StackTrace);
llvm::sys::PrintStackTrace(OS);

if (!ATI)
ErrorReporter::reportDeallocationOfNonAllocatedPtr(TgtPtr, Kind, ATI,
StackTrace);

// ATI is not null, thus we can lock it to inspect and modify it further.
std::lock_guard<std::mutex> LG(ATI->Lock);
if (!ATI->DeallocationTrace.empty())
ErrorReporter::reportDeallocationOfDeallocatedPtr(TgtPtr, Kind, ATI,
StackTrace);

if (ATI->Kind != Kind)
ErrorReporter::reportDeallocationOfWrongPtrKind(TgtPtr, Kind, ATI,
StackTrace);

ATI->DeallocationTrace = StackTrace;

#undef DEALLOCATION_ERROR
}

int Res;
switch (Kind) {
case TARGET_ALLOC_DEFAULT:
Expand Down
4 changes: 3 additions & 1 deletion offload/src/omptarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,9 @@ void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());

if (DeviceOrErr->deleteData(DevicePtr, Kind) == OFFLOAD_FAIL)
FATAL_MESSAGE(DeviceNum, "%s", "Failed to deallocate device ptr");
FATAL_MESSAGE(DeviceNum, "%s",
"Failed to deallocate device ptr. Set "
"OFFLOAD_TRACK_ALLOCATION_TRACES=1 to track allocations.");

DP("omp_target_free deallocated device ptr\n");
}
Expand Down
Loading
Loading