-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[Offload] Allow to record kernel launch stack traces #100472
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ | |
#include "PluginInterface.h" | ||
#include "Shared/EnvironmentVar.h" | ||
|
||
#include "llvm/ADT/STLExtras.h" | ||
#include "llvm/ADT/SmallString.h" | ||
#include "llvm/ADT/StringRef.h" | ||
#include "llvm/Support/ErrorHandling.h" | ||
|
@@ -216,6 +217,90 @@ class ErrorReporter { | |
getAllocTyName(ATI->Kind).data(), DevicePtr); | ||
#undef DEALLOCATION_ERROR | ||
} | ||
|
||
/// Report that a kernel encountered a trap instruction and abort.
///
/// The record \p KTIR is searched from the most recent launch backwards for
/// the first entry that either carries no async info or whose async info is
/// accepted by \p AsyncInfoWrapperMatcher; entries issued in other queues are
/// skipped. If no such entry is found, the most recent entry is used. Kernel
/// name and launch trace are only printed if the chosen entry has async info
/// that the matcher accepts.
///
/// \param Device Device providing the environment variable named in the hint.
/// \param KTIR Record of recent kernel launches to search.
/// \param AsyncInfoWrapperMatcher Predicate identifying the queue in which the
///        trap occurred; must be callable (asserted).
static void reportTrapInKernel(
    GenericDeviceTy &Device, KernelTraceInfoRecordTy &KTIR,
    std::function<bool(__tgt_async_info &)> AsyncInfoWrapperMatcher) {
  assert(AsyncInfoWrapperMatcher && "A matcher is required");

  // True if the entry has async info and the matcher accepts it.
  auto IssuedInMatchingQueue = [&](const auto &Info) {
    return Info.AsyncInfo && AsyncInfoWrapperMatcher(*Info.AsyncInfo);
  };

  // Walk the record newest-first; stop at the first unused slot.
  uint32_t SelectedIdx = 0;
  const uint32_t NumSlots = KTIR.size();
  for (uint32_t Pos = 0; Pos < NumSlots; ++Pos) {
    const auto Candidate = KTIR.getKernelTraceInfo(Pos);
    if (!Candidate.Kernel)
      break;
    // Take the entry unless it was issued in another queue.
    if (!Candidate.AsyncInfo || AsyncInfoWrapperMatcher(*Candidate.AsyncInfo)) {
      SelectedIdx = Pos;
      break;
    }
  }

  const auto Culprit = KTIR.getKernelTraceInfo(SelectedIdx);
  if (IssuedInMatchingQueue(Culprit))
    reportError("Kernel '%s'", Culprit.Kernel->getName());
  reportError("execution interrupted by hardware trap instruction");
  if (IssuedInMatchingQueue(Culprit)) {
    if (Culprit.LaunchTrace.empty())
      print(Yellow, "Use '%s=1' to show the stack trace of the kernel\n",
            Device.OMPX_TrackNumKernelLaunches.getName().data());
    else
      reportStackTrace(Culprit.LaunchTrace);
  }
  abort();
}
|
||
/// Report the kernel traces taken from \p KTIR, up to
/// OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES many.
///
/// Kernels are listed starting with the most recent launch. If the record
/// holds no launches, a short notice is printed instead.
///
/// \param Device Device providing the OMPX_TrackNumKernelLaunches setting.
/// \param KTIR Record of recent kernel launches to print.
static void reportKernelTraces(GenericDeviceTy &Device,
                               KernelTraceInfoRecordTy &KTIR) {
  // Count the populated entries; the first entry without a kernel ends the
  // used part of the record.
  uint32_t NumKTIs = 0;
  for (uint32_t I = 0, E = KTIR.size(); I < E; ++I) {
    if (KTIR.getKernelTraceInfo(I).Kernel == nullptr)
      break;
    ++NumKTIs;
  }
  if (NumKTIs == 0) {
    print(BoldRed, "No kernel launches known\n");
    return;
  }

  uint32_t TracesToShow =
      std::min(Device.OMPX_TrackNumKernelLaunches.get(), NumKTIs);
  if (TracesToShow == 0) {
    if (NumKTIs == 1)
      print(BoldLightPurple, "Display only launched kernel:\n");
    else
      print(BoldLightPurple, "Display last %u kernels launched:\n", NumKTIs);
  } else {
    if (NumKTIs == 1)
      print(BoldLightPurple, "Display kernel launch trace:\n");
    else
      print(BoldLightPurple,
            "Display %u of the %u last kernel launch traces:\n", TracesToShow,
            NumKTIs);
  }

  for (uint32_t I = 0; I < NumKTIs; ++I) {
    const auto KTI = KTIR.getKernelTraceInfo(I);
    if (NumKTIs == 1)
      print(BoldLightPurple, "Kernel '%s'\n", KTI.Kernel->getName());
    else
      // The index is unsigned, so it is printed with %u.
      print(BoldLightPurple, "Kernel %u: '%s'\n", I, KTI.Kernel->getName());
    reportStackTrace(KTI.LaunchTrace);
  }

  if (NumKTIs != 1) {
    print(Yellow,
          "Use '%s=<num>' to adjust the number of shown stack traces (%u "
          "now, up to %zu)\n",
          Device.OMPX_TrackNumKernelLaunches.getName().data(),
          Device.OMPX_TrackNumKernelLaunches.get(), KTIR.size());
  }
  // TODO: Let users know how to serialize kernels
}
jdoerfert marked this conversation as resolved.
Show resolved
Hide resolved
|
||
}; | ||
|
||
} // namespace plugin | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -412,6 +412,44 @@ struct AllocationTraceInfoTy { | |
std::mutex Lock; | ||
}; | ||
|
||
/// Information about an allocation, when it has been allocated, and when/if it | ||
/// has been deallocated, for error reporting purposes. | ||
struct KernelTraceInfoTy { | ||
|
||
/// The launched kernel. | ||
GenericKernelTy *Kernel; | ||
|
||
/// The stack trace of the launch itself. | ||
std::string LaunchTrace; | ||
|
||
/// The async info the kernel was launched in. | ||
__tgt_async_info *AsyncInfo; | ||
}; | ||
|
||
struct KernelTraceInfoRecordTy { | ||
KernelTraceInfoRecordTy() { KTIs.fill({}); } | ||
|
||
/// Return the (maximal) record size. | ||
auto size() const { return KTIs.size(); } | ||
|
||
/// Create a new kernel trace info and add it into the record. | ||
void emplace(GenericKernelTy *Kernel, const std::string &&StackTrace, | ||
__tgt_async_info *AsyncInfo) { | ||
KTIs[Idx] = {Kernel, std::move(StackTrace), AsyncInfo}; | ||
Idx = (Idx + 1) % size(); | ||
} | ||
|
||
/// Return the \p I'th last kernel trace info. | ||
auto getKernelTraceInfo(int32_t I) const { | ||
// Note that kernel trace infos "grow forward", so lookup is backwards. | ||
return KTIs[(Idx - I - 1 + size()) % size()]; | ||
} | ||
|
||
private: | ||
std::array<KernelTraceInfoTy, 8> KTIs; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are we using a static array here? We could use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right now, I use it as a fixed size ring buffer. It is unclear to me why one would store more than the last few traces, at least for now. |
||
unsigned Idx = 0; | ||
}; | ||
|
||
/// Class representing a map of host pinned allocations. We track these pinned | ||
/// allocations, so memory transfers involving these buffers can be optimized. | ||
class PinnedAllocationMapTy { | ||
|
@@ -900,6 +938,14 @@ struct GenericDeviceTy : public DeviceAllocatorTy { | |
/// been deallocated, both for error reporting purposes. | ||
ProtectedObj<DenseMap<void *, AllocationTraceInfoTy *>> AllocationTraces; | ||
|
||
/// Record of the kernels that have been launched, for error reporting purposes. | ||
ProtectedObj<KernelTraceInfoRecordTy> KernelLaunchTraces; | ||
|
||
/// Environment variable to determine if stack traces for kernel launches are | ||
/// tracked. | ||
UInt32Envar OMPX_TrackNumKernelLaunches = | ||
UInt32Envar("OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES", 0); | ||
|
||
private: | ||
/// Get and set the stack size and heap size for the device. If not used, the | ||
/// plugin can implement the setters as no-op and setting the output | ||
|
Uh oh!
There was an error while loading. Please reload this page.