Skip to content

Commit 7cd0443

Browse files
authored
Merge pull request #2325 from AllanZyne/review/yang/restructure_asan_msan
[DeviceMSAN] Support MemorySanitizer for device offloading
2 parents ce69da1 + 3e662f1 commit 7cd0443

24 files changed

+3640
-158
lines changed

source/loader/CMakeLists.txt

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -151,6 +151,21 @@ if(UR_ENABLE_SANITIZER)
151151
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_statistics.hpp
152152
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_validator.cpp
153153
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_validator.hpp
154+
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_allocator.cpp
155+
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_allocator.hpp
156+
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_buffer.cpp
157+
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_buffer.hpp
158+
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_ddi.cpp
159+
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_ddi.hpp
160+
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_interceptor.cpp
161+
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_interceptor.hpp
162+
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_libdevice.hpp
163+
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_options.cpp
164+
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_options.hpp
165+
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_report.cpp
166+
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_report.hpp
167+
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_shadow.cpp
168+
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_shadow.hpp
154169
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/linux/backtrace.cpp
155170
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/linux/sanitizer_utils.cpp
156171
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_allocator.hpp
@@ -160,6 +175,7 @@ if(UR_ENABLE_SANITIZER)
160175
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_stacktrace.hpp
161176
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp
162177
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp
178+
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanddi.cpp
163179
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanitizer_layer.cpp
164180
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanitizer_layer.hpp
165181
)

source/loader/layers/sanitizer/asan/asan_ddi.cpp

Lines changed: 7 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -2053,26 +2053,10 @@ __urdlllocal ur_result_t UR_APICALL urGetVirtualMemProcAddrTable(
20532053
}
20542054
} // namespace asan
20552055

2056-
ur_result_t context_t::init(ur_dditable_t *dditable,
2057-
const std::set<std::string> &enabledLayerNames,
2058-
[[maybe_unused]] codeloc_data codelocData) {
2056+
ur_result_t initAsanDDITable(ur_dditable_t *dditable) {
20592057
ur_result_t result = UR_RESULT_SUCCESS;
20602058

2061-
if (enabledLayerNames.count("UR_LAYER_ASAN")) {
2062-
enabledType = SanitizerType::AddressSanitizer;
2063-
initAsanInterceptor();
2064-
} else if (enabledLayerNames.count("UR_LAYER_MSAN")) {
2065-
enabledType = SanitizerType::MemorySanitizer;
2066-
} else if (enabledLayerNames.count("UR_LAYER_TSAN")) {
2067-
enabledType = SanitizerType::ThreadSanitizer;
2068-
}
2069-
2070-
// Only support AddressSanitizer now
2071-
if (enabledType != SanitizerType::AddressSanitizer) {
2072-
return result;
2073-
}
2074-
2075-
urDdiTable = *dditable;
2059+
getContext()->logger.always("==== DeviceSanitizer: ASAN");
20762060

20772061
if (UR_RESULT_SUCCESS == result) {
20782062
result = ur_sanitizer_layer::asan::urGetGlobalProcAddrTable(
@@ -2134,6 +2118,11 @@ ur_result_t context_t::init(ur_dditable_t *dditable,
21342118
UR_API_VERSION_CURRENT, &dditable->VirtualMem);
21352119
}
21362120

2121+
if (result != UR_RESULT_SUCCESS) {
2122+
getContext()->logger.error("Initialize ASAN DDI table failed: {}",
2123+
result);
2124+
}
2125+
21372126
return result;
21382127
}
21392128

source/loader/layers/sanitizer/asan/asan_ddi.hpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,4 +17,6 @@ namespace ur_sanitizer_layer {
1717
void initAsanInterceptor();
1818
void destroyAsanInterceptor();
1919

20+
ur_result_t initAsanDDITable(ur_dditable_t *dditable);
21+
2022
} // namespace ur_sanitizer_layer

source/loader/layers/sanitizer/asan/asan_interceptor.cpp

Lines changed: 131 additions & 138 deletions
Original file line number | Diff line number | Diff line change
@@ -676,162 +676,155 @@ ur_result_t AsanInterceptor::prepareLaunch(
676676
std::shared_ptr<DeviceInfo> &DeviceInfo, ur_queue_handle_t Queue,
677677
ur_kernel_handle_t Kernel, LaunchInfo &LaunchInfo) {
678678

679-
do {
680-
auto KernelInfo = getKernelInfo(Kernel);
681-
assert(KernelInfo && "Kernel should be instrumented");
682-
683-
// Validate pointer arguments
684-
if (getOptions().DetectKernelArguments) {
685-
for (const auto &[ArgIndex, PtrPair] : KernelInfo->PointerArgs) {
686-
auto Ptr = PtrPair.first;
687-
if (Ptr == nullptr) {
688-
continue;
689-
}
690-
if (auto ValidateResult = ValidateUSMPointer(
691-
ContextInfo->Handle, DeviceInfo->Handle, (uptr)Ptr)) {
692-
ReportInvalidKernelArgument(Kernel, ArgIndex, (uptr)Ptr,
693-
ValidateResult, PtrPair.second);
694-
exitWithErrors();
695-
}
679+
auto KernelInfo = getKernelInfo(Kernel);
680+
assert(KernelInfo && "Kernel should be instrumented");
681+
682+
// Validate pointer arguments
683+
if (getOptions().DetectKernelArguments) {
684+
for (const auto &[ArgIndex, PtrPair] : KernelInfo->PointerArgs) {
685+
auto Ptr = PtrPair.first;
686+
if (Ptr == nullptr) {
687+
continue;
688+
}
689+
if (auto ValidateResult = ValidateUSMPointer(
690+
ContextInfo->Handle, DeviceInfo->Handle, (uptr)Ptr)) {
691+
ReportInvalidKernelArgument(Kernel, ArgIndex, (uptr)Ptr,
692+
ValidateResult, PtrPair.second);
693+
exitWithErrors();
696694
}
697695
}
696+
}
698697

699-
// Set membuffer arguments
700-
for (const auto &[ArgIndex, MemBuffer] : KernelInfo->BufferArgs) {
701-
char *ArgPointer = nullptr;
702-
UR_CALL(MemBuffer->getHandle(DeviceInfo->Handle, ArgPointer));
703-
ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer(
704-
Kernel, ArgIndex, nullptr, ArgPointer);
705-
if (URes != UR_RESULT_SUCCESS) {
706-
getContext()->logger.error(
707-
"Failed to set buffer {} as the {} arg to kernel {}: {}",
708-
ur_cast<ur_mem_handle_t>(MemBuffer.get()), ArgIndex, Kernel,
709-
URes);
710-
}
698+
// Set membuffer arguments
699+
for (const auto &[ArgIndex, MemBuffer] : KernelInfo->BufferArgs) {
700+
char *ArgPointer = nullptr;
701+
UR_CALL(MemBuffer->getHandle(DeviceInfo->Handle, ArgPointer));
702+
ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer(
703+
Kernel, ArgIndex, nullptr, ArgPointer);
704+
if (URes != UR_RESULT_SUCCESS) {
705+
getContext()->logger.error(
706+
"Failed to set buffer {} as the {} arg to kernel {}: {}",
707+
ur_cast<ur_mem_handle_t>(MemBuffer.get()), ArgIndex, Kernel,
708+
URes);
711709
}
710+
}
712711

713-
auto ArgNums = GetKernelNumArgs(Kernel);
714-
// We must prepare all kernel args before call
715-
// urKernelGetSuggestedLocalWorkSize, otherwise the call will fail on
716-
// CPU device.
717-
if (ArgNums) {
718-
ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer(
719-
Kernel, ArgNums - 1, nullptr, LaunchInfo.Data.getDevicePtr());
720-
if (URes != UR_RESULT_SUCCESS) {
721-
getContext()->logger.error("Failed to set launch info: {}",
722-
URes);
723-
return URes;
724-
}
712+
auto ArgNums = GetKernelNumArgs(Kernel);
713+
// We must prepare all kernel args before call
714+
// urKernelGetSuggestedLocalWorkSize, otherwise the call will fail on
715+
// CPU device.
716+
if (ArgNums) {
717+
ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer(
718+
Kernel, ArgNums - 1, nullptr, LaunchInfo.Data.getDevicePtr());
719+
if (URes != UR_RESULT_SUCCESS) {
720+
getContext()->logger.error("Failed to set launch info: {}", URes);
721+
return URes;
725722
}
723+
}
726724

727-
if (LaunchInfo.LocalWorkSize.empty()) {
728-
LaunchInfo.LocalWorkSize.resize(LaunchInfo.WorkDim);
729-
auto URes =
730-
getContext()->urDdiTable.Kernel.pfnGetSuggestedLocalWorkSize(
731-
Kernel, Queue, LaunchInfo.WorkDim,
732-
LaunchInfo.GlobalWorkOffset, LaunchInfo.GlobalWorkSize,
733-
LaunchInfo.LocalWorkSize.data());
734-
if (URes != UR_RESULT_SUCCESS) {
735-
if (URes != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
736-
return URes;
737-
}
738-
// If urKernelGetSuggestedLocalWorkSize is not supported by driver, we fallback
739-
// to inefficient implementation
740-
for (size_t Dim = 0; Dim < LaunchInfo.WorkDim; ++Dim) {
741-
LaunchInfo.LocalWorkSize[Dim] = 1;
742-
}
725+
if (LaunchInfo.LocalWorkSize.empty()) {
726+
LaunchInfo.LocalWorkSize.resize(LaunchInfo.WorkDim);
727+
auto URes =
728+
getContext()->urDdiTable.Kernel.pfnGetSuggestedLocalWorkSize(
729+
Kernel, Queue, LaunchInfo.WorkDim, LaunchInfo.GlobalWorkOffset,
730+
LaunchInfo.GlobalWorkSize, LaunchInfo.LocalWorkSize.data());
731+
if (URes != UR_RESULT_SUCCESS) {
732+
if (URes != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
733+
return URes;
734+
}
735+
// If urKernelGetSuggestedLocalWorkSize is not supported by driver, we fallback
736+
// to inefficient implementation
737+
for (size_t Dim = 0; Dim < LaunchInfo.WorkDim; ++Dim) {
738+
LaunchInfo.LocalWorkSize[Dim] = 1;
743739
}
744740
}
741+
}
745742

746-
const size_t *LocalWorkSize = LaunchInfo.LocalWorkSize.data();
747-
uint32_t NumWG = 1;
748-
for (uint32_t Dim = 0; Dim < LaunchInfo.WorkDim; ++Dim) {
749-
NumWG *= (LaunchInfo.GlobalWorkSize[Dim] + LocalWorkSize[Dim] - 1) /
750-
LocalWorkSize[Dim];
751-
}
743+
const size_t *LocalWorkSize = LaunchInfo.LocalWorkSize.data();
744+
uint32_t NumWG = 1;
745+
for (uint32_t Dim = 0; Dim < LaunchInfo.WorkDim; ++Dim) {
746+
NumWG *= (LaunchInfo.GlobalWorkSize[Dim] + LocalWorkSize[Dim] - 1) /
747+
LocalWorkSize[Dim];
748+
}
752749

753-
// Prepare asan runtime data
754-
LaunchInfo.Data.Host.GlobalShadowOffset =
755-
DeviceInfo->Shadow->ShadowBegin;
756-
LaunchInfo.Data.Host.GlobalShadowOffsetEnd =
757-
DeviceInfo->Shadow->ShadowEnd;
758-
LaunchInfo.Data.Host.DeviceTy = DeviceInfo->Type;
759-
LaunchInfo.Data.Host.Debug = getOptions().Debug ? 1 : 0;
760-
761-
auto LocalMemoryUsage =
762-
GetKernelLocalMemorySize(Kernel, DeviceInfo->Handle);
763-
auto PrivateMemoryUsage =
764-
GetKernelPrivateMemorySize(Kernel, DeviceInfo->Handle);
765-
766-
getContext()->logger.info(
767-
"KernelInfo {} (LocalMemory={}, PrivateMemory={})", (void *)Kernel,
768-
LocalMemoryUsage, PrivateMemoryUsage);
769-
770-
// Write shadow memory offset for local memory
771-
if (getOptions().DetectLocals) {
772-
if (DeviceInfo->Shadow->AllocLocalShadow(
773-
Queue, NumWG, LaunchInfo.Data.Host.LocalShadowOffset,
774-
LaunchInfo.Data.Host.LocalShadowOffsetEnd) !=
775-
UR_RESULT_SUCCESS) {
776-
getContext()->logger.warning(
777-
"Failed to allocate shadow memory for local "
778-
"memory, maybe the number of workgroup ({}) is too "
779-
"large",
780-
NumWG);
781-
getContext()->logger.warning(
782-
"Skip checking local memory of kernel <{}>",
783-
GetKernelName(Kernel));
784-
} else {
785-
getContext()->logger.info(
786-
"ShadowMemory(Local, WorkGroup{}, {} - {})", NumWG,
787-
(void *)LaunchInfo.Data.Host.LocalShadowOffset,
788-
(void *)LaunchInfo.Data.Host.LocalShadowOffsetEnd);
789-
}
750+
// Prepare asan runtime data
751+
LaunchInfo.Data.Host.GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin;
752+
LaunchInfo.Data.Host.GlobalShadowOffsetEnd = DeviceInfo->Shadow->ShadowEnd;
753+
LaunchInfo.Data.Host.DeviceTy = DeviceInfo->Type;
754+
LaunchInfo.Data.Host.Debug = getOptions().Debug ? 1 : 0;
755+
756+
auto LocalMemoryUsage =
757+
GetKernelLocalMemorySize(Kernel, DeviceInfo->Handle);
758+
auto PrivateMemoryUsage =
759+
GetKernelPrivateMemorySize(Kernel, DeviceInfo->Handle);
760+
761+
getContext()->logger.info(
762+
"KernelInfo {} (LocalMemory={}, PrivateMemory={})", (void *)Kernel,
763+
LocalMemoryUsage, PrivateMemoryUsage);
764+
765+
// Write shadow memory offset for local memory
766+
if (getOptions().DetectLocals) {
767+
if (DeviceInfo->Shadow->AllocLocalShadow(
768+
Queue, NumWG, LaunchInfo.Data.Host.LocalShadowOffset,
769+
LaunchInfo.Data.Host.LocalShadowOffsetEnd) !=
770+
UR_RESULT_SUCCESS) {
771+
getContext()->logger.warning(
772+
"Failed to allocate shadow memory for local "
773+
"memory, maybe the number of workgroup ({}) is too "
774+
"large",
775+
NumWG);
776+
getContext()->logger.warning(
777+
"Skip checking local memory of kernel <{}>",
778+
GetKernelName(Kernel));
779+
} else {
780+
getContext()->logger.info(
781+
"ShadowMemory(Local, WorkGroup{}, {} - {})", NumWG,
782+
(void *)LaunchInfo.Data.Host.LocalShadowOffset,
783+
(void *)LaunchInfo.Data.Host.LocalShadowOffsetEnd);
790784
}
785+
}
791786

792-
// Write shadow memory offset for private memory
793-
if (getOptions().DetectPrivates) {
794-
if (DeviceInfo->Shadow->AllocPrivateShadow(
795-
Queue, NumWG, LaunchInfo.Data.Host.PrivateShadowOffset,
796-
LaunchInfo.Data.Host.PrivateShadowOffsetEnd) !=
797-
UR_RESULT_SUCCESS) {
798-
getContext()->logger.warning(
799-
"Failed to allocate shadow memory for private "
800-
"memory, maybe the number of workgroup ({}) is too "
801-
"large",
802-
NumWG);
803-
getContext()->logger.warning(
804-
"Skip checking private memory of kernel <{}>",
805-
GetKernelName(Kernel));
806-
} else {
807-
getContext()->logger.info(
808-
"ShadowMemory(Private, WorkGroup{}, {} - {})", NumWG,
809-
(void *)LaunchInfo.Data.Host.PrivateShadowOffset,
810-
(void *)LaunchInfo.Data.Host.PrivateShadowOffsetEnd);
811-
}
787+
// Write shadow memory offset for private memory
788+
if (getOptions().DetectPrivates) {
789+
if (DeviceInfo->Shadow->AllocPrivateShadow(
790+
Queue, NumWG, LaunchInfo.Data.Host.PrivateShadowOffset,
791+
LaunchInfo.Data.Host.PrivateShadowOffsetEnd) !=
792+
UR_RESULT_SUCCESS) {
793+
getContext()->logger.warning(
794+
"Failed to allocate shadow memory for private "
795+
"memory, maybe the number of workgroup ({}) is too "
796+
"large",
797+
NumWG);
798+
getContext()->logger.warning(
799+
"Skip checking private memory of kernel <{}>",
800+
GetKernelName(Kernel));
801+
} else {
802+
getContext()->logger.info(
803+
"ShadowMemory(Private, WorkGroup{}, {} - {})", NumWG,
804+
(void *)LaunchInfo.Data.Host.PrivateShadowOffset,
805+
(void *)LaunchInfo.Data.Host.PrivateShadowOffsetEnd);
812806
}
807+
}
813808

814-
// Write local arguments info
815-
if (!KernelInfo->LocalArgs.empty()) {
816-
std::vector<LocalArgsInfo> LocalArgsInfo;
817-
for (auto [ArgIndex, ArgInfo] : KernelInfo->LocalArgs) {
818-
LocalArgsInfo.push_back(ArgInfo);
819-
getContext()->logger.debug(
820-
"local_args (argIndex={}, size={}, sizeWithRZ={})",
821-
ArgIndex, ArgInfo.Size, ArgInfo.SizeWithRedZone);
822-
}
823-
UR_CALL(LaunchInfo.Data.importLocalArgsInfo(Queue, LocalArgsInfo));
809+
// Write local arguments info
810+
if (!KernelInfo->LocalArgs.empty()) {
811+
std::vector<LocalArgsInfo> LocalArgsInfo;
812+
for (auto [ArgIndex, ArgInfo] : KernelInfo->LocalArgs) {
813+
LocalArgsInfo.push_back(ArgInfo);
814+
getContext()->logger.debug(
815+
"local_args (argIndex={}, size={}, sizeWithRZ={})", ArgIndex,
816+
ArgInfo.Size, ArgInfo.SizeWithRedZone);
824817
}
818+
UR_CALL(LaunchInfo.Data.importLocalArgsInfo(Queue, LocalArgsInfo));
819+
}
825820

826-
// sync asan runtime data to device side
827-
UR_CALL(LaunchInfo.Data.syncToDevice(Queue));
821+
// sync asan runtime data to device side
822+
UR_CALL(LaunchInfo.Data.syncToDevice(Queue));
828823

829-
getContext()->logger.debug(
830-
"launch_info {} (numLocalArgs={}, localArgs={})",
831-
(void *)LaunchInfo.Data.getDevicePtr(),
832-
LaunchInfo.Data.Host.NumLocalArgs,
833-
(void *)LaunchInfo.Data.Host.LocalArgs);
834-
} while (false);
824+
getContext()->logger.debug("launch_info {} (numLocalArgs={}, localArgs={})",
825+
(void *)LaunchInfo.Data.getDevicePtr(),
826+
LaunchInfo.Data.Host.NumLocalArgs,
827+
(void *)LaunchInfo.Data.Host.LocalArgs);
835828

836829
return UR_RESULT_SUCCESS;
837830
}

source/loader/layers/sanitizer/asan/asan_shadow.hpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212

1313
#pragma once
1414

15-
#include "asan/asan_allocator.hpp"
15+
#include "asan_allocator.hpp"
1616
#include "sanitizer_common/sanitizer_libdevice.hpp"
1717

1818
#include <unordered_set>
@@ -134,7 +134,7 @@ struct ShadowMemoryPVC final : public ShadowMemoryGPU {
134134
size_t GetShadowSize() override { return 0x180000000000ULL; }
135135
};
136136

137-
/// Shadow Memory layout of GPU PVC device
137+
/// Shadow Memory layout of GPU DG2 device
138138
///
139139
/// USM Allocation Range (48 bits)
140140
/// Host/Shared USM : 0x0000_0000_0000_0000 ~ 0x0000_7fff_ffff_ffff

0 commit comments

Comments
 (0)