Skip to content

Commit 4b3a6e9

Browse files
feature: capture multiple CPU page fault handlers
Record multiple previous page fault handlers in a vector in cpu_page_fault_manager_linux. Add a handlerIndex member that tracks the nesting depth of the handler logic so that the appropriate previous handler is called. Related-To: NEO-11563 Signed-off-by: Young Jin Yoon <[email protected]>
1 parent 8e5e3d1 commit 4b3a6e9

11 files changed

+317
-35
lines changed

shared/source/debug_settings/debug_variables_base.inl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -442,7 +442,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, WaitForPagingFenceInController, -1, "Instead of
442442

443443
/*FEATURE FLAGS*/
444444
DECLARE_DEBUG_VARIABLE(bool, USMEvictAfterMigration, false, "Evict USM allocation after implicit migration to GPU")
445-
DECLARE_DEBUG_VARIABLE(bool, RegisterPageFaultHandlerOnMigration, true, "Register handler on migration to GPU when current is not from pagefault manager")
445+
DECLARE_DEBUG_VARIABLE(bool, RegisterPageFaultHandlerOnMigration, false, "Register handler on migration to GPU when current is not from pagefault manager")
446446
DECLARE_DEBUG_VARIABLE(bool, EnableNV12, true, "Enables NV12 extension")
447447
DECLARE_DEBUG_VARIABLE(bool, EnablePackedYuv, true, "Enables cl_packed_yuv extension")
448448
DECLARE_DEBUG_VARIABLE(bool, EnableDeferredDeleter, true, "Enables async deleter")

shared/source/page_fault_manager/cpu_page_fault_manager.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,14 +95,16 @@ inline void PageFaultManager::migrateStorageToGpuDomain(void *ptr, PageFaultData
9595
pageFaultData.domain = AllocationDomain::gpu;
9696
}
9797

98-
bool PageFaultManager::verifyPageFault(void *ptr) {
98+
bool PageFaultManager::verifyAndHandlePageFault(void *ptr, bool handlePageFault) {
9999
std::unique_lock<SpinLock> lock{mtx};
100100
for (auto &alloc : this->memoryData) {
101101
auto allocPtr = alloc.first;
102102
auto &pageFaultData = alloc.second;
103103
if (ptr >= allocPtr && ptr < ptrOffset(allocPtr, pageFaultData.size)) {
104-
this->setAubWritable(true, allocPtr, pageFaultData.unifiedMemoryManager);
105-
gpuDomainHandler(this, allocPtr, pageFaultData);
104+
if (handlePageFault) {
105+
this->setAubWritable(true, allocPtr, pageFaultData.unifiedMemoryManager);
106+
gpuDomainHandler(this, allocPtr, pageFaultData);
107+
}
106108
return true;
107109
}
108110
}

shared/source/page_fault_manager/cpu_page_fault_manager.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ class PageFaultManager : public NonCopyableOrMovableClass {
5959
virtual void evictMemoryAfterImplCopy(GraphicsAllocation *allocation, Device *device) = 0;
6060
virtual void allowCPUMemoryEvictionImpl(void *ptr, CommandStreamReceiver &csr, OSInterface *osInterface) = 0;
6161

62-
MOCKABLE_VIRTUAL bool verifyPageFault(void *ptr);
62+
MOCKABLE_VIRTUAL bool verifyAndHandlePageFault(void *ptr, bool handlePageFault);
6363
MOCKABLE_VIRTUAL void transferToGpu(void *ptr, void *cmdQ);
6464
MOCKABLE_VIRTUAL void setAubWritable(bool writable, void *ptr, SVMAllocsManager *unifiedMemoryManager);
6565
MOCKABLE_VIRTUAL void setCpuAllocEvictable(bool evictable, void *ptr, SVMAllocsManager *unifiedMemoryManager);

shared/source/page_fault_manager/linux/cpu_page_fault_manager_linux.cpp

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "shared/source/helpers/debug_helpers.h"
1414
#include "shared/source/memory_manager/memory_operations_handler.h"
1515

16+
#include <algorithm>
1617
#include <sys/mman.h>
1718

1819
namespace NEO {
@@ -35,8 +36,9 @@ PageFaultManagerLinux::PageFaultManagerLinux() {
3536

3637
PageFaultManagerLinux::~PageFaultManagerLinux() {
3738
if (!previousHandlerRestored) {
38-
auto retVal = sigaction(SIGSEGV, &previousPageFaultHandler, nullptr);
39+
auto retVal = sigaction(SIGSEGV, &previousPageFaultHandlers[0], nullptr);
3940
UNRECOVERABLE_IF(retVal != 0);
41+
previousPageFaultHandlers.clear();
4042
}
4143
}
4244

@@ -47,8 +49,21 @@ bool PageFaultManagerLinux::checkFaultHandlerFromPageFaultManager() {
4749
}
4850

4951
void PageFaultManagerLinux::registerFaultHandler() {
52+
struct sigaction previousPageFaultHandler = {};
53+
auto retVal = sigaction(SIGSEGV, nullptr, &previousPageFaultHandler);
54+
UNRECOVERABLE_IF(retVal != 0);
55+
56+
auto compareHandler = [&ph = previousPageFaultHandler](const struct sigaction &h) -> bool {
57+
return (h.sa_flags & SA_SIGINFO) ? (h.sa_sigaction == ph.sa_sigaction) : (h.sa_handler == ph.sa_handler);
58+
};
59+
if (std::find_if(previousPageFaultHandlers.begin(),
60+
previousPageFaultHandlers.end(),
61+
compareHandler) == previousPageFaultHandlers.end()) {
62+
previousPageFaultHandlers.push_back(previousPageFaultHandler);
63+
}
64+
5065
pageFaultHandler = [&](int signal, siginfo_t *info, void *context) {
51-
if (!this->verifyPageFault(info->si_addr)) {
66+
if (!this->verifyAndHandlePageFault(info->si_addr, this->handlerIndex == 0)) {
5267
callPreviousHandler(signal, info, context);
5368
}
5469
};
@@ -57,7 +72,7 @@ void PageFaultManagerLinux::registerFaultHandler() {
5772
pageFaultManagerHandler.sa_flags = SA_SIGINFO;
5873
pageFaultManagerHandler.sa_sigaction = pageFaultHandlerWrapper;
5974

60-
auto retVal = sigaction(SIGSEGV, &pageFaultManagerHandler, &previousPageFaultHandler);
75+
retVal = sigaction(SIGSEGV, &pageFaultManagerHandler, &previousPageFaultHandler);
6176
UNRECOVERABLE_IF(retVal != 0);
6277
}
6378

@@ -76,19 +91,22 @@ void PageFaultManagerLinux::protectCPUMemoryAccess(void *ptr, size_t size) {
7691
}
7792

7893
void PageFaultManagerLinux::callPreviousHandler(int signal, siginfo_t *info, void *context) {
94+
handlerIndex++;
95+
// handlerIndex was just incremented and is used below as an offset from the end of the vector, so it must lie in [1, size].
UNRECOVERABLE_IF(handlerIndex < 1 || handlerIndex > static_cast<int>(previousPageFaultHandlers.size()));
96+
auto previousPageFaultHandler = previousPageFaultHandlers[previousPageFaultHandlers.size() - handlerIndex];
7997
if (previousPageFaultHandler.sa_flags & SA_SIGINFO) {
8098
previousPageFaultHandler.sa_sigaction(signal, info, context);
8199
} else {
82100
if (previousPageFaultHandler.sa_handler == SIG_DFL) {
83101
auto retVal = sigaction(SIGSEGV, &previousPageFaultHandler, nullptr);
84102
UNRECOVERABLE_IF(retVal != 0);
85103
previousHandlerRestored = true;
86-
} else if (previousPageFaultHandler.sa_handler == SIG_IGN) {
87-
return;
88-
} else {
104+
previousPageFaultHandlers.clear();
105+
} else if (previousPageFaultHandler.sa_handler != SIG_IGN) {
89106
previousPageFaultHandler.sa_handler(signal);
90107
}
91108
}
109+
handlerIndex--;
92110
}
93111

94112
void PageFaultManagerLinux::evictMemoryAfterImplCopy(GraphicsAllocation *allocation, Device *device) {

shared/source/page_fault_manager/linux/cpu_page_fault_manager_linux.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include <csignal>
1313
#include <functional>
14+
#include <vector>
1415

1516
namespace NEO {
1617
class PageFaultManagerLinux : public PageFaultManager {
@@ -35,8 +36,9 @@ class PageFaultManagerLinux : public PageFaultManager {
3536

3637
static std::function<void(int signal, siginfo_t *info, void *context)> pageFaultHandler;
3738

38-
struct sigaction previousPageFaultHandler = {};
39+
std::vector<struct sigaction> previousPageFaultHandlers;
3940

4041
bool evictMemoryAfterCopy = false;
42+
int handlerIndex = 0;
4143
};
4244
} // namespace NEO

shared/source/page_fault_manager/windows/cpu_page_fault_manager_windows.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ bool PageFaultManagerWindows::checkFaultHandlerFromPageFaultManager() {
3939
void PageFaultManagerWindows::registerFaultHandler() {
4040
pageFaultHandler = [this](struct _EXCEPTION_POINTERS *exceptionInfo) {
4141
if (static_cast<long>(exceptionInfo->ExceptionRecord->ExceptionCode) == EXCEPTION_ACCESS_VIOLATION) {
42-
if (this->verifyPageFault(reinterpret_cast<void *>(exceptionInfo->ExceptionRecord->ExceptionInformation[1]))) {
42+
if (this->verifyAndHandlePageFault(reinterpret_cast<void *>(exceptionInfo->ExceptionRecord->ExceptionInformation[1]), true)) {
4343
// this is our fault that we serviced, continue app execution
4444
return EXCEPTION_CONTINUE_EXECUTION;
4545
}

shared/test/common/mocks/mock_cpu_page_fault_manager.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,11 @@ class MockPageFaultManager : public PageFaultManager {
2323
using PageFaultManager::selectGpuDomainHandler;
2424
using PageFaultManager::transferAndUnprotectMemory;
2525
using PageFaultManager::unprotectAndTransferMemory;
26-
using PageFaultManager::verifyPageFault;
26+
using PageFaultManager::verifyAndHandlePageFault;
2727

2828
bool checkFaultHandlerFromPageFaultManager() override {
2929
checkFaultHandlerCalled++;
30-
return (registerFaultHandlerCalled != 0);
30+
return isFaultHandlerFromPageFaultManager;
3131
}
3232
void registerFaultHandler() override {
3333
registerFaultHandlerCalled++;
@@ -115,6 +115,7 @@ class MockPageFaultManager : public PageFaultManager {
115115
size_t protectedSize = 0;
116116
bool isAubWritable = true;
117117
bool isCpuAllocEvictable = true;
118+
bool isFaultHandlerFromPageFaultManager = false;
118119
aub_stream::EngineType engineType = aub_stream::EngineType::NUM_ENGINES;
119120
EngineUsage engineUsage = EngineUsage::engineUsageCount;
120121
};
@@ -130,8 +131,8 @@ class MockPageFaultManagerHandlerInvoke : public T {
130131
using T::registerFaultHandler;
131132
using T::T;
132133

133-
bool verifyPageFault(void *ptr) override {
134-
handlerInvoked = true;
134+
bool verifyAndHandlePageFault(void *ptr, bool handlePageFault) override {
135+
handlerInvoked = handlePageFault;
135136
if (allowCPUMemoryAccessOnPageFault) {
136137
this->allowCPUMemoryAccess(ptr, size);
137138
}

shared/test/common/test_files/igdrcl.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ SetCommandStreamReceiver = -1
2727
TbxPort = 4321
2828
TbxFrontdoorMode = 0
2929
FlattenBatchBufferForAUBDump = 0
30-
RegisterPageFaultHandlerOnMigration = 1
30+
RegisterPageFaultHandlerOnMigration = 0
3131
AddPatchInfoCommentsForAUBDump = 0
3232
UseAubStream = 1
3333
AUBDumpAllocsOnEnqueueReadOnly = 0

0 commit comments

Comments
 (0)