Skip to content

Commit b84b001

Browse files
Move and extend local memory DRM allocations
Signed-off-by: Daniel Chabrowski [email protected] Related-To: NEO-6591
1 parent ad6fc15 commit b84b001

File tree

12 files changed

+221
-41
lines changed

12 files changed

+221
-41
lines changed

level_zero/core/source/event/event.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,14 @@ ze_result_t EventPoolImp::initialize(DriverHandle *driver, Context *context, uin
104104
}
105105

106106
} else {
107+
const bool isShareable = (eventPoolFlags & ZE_EVENT_POOL_FLAG_IPC);
108+
if (isShareable) {
109+
allocationType = NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER;
110+
}
111+
107112
NEO::AllocationProperties allocationProperties{*rootDeviceIndices.begin(), alignedSize, allocationType, systemMemoryBitfield};
108113
allocationProperties.alignment = eventAlignment;
114+
allocationProperties.flags.shareable = isShareable;
109115

110116
std::vector<uint32_t> rootDeviceIndicesVector = {rootDeviceIndices.begin(), rootDeviceIndices.end()};
111117
eventPoolPtr = driver->getMemoryManager()->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndicesVector,

level_zero/core/test/unit_tests/sources/event/test_event.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ TEST_F(EventPoolIPCHandleTests, whenGettingIpcHandleForEventPoolThenHandleAndNum
258258
ze_event_pool_desc_t eventPoolDesc = {
259259
ZE_STRUCTURE_TYPE_EVENT_POOL_DESC,
260260
nullptr,
261-
ZE_EVENT_POOL_FLAG_HOST_VISIBLE,
261+
ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_IPC,
262262
numEvents};
263263

264264
auto deviceHandle = device->toHandle();

opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1240,6 +1240,34 @@ TEST_F(DrmMemoryManagerTest, GivenShareableEnabledWhenAskedToCreateGraphicsAlloc
12401240
memoryManager->freeGraphicsMemory(allocation);
12411241
}
12421242

1243+
// Verifies that allocateMemoryByKMD stores the pointer returned by the lock
// implementation as the allocation's CPU buffer when the allocation type is
// TIMESTAMP_PACKET_TAG_BUFFER.
TEST_F(DrmMemoryManagerTest, GivenAllocationTypeThatRequiresCpuAccessForKmdAllocationThenLockTheResourceIsCalled) {
    // Memory manager whose lock implementation returns a recognisable fake CPU address
    // instead of mapping anything.
    struct LockStubDrmMemoryManager : public DrmMemoryManager {
        LockStubDrmMemoryManager(ExecutionEnvironment &executionEnvironment)
            : DrmMemoryManager(gemCloseWorkerMode::gemCloseWorkerInactive, false, false, executionEnvironment) {
        }

        // Expose the allocation entry point so the test can call it directly.
        using DrmMemoryManager::allocateMemoryByKMD;

        void *lockResourceImpl(GraphicsAllocation &allocation) override {
            return reinterpret_cast<void *>(0xDEADBEEF);
        }
    };

    LockStubDrmMemoryManager managerUnderTest(*executionEnvironment);

    // Exactly one create / wait / close ioctl is expected over the allocation's lifetime.
    mock->ioctl_expected.gemCreate = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    allocationData.type = AllocationType::TIMESTAMP_PACKET_TAG_BUFFER;

    auto kmdAllocation = managerUnderTest.allocateMemoryByKMD(allocationData);
    EXPECT_NE(nullptr, kmdAllocation);
    EXPECT_NE(0u, kmdAllocation->getGpuAddress());
    // The CPU pointer must be the value produced by the stubbed lock.
    EXPECT_EQ(0xDEADBEEF, reinterpret_cast<uintptr_t>(kmdAllocation->getUnderlyingBuffer()));

    memoryManager->freeGraphicsMemory(kmdAllocation);
}
1270+
12431271
TEST_F(DrmMemoryManagerTest, GivenMisalignedHostPtrAndMultiplePagesSizeWhenAskedForGraphicsAllocationThenItContainsAllFragmentsWithProperGpuAdrresses) {
12441272
mock->ioctl_expected.gemUserptr = 3;
12451273
mock->ioctl_expected.gemWait = 3;

opencl/test/unit_test/test_files/igdrcl.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,3 +389,4 @@ AccessCountersGranularity = -1
389389
OverridePatIndex = -1
390390
UseTileMemoryBankInVirtualMemoryCreation = -1
391391
DisableScratchPages = 0
392+
SetVmAdviseAtomicAttribute = -1

shared/source/debug_settings/debug_variables_base.inl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, AccessCountersTrigger, -1, "-1: default - disabl
192192
DECLARE_DEBUG_VARIABLE(int32_t, AccessCountersGranularity, -1, "-1: default - ACG_2MB, >= 0: granularites - 0: ACG_128K, 1: ACG_2M, 2: ACG_16M, 3: ACG_16M")
193193
DECLARE_DEBUG_VARIABLE(int32_t, OverridePatIndex, -1, "-1: default, >=0: PatIndex to override")
194194
DECLARE_DEBUG_VARIABLE(int32_t, UseTileMemoryBankInVirtualMemoryCreation, -1, "-1: default - on, 0: do not assign tile memory bank to virtual memory space, 1: assign tile memory bank to virtual memory space")
195+
DECLARE_DEBUG_VARIABLE(int32_t, SetVmAdviseAtomicAttribute, -1, "-1: default - atomic system, 0: atomic none, 1: atomic device, 2: atomic system")
195196
DECLARE_DEBUG_VARIABLE(bool, DisableScratchPages, false, "Disable scratch pages during VM creations")
196197
/*LOGGING FLAGS*/
197198
DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level")

shared/source/generated/gen12lp/hw_cmds_generated_gen12lp.inl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5062,7 +5062,7 @@ typedef struct tagMI_STORE_DATA_IMM {
50625062
ADDRESS_ALIGN_SIZE = 0x4,
50635063
} ADDRESS;
50645064
inline void setAddress(const uint64_t value) {
5065-
UNRECOVERABLE_IF(value > 0x3fffffffffffffffL);
5065+
UNRECOVERABLE_IF((value >> ADDRESS_BIT_SHIFT) > 0x3fffffffffffffffL);
50665066
TheStructure.Common.Address = value >> ADDRESS_BIT_SHIFT;
50675067
}
50685068
inline uint64_t getAddress() const {

shared/source/os_interface/linux/CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,10 @@ set(NEO_CORE_OS_INTERFACE_LINUX
3737
${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_operations_handler.h
3838
${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_operations_handler_bind.cpp
3939
${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_operations_handler_bind.h
40+
${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_operations_handler_create.cpp
4041
${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_operations_handler_default.cpp
4142
${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_operations_handler_default.h
4243
${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_manager_create_multi_host_allocation.cpp
43-
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}drm_memory_manager_local_memory.cpp
44-
${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_operations_handler_create.cpp
4544
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}drm_query.cpp
4645
${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_drm.cpp
4746
${CMAKE_CURRENT_SOURCE_DIR}/hw_device_id.h

shared/source/os_interface/linux/drm_memory_manager.cpp

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
#include "shared/source/helpers/string.h"
2222
#include "shared/source/helpers/surface_format_info.h"
2323
#include "shared/source/memory_manager/host_ptr_manager.h"
24+
#include "shared/source/memory_manager/memory_banks.h"
25+
#include "shared/source/memory_manager/memory_pool.h"
2426
#include "shared/source/memory_manager/residency.h"
2527
#include "shared/source/os_interface/linux/allocator_helper.h"
2628
#include "shared/source/os_interface/linux/drm_memory_operations_handler.h"
@@ -504,6 +506,12 @@ GraphicsAllocation *DrmMemoryManager::allocateMemoryByKMD(const AllocationData &
504506
allocation->setDefaultGmm(gmm.release());
505507

506508
allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuRange), bufferSize);
509+
510+
if (GraphicsAllocation::isCpuAccessRequired(allocationData.type)) {
511+
auto cpuAddress = lockResource(allocation);
512+
allocation->setCpuPtrAndGpuAddress(cpuAddress, gpuRange);
513+
}
514+
507515
bo.release();
508516
return allocation;
509517
}
@@ -1616,4 +1624,160 @@ void *DrmMemoryManager::lockResourceInLocalMemoryImpl(BufferObject *bo) {
16161624
return bo->peekLockedAddress();
16171625
}
16181626

1627+
void createMemoryRegionsForSharedAllocation(const HardwareInfo &hwInfo, MemoryInfo &memoryInfo, const AllocationData &allocationData, MemRegionsVec &memRegions) {
1628+
auto memoryBanks = allocationData.storageInfo.memoryBanks;
1629+
1630+
if (allocationData.usmInitialPlacement == GraphicsAllocation::UsmInitialPlacement::CPU) {
1631+
//System memory region
1632+
auto regionClassAndInstance = memoryInfo.getMemoryRegionClassAndInstance(0u, hwInfo);
1633+
memRegions.push_back(regionClassAndInstance);
1634+
}
1635+
1636+
//All local memory regions
1637+
size_t currentBank = 0;
1638+
size_t i = 0;
1639+
1640+
while (i < memoryBanks.count()) {
1641+
if (memoryBanks.test(currentBank)) {
1642+
auto regionClassAndInstance = memoryInfo.getMemoryRegionClassAndInstance(1u << currentBank, hwInfo);
1643+
memRegions.push_back(regionClassAndInstance);
1644+
i++;
1645+
}
1646+
currentBank++;
1647+
}
1648+
1649+
if (allocationData.usmInitialPlacement == GraphicsAllocation::UsmInitialPlacement::GPU) {
1650+
//System memory region
1651+
auto regionClassAndInstance = memoryInfo.getMemoryRegionClassAndInstance(0u, hwInfo);
1652+
memRegions.push_back(regionClassAndInstance);
1653+
}
1654+
}
1655+
1656+
// Creates a shared unified memory allocation backed by one GEM object that is placed in
// the regions chosen by createMemoryRegionsForSharedAllocation and mapped into the CPU
// address space at an address aligned to allocationData.alignment. The CPU address is
// also used as the buffer object's address.
//
// Returns nullptr when:
//  - the ioctl helper reports a VM advise atomic attribute of 0,
//  - DRM memory info is not available,
//  - creating the GEM object, applying the VM advise or querying the mmap offset fails,
//  - reserving the CPU range or mapping the buffer object into it fails,
//  - the cache region cannot be applied to the allocation.
// On success the caller owns the returned allocation.
GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const AllocationData &allocationData) {
    auto &drm = this->getDrm(allocationData.rootDeviceIndex);

    const auto vmAdviseAttribute = drm.getIoctlHelper()->getVmAdviseAtomicAttribute();
    if (vmAdviseAttribute == 0) {
        return nullptr;
    }

    auto memoryInfo = drm.getMemoryInfo();
    if (!memoryInfo) {
        return nullptr;
    }

    auto size = allocationData.size;
    auto alignment = allocationData.alignment;

    auto pHwInfo = drm.getRootDeviceEnvironment().getHardwareInfo();

    MemRegionsVec memRegions;
    createMemoryRegionsForSharedAllocation(*pHwInfo, *memoryInfo, allocationData, memRegions);

    uint32_t handle = 0;
    auto ret = memoryInfo->createGemExt(&drm, memRegions, size, handle);

    if (ret) {
        return nullptr;
    }

    // From here on the buffer object is held by `bo`, so its deleter runs on every early return.
    std::unique_ptr<BufferObject, BufferObject::Deleter> bo(new BufferObject(&drm, handle, size, maxOsContextCount));

    if (!drm.getIoctlHelper()->setVmBoAdvise(&drm, bo->peekHandle(), vmAdviseAttribute, nullptr)) {
        return nullptr;
    }

    uint64_t offset = 0;
    if (!retrieveMmapOffsetForBufferObject(allocationData.rootDeviceIndex, *bo, I915_MMAP_OFFSET_WB, offset)) {
        return nullptr;
    }

    // Reserve `alignment` extra bytes so that an aligned range of `size` bytes fits inside the reservation.
    auto totalSizeToAlloc = size + alignment;
    auto cpuBasePointer = this->mmapFunction(0, totalSizeToAlloc, PROT_NONE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
    if (cpuBasePointer == MAP_FAILED) {
        return nullptr;
    }

    auto cpuPointer = alignUp(cpuBasePointer, alignment);

    // Map the buffer object over the aligned part of the reservation.
    auto mappedPointer = this->mmapFunction(cpuPointer, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, drm.getFileDescriptor(), static_cast<off_t>(offset));
    if (mappedPointer == MAP_FAILED) {
        this->munmapFunction(cpuBasePointer, totalSizeToAlloc);
        return nullptr;
    }

    bo->setAddress(reinterpret_cast<uintptr_t>(cpuPointer));

    auto allocation = std::make_unique<DrmAllocation>(allocationData.rootDeviceIndex, allocationData.type, bo.get(), cpuPointer, bo->peekAddress(), size, MemoryPool::System4KBPages);
    allocation->setMmapPtr(cpuBasePointer);
    allocation->setMmapSize(totalSizeToAlloc);
    if (!allocation->setCacheRegion(&drm, static_cast<CacheRegion>(allocationData.cacheRegion))) {
        // Unmap from the reservation base: totalSizeToAlloc is measured from cpuBasePointer,
        // so unmapping from the aligned pointer would leave the head of the reservation mapped
        // and reach past its end.
        this->munmapFunction(cpuBasePointer, totalSizeToAlloc);
        return nullptr;
    }

    // The raw buffer object pointer was handed to the allocation above; stop tracking it here.
    bo.release();

    return allocation.release();
}
1717+
1718+
// Imports a USM host allocation from a shared (PRIME) file descriptor.
//
// handle       - file descriptor converted to a GEM handle with DRM_IOCTL_PRIME_FD_TO_HANDLE;
//                also used to query the object size with lseek.
// properties   - supplies rootDeviceIndex, allocationType, cacheRegion and, when
//                hasMappedPtr is set, size and gpuAddress.
// hasMappedPtr - when true, the imported handle is wrapped in a new buffer object placed at
//                properties.gpuAddress and no CPU mapping is created.
//
// When hasMappedPtr is false and the handle is not yet tracked as a shared buffer object,
// a new buffer object is created, mapped into the CPU address space, pinned, registered as
// shared and returned as a System4KBPages allocation. When the handle is already tracked,
// the existing buffer object is referenced and returned as SystemCpuInaccessible.
//
// Returns nullptr when the PRIME ioctl fails, when reserving the CPU range fails, or when
// the mmap offset of the buffer object cannot be retrieved.
DrmAllocation *DrmMemoryManager::createUSMHostAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool hasMappedPtr) {
    auto &drm = this->getDrm(properties.rootDeviceIndex);

    drm_prime_handle openFd = {0, 0, 0};
    openFd.fd = handle;

    auto ret = drm.ioctl(DRM_IOCTL_PRIME_FD_TO_HANDLE, &openFd);
    if (ret != 0) {
        int err = drm.getErrno();
        PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr, "ioctl(PRIME_FD_TO_HANDLE) failed with %d. errno=%d(%s)\n", ret, err, strerror(err));
        DEBUG_BREAK_IF(ret != 0);
        return nullptr;
    }

    if (hasMappedPtr) {
        auto bo = new BufferObject(&drm, openFd.handle, properties.size, maxOsContextCount);
        bo->setAddress(properties.gpuAddress);

        return new DrmAllocation(properties.rootDeviceIndex, properties.allocationType, bo, reinterpret_cast<void *>(bo->peekAddress()), bo->peekSize(),
                                 handle, MemoryPool::SystemCpuInaccessible);
    }

    auto boHandle = openFd.handle;
    auto bo = findAndReferenceSharedBufferObject(boHandle, properties.rootDeviceIndex);

    void *cpuPointer = nullptr;
    size_t size = lseekFunction(handle, 0, SEEK_END);

    if (bo != nullptr) {
        // The handle is already tracked: reuse the referenced buffer object as is.
        return new DrmAllocation(properties.rootDeviceIndex, properties.allocationType, bo, reinterpret_cast<void *>(bo->peekAddress()), bo->peekSize(),
                                 handle, MemoryPool::SystemCpuInaccessible);
    }

    bo = new BufferObject(&drm, boHandle, size, maxOsContextCount);

    // Reserve an address range first; the buffer object is mapped over it below.
    cpuPointer = this->mmapFunction(0, size, PROT_NONE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
    if (cpuPointer == MAP_FAILED) {
        // Without this check MAP_FAILED would become the buffer object's address and the
        // target of the MAP_FIXED mapping.
        PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s", "mmap return of MAP_FAILED\n");
        delete bo;
        return nullptr;
    }
    bo->setAddress(reinterpret_cast<uintptr_t>(cpuPointer));

    uint64_t offset = 0;
    if (!retrieveMmapOffsetForBufferObject(properties.rootDeviceIndex, *bo, I915_MMAP_OFFSET_WB, offset)) {
        this->munmapFunction(cpuPointer, size);
        delete bo;
        return nullptr;
    }

    [[maybe_unused]] auto retPtr = this->mmapFunction(cpuPointer, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, drm.getFileDescriptor(), static_cast<off_t>(offset));
    DEBUG_BREAK_IF(retPtr != cpuPointer);

    AllocationData allocationData = {};
    allocationData.rootDeviceIndex = properties.rootDeviceIndex;
    allocationData.size = size;
    emitPinningRequest(bo, allocationData);

    bo->setUnmapSize(size);
    bo->setRootDeviceIndex(properties.rootDeviceIndex);

    pushSharedBufferObject(bo);

    auto drmAllocation = new DrmAllocation(properties.rootDeviceIndex, properties.allocationType, bo, cpuPointer, bo->peekAddress(), bo->peekSize(), MemoryPool::System4KBPages);
    drmAllocation->setMmapPtr(cpuPointer);
    drmAllocation->setMmapSize(size);
    drmAllocation->setReservedAddressRange(reinterpret_cast<void *>(cpuPointer), size);
    drmAllocation->setCacheRegion(&drm, static_cast<CacheRegion>(properties.cacheRegion));

    return drmAllocation;
}
1782+
16191783
} // namespace NEO

shared/source/os_interface/linux/drm_memory_manager_local_memory.cpp

Lines changed: 0 additions & 37 deletions
This file was deleted.

shared/source/os_interface/linux/ioctl_helper.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ class IoctlHelper {
110110
virtual void fillVmBindExtSyncFence(const std::unique_ptr<uint8_t[]> &vmBindExtSyncFence, uint64_t fenceAddress, uint64_t fenceValue, uint64_t nextExtension) = 0;
111111
virtual std::optional<uint64_t> getCopyClassSaturatePCIECapability() = 0;
112112
virtual std::optional<uint64_t> getCopyClassSaturateLinkCapability() = 0;
113+
virtual uint32_t getVmAdviseAtomicAttribute() = 0;
113114
virtual int vmBind(Drm *drm, const VmBindParams &vmBindParams) = 0;
114115
virtual int vmUnbind(Drm *drm, const VmBindParams &vmBindParams) = 0;
115116
virtual bool getEuStallProperties(std::array<uint64_t, 10u> &properties, uint64_t dssBufferSize, uint64_t samplingRate, uint64_t pollPeriod, uint64_t engineInstance) = 0;
@@ -155,6 +156,7 @@ class IoctlHelperUpstream : public IoctlHelper {
155156
void fillVmBindExtSyncFence(const std::unique_ptr<uint8_t[]> &vmBindExtSyncFence, uint64_t fenceAddress, uint64_t fenceValue, uint64_t nextExtension) override;
156157
std::optional<uint64_t> getCopyClassSaturatePCIECapability() override;
157158
std::optional<uint64_t> getCopyClassSaturateLinkCapability() override;
159+
uint32_t getVmAdviseAtomicAttribute() override;
158160
int vmBind(Drm *drm, const VmBindParams &vmBindParams) override;
159161
int vmUnbind(Drm *drm, const VmBindParams &vmBindParams) override;
160162
bool getEuStallProperties(std::array<uint64_t, 10u> &properties, uint64_t dssBufferSize, uint64_t samplingRate, uint64_t pollPeriod, uint64_t engineInstance) override;
@@ -213,6 +215,7 @@ class IoctlHelperPrelim20 : public IoctlHelper {
213215
void fillVmBindExtSyncFence(const std::unique_ptr<uint8_t[]> &vmBindExtSyncFence, uint64_t fenceAddress, uint64_t fenceValue, uint64_t nextExtension) override;
214216
std::optional<uint64_t> getCopyClassSaturatePCIECapability() override;
215217
std::optional<uint64_t> getCopyClassSaturateLinkCapability() override;
218+
uint32_t getVmAdviseAtomicAttribute() override;
216219
int vmBind(Drm *drm, const VmBindParams &vmBindParams) override;
217220
int vmUnbind(Drm *drm, const VmBindParams &vmBindParams) override;
218221
bool getEuStallProperties(std::array<uint64_t, 10u> &properties, uint64_t dssBufferSize, uint64_t samplingRate, uint64_t pollPeriod, uint64_t engineInstance) override;

shared/source/os_interface/linux/ioctl_helper_prelim.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,17 @@ std::optional<uint64_t> IoctlHelperPrelim20::getCopyClassSaturateLinkCapability(
451451
return PRELIM_I915_COPY_CLASS_CAP_SATURATE_LINK;
452452
}
453453

454+
uint32_t IoctlHelperPrelim20::getVmAdviseAtomicAttribute() {
455+
switch (NEO::DebugManager.flags.SetVmAdviseAtomicAttribute.get()) {
456+
case 0:
457+
return PRELIM_I915_VM_ADVISE_ATOMIC_NONE;
458+
case 1:
459+
return PRELIM_I915_VM_ADVISE_ATOMIC_DEVICE;
460+
default:
461+
return PRELIM_I915_VM_ADVISE_ATOMIC_SYSTEM;
462+
}
463+
}
464+
454465
prelim_drm_i915_gem_vm_bind translateVmBindParamsToPrelimStruct(const VmBindParams &vmBindParams) {
455466
prelim_drm_i915_gem_vm_bind vmBind{};
456467
vmBind.vm_id = vmBindParams.vmId;

shared/source/os_interface/linux/ioctl_helper_upstream.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,10 @@ std::optional<uint64_t> IoctlHelperUpstream::getCopyClassSaturateLinkCapability(
204204
return std::nullopt;
205205
}
206206

207+
// The upstream helper always reports 0 as the VM advise atomic attribute.
uint32_t IoctlHelperUpstream::getVmAdviseAtomicAttribute() {
    constexpr uint32_t noVmAdviseAttribute = 0u;
    return noVmAdviseAttribute;
}
210+
207211
int IoctlHelperUpstream::vmBind(Drm *drm, const VmBindParams &vmBindParams) {
208212
return 0;
209213
}

0 commit comments

Comments
 (0)