Skip to content

Commit 62de443

Browse files
Jaime Arteaga authored and Compute-Runtime-Automation committed
Add copy engine support for USM shared migration
Signed-off-by: Jaime Arteaga <[email protected]>
1 parent cd702af commit 62de443

File tree

12 files changed: +195 additions, -58 deletions

level_zero/core/source/cmdlist/cmdlist_hw.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -104,8 +104,8 @@ struct CommandListCoreFamily : CommandListImp {
104104
ze_result_t appendMemoryCopy(void *dstptr, const void *srcptr, size_t size,
105105
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
106106
ze_event_handle_t *phWaitEvents) override;
107-
ze_result_t appendPageFaultCopy(NEO::GraphicsAllocation *dstptr,
108-
NEO::GraphicsAllocation *srcptr,
107+
ze_result_t appendPageFaultCopy(NEO::GraphicsAllocation *dstAllocation,
108+
NEO::GraphicsAllocation *srcAllocation,
109109
size_t size,
110110
bool flushHost) override;
111111
ze_result_t appendMemoryCopyRegion(void *dstPtr,

level_zero/core/source/cmdlist/cmdlist_hw.inl

Lines changed: 40 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -977,8 +977,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCopyImageBlit(NEO::Graph
977977
}
978978

979979
template <GFXCORE_FAMILY gfxCoreFamily>
980-
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(NEO::GraphicsAllocation *dstptr,
981-
NEO::GraphicsAllocation *srcptr,
980+
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(NEO::GraphicsAllocation *dstAllocation,
981+
NEO::GraphicsAllocation *srcAllocation,
982982
size_t size, bool flushHost) {
983983

984984
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
@@ -991,32 +991,45 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(NEO::Graph
991991
isStateless = true;
992992
}
993993

994-
uint64_t dstAddress = dstptr->getGpuAddress();
995-
uint64_t srcAddress = srcptr->getGpuAddress();
996-
ze_result_t ret = appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAddress),
997-
dstptr, 0,
998-
reinterpret_cast<void *>(&srcAddress),
999-
srcptr, 0,
1000-
size - rightSize,
1001-
middleElSize,
1002-
Builtin::CopyBufferToBufferMiddle,
1003-
nullptr,
1004-
isStateless);
1005-
if (ret == ZE_RESULT_SUCCESS && rightSize) {
1006-
appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAddress),
1007-
dstptr, size - rightSize,
1008-
reinterpret_cast<void *>(&srcAddress),
1009-
srcptr, size - rightSize,
1010-
rightSize, 1UL,
1011-
Builtin::CopyBufferToBufferSide,
1012-
nullptr,
1013-
isStateless);
1014-
}
994+
uintptr_t dstAddress = static_cast<uintptr_t>(dstAllocation->getGpuAddress());
995+
uintptr_t srcAddress = static_cast<uintptr_t>(srcAllocation->getGpuAddress());
996+
ze_result_t ret = ZE_RESULT_ERROR_UNKNOWN;
997+
if (isCopyOnly()) {
998+
ret = appendMemoryCopyBlit(dstAddress, dstAllocation, 0u,
999+
srcAddress, srcAllocation, 0u,
1000+
size - rightSize);
1001+
1002+
if (ret == ZE_RESULT_SUCCESS && rightSize) {
1003+
ret = appendMemoryCopyBlit(dstAddress, dstAllocation, size - rightSize,
1004+
srcAddress, srcAllocation, size - rightSize,
1005+
rightSize);
1006+
}
1007+
} else {
1008+
ret = appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAddress),
1009+
dstAllocation, 0,
1010+
reinterpret_cast<void *>(&srcAddress),
1011+
srcAllocation, 0,
1012+
size - rightSize,
1013+
middleElSize,
1014+
Builtin::CopyBufferToBufferMiddle,
1015+
nullptr,
1016+
isStateless);
1017+
if (ret == ZE_RESULT_SUCCESS && rightSize) {
1018+
ret = appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAddress),
1019+
dstAllocation, size - rightSize,
1020+
reinterpret_cast<void *>(&srcAddress),
1021+
srcAllocation, size - rightSize,
1022+
rightSize, 1UL,
1023+
Builtin::CopyBufferToBufferSide,
1024+
nullptr,
1025+
isStateless);
1026+
}
10151027

1016-
if (NEO::MemorySynchronizationCommands<GfxFamily>::isDcFlushAllowed()) {
1017-
if (flushHost) {
1018-
NEO::PipeControlArgs args(true);
1019-
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
1028+
if (NEO::MemorySynchronizationCommands<GfxFamily>::isDcFlushAllowed()) {
1029+
if (flushHost) {
1030+
NEO::PipeControlArgs args(true);
1031+
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
1032+
}
10201033
}
10211034
}
10221035

level_zero/core/source/cmdlist/cmdlist_hw_immediate.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,8 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
6565

6666
ze_result_t appendEventReset(ze_event_handle_t hEvent) override;
6767

68-
ze_result_t appendPageFaultCopy(NEO::GraphicsAllocation *dstptr, NEO::GraphicsAllocation *srcptr,
68+
ze_result_t appendPageFaultCopy(NEO::GraphicsAllocation *dstAllocation,
69+
NEO::GraphicsAllocation *srcAllocation,
6970
size_t size, bool flushHost) override;
7071

7172
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent) override;

level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -331,13 +331,15 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendEventReset(ze_e
331331
}
332332

333333
template <GFXCORE_FAMILY gfxCoreFamily>
334-
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(NEO::GraphicsAllocation *dstptr, NEO::GraphicsAllocation *srcptr, size_t size, bool flushHost) {
334+
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(NEO::GraphicsAllocation *dstAllocation,
335+
NEO::GraphicsAllocation *srcAllocation,
336+
size_t size, bool flushHost) {
335337

336338
if (this->isFlushTaskSubmissionEnabled) {
337339
checkAvailableSpace();
338340
}
339341

340-
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(dstptr, srcptr, size, flushHost);
342+
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(dstAllocation, srcAllocation, size, flushHost);
341343
if (ret == ZE_RESULT_SUCCESS) {
342344
if (this->isFlushTaskSubmissionEnabled) {
343345
executeCommandListImmediateWithFlushTask(false);

level_zero/core/source/cmdlist/cmdlist_imp.cpp

Lines changed: 19 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -89,28 +89,36 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
8989
CommandListImp *commandList = nullptr;
9090
returnValue = ZE_RESULT_ERROR_UNINITIALIZED;
9191

92+
NEO::EngineGroupType engineType = engineGroupType;
93+
9294
if (allocator) {
95+
NEO::CommandStreamReceiver *csr = nullptr;
96+
auto deviceImp = static_cast<DeviceImp *>(device);
97+
if (internalUsage) {
98+
if (NEO::EngineGroupType::Copy == engineType && deviceImp->getActiveDevice()->getInternalCopyEngine()) {
99+
csr = deviceImp->getActiveDevice()->getInternalCopyEngine()->commandStreamReceiver;
100+
} else {
101+
csr = deviceImp->getActiveDevice()->getInternalEngine().commandStreamReceiver;
102+
engineType = NEO::EngineGroupType::RenderCompute;
103+
}
104+
} else {
105+
device->getCsrForOrdinalAndIndex(&csr, desc->ordinal, desc->index);
106+
}
107+
108+
UNRECOVERABLE_IF(nullptr == csr);
109+
93110
commandList = static_cast<CommandListImp *>((*allocator)(CommandList::commandListimmediateIddsPerBlock));
94111
commandList->internalUsage = internalUsage;
95112
commandList->cmdListType = CommandListType::TYPE_IMMEDIATE;
96113
commandList->isSyncModeQueue = (desc->mode == ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS);
97-
returnValue = commandList->initialize(device, engineGroupType, desc->flags);
114+
returnValue = commandList->initialize(device, engineType, desc->flags);
98115
if (returnValue != ZE_RESULT_SUCCESS) {
99116
commandList->destroy();
100117
commandList = nullptr;
101118
return commandList;
102119
}
103-
NEO::CommandStreamReceiver *csr = nullptr;
104-
auto deviceImp = static_cast<DeviceImp *>(device);
105-
if (internalUsage) {
106-
csr = deviceImp->neoDevice->getInternalEngine().commandStreamReceiver;
107-
} else {
108-
device->getCsrForOrdinalAndIndex(&csr, desc->ordinal, desc->index);
109-
}
110-
111-
UNRECOVERABLE_IF(nullptr == csr);
112120

113-
auto commandQueue = CommandQueue::create(productFamily, device, csr, desc, NEO::EngineGroupType::Copy == engineGroupType, internalUsage, returnValue);
121+
auto commandQueue = CommandQueue::create(productFamily, device, csr, desc, NEO::EngineGroupType::Copy == engineType, internalUsage, returnValue);
114122
if (!commandQueue) {
115123
commandList->destroy();
116124
commandList = nullptr;

level_zero/core/source/device/device_imp.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -794,7 +794,7 @@ Device *Device::create(DriverHandle *driverHandle, NEO::Device *neoDevice, uint3
794794
ze_result_t resultValue = ZE_RESULT_SUCCESS;
795795
device->pageFaultCommandList =
796796
CommandList::createImmediate(
797-
device->neoDevice->getHardwareInfo().platform.eProductFamily, device, &cmdQueueDesc, true, NEO::EngineGroupType::RenderCompute, resultValue);
797+
device->neoDevice->getHardwareInfo().platform.eProductFamily, device, &cmdQueueDesc, true, NEO::EngineGroupType::Copy, resultValue);
798798
}
799799

800800
if (device->getSourceLevelDebugger()) {

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp

Lines changed: 66 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ template <GFXCORE_FAMILY gfxCoreFamily>
2323
class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>> {
2424
public:
2525
MockCommandListHw() : WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>() {}
26+
MockCommandListHw(bool failOnFirst) : WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>(), failOnFirstCopy(failOnFirst) {}
2627

2728
AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize) override {
2829
return {0, 0, nullptr, true};
@@ -41,6 +42,10 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFam
4142
appendMemoryCopyKernelWithGACalledTimes++;
4243
if (isStateless)
4344
appendMemoryCopyKernelWithGAStatelessCalledTimes++;
45+
if (failOnFirstCopy &&
46+
(appendMemoryCopyKernelWithGACalledTimes == 1 || appendMemoryCopyKernelWithGAStatelessCalledTimes == 1)) {
47+
return ZE_RESULT_ERROR_UNKNOWN;
48+
}
4449
return ZE_RESULT_SUCCESS;
4550
}
4651
ze_result_t appendMemoryCopyBlit(uintptr_t dstPtr,
@@ -50,6 +55,9 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFam
5055
uint64_t srcOffset,
5156
uint64_t size) override {
5257
appendMemoryCopyBlitCalledTimes++;
58+
if (failOnFirstCopy && appendMemoryCopyBlitCalledTimes == 1) {
59+
return ZE_RESULT_ERROR_UNKNOWN;
60+
}
5361
return ZE_RESULT_SUCCESS;
5462
}
5563

@@ -118,6 +126,7 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFam
118126
Vec3<size_t> appendImageRegionCopySize = {0, 0, 0};
119127
Vec3<size_t> appendImageRegionSrcOrigin = {9, 9, 9};
120128
Vec3<size_t> appendImageRegionDstOrigin = {9, 9, 9};
129+
bool failOnFirstCopy = false;
121130
};
122131

123132
using Platforms = IsAtLeastProduct<IGFX_SKYLAKE>;
@@ -179,6 +188,20 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenPageFaultCopyCalledThenappendPa
179188
EXPECT_EQ(cmdList.appendMemoryCopyKernelWithGAStatelessCalledTimes, 0u);
180189
}
181190

191+
HWTEST2_F(CommandListCreate, givenCommandListWhenPageFaultCopyCalledWithCopyEngineThenappendPageFaultCopyWithappendMemoryCopyKernelWithGACalled, Platforms) {
192+
MockCommandListHw<gfxCoreFamily> cmdList;
193+
size_t size = (sizeof(uint32_t) * 4);
194+
cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u);
195+
NEO::MockGraphicsAllocation mockAllocationSrc(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
196+
reinterpret_cast<void *>(0x1234), size, 0, sizeof(uint32_t),
197+
MemoryPool::System4KBPages);
198+
NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
199+
reinterpret_cast<void *>(0x2345), size, 0, sizeof(uint32_t),
200+
MemoryPool::System4KBPages);
201+
cmdList.appendPageFaultCopy(&mockAllocationDst, &mockAllocationSrc, size, false);
202+
EXPECT_EQ(cmdList.appendMemoryCopyBlitCalledTimes, 1u);
203+
}
204+
182205
HWTEST2_F(CommandListCreate, givenCommandListWhenPageFaultCopyCalledThenappendPageFaultCopyWithappendMemoryCopyKernelWithGACalledForMiddleAndRightSizesAreCalled, Platforms) {
183206
MockCommandListHw<gfxCoreFamily> cmdList;
184207
size_t size = ((sizeof(uint32_t) * 4) + 1);
@@ -194,6 +217,49 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenPageFaultCopyCalledThenappendPa
194217
EXPECT_EQ(cmdList.appendMemoryCopyKernelWithGAStatelessCalledTimes, 0u);
195218
}
196219

220+
HWTEST2_F(CommandListCreate, givenCommandListWhenPageFaultCopyCalledAndErrorOnMidCopyThenappendPageFaultCopyWithappendMemoryCopyKernelWithGACalledForMiddleIsCalled, Platforms) {
221+
MockCommandListHw<gfxCoreFamily> cmdList(true);
222+
size_t size = ((sizeof(uint32_t) * 4) + 1);
223+
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
224+
NEO::MockGraphicsAllocation mockAllocationSrc(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
225+
reinterpret_cast<void *>(0x1234), size, 0, sizeof(uint32_t),
226+
MemoryPool::System4KBPages);
227+
NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
228+
reinterpret_cast<void *>(0x2345), size, 0, sizeof(uint32_t),
229+
MemoryPool::System4KBPages);
230+
cmdList.appendPageFaultCopy(&mockAllocationDst, &mockAllocationSrc, size, false);
231+
EXPECT_EQ(cmdList.appendMemoryCopyKernelWithGACalledTimes, 1u);
232+
EXPECT_EQ(cmdList.appendMemoryCopyKernelWithGAStatelessCalledTimes, 0u);
233+
}
234+
235+
HWTEST2_F(CommandListCreate, givenCommandListWhenPageFaultCopyCalledWithCopyEngineThenappendPageFaultCopyWithappendMemoryCopyKernelWithGACalledForMiddleAndRightSizesAreCalled, Platforms) {
236+
MockCommandListHw<gfxCoreFamily> cmdList;
237+
size_t size = ((sizeof(uint32_t) * 4) + 1);
238+
cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u);
239+
NEO::MockGraphicsAllocation mockAllocationSrc(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
240+
reinterpret_cast<void *>(0x1234), size, 0, sizeof(uint32_t),
241+
MemoryPool::System4KBPages);
242+
NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
243+
reinterpret_cast<void *>(0x2345), size, 0, sizeof(uint32_t),
244+
MemoryPool::System4KBPages);
245+
cmdList.appendPageFaultCopy(&mockAllocationDst, &mockAllocationSrc, size, false);
246+
EXPECT_EQ(cmdList.appendMemoryCopyBlitCalledTimes, 2u);
247+
}
248+
249+
HWTEST2_F(CommandListCreate, givenCommandListWhenPageFaultCopyCalledWithCopyEngineAndErrorOnMidOperationThenappendPageFaultCopyWithappendMemoryCopyKernelWithGACalledForMiddleIsCalled, Platforms) {
250+
MockCommandListHw<gfxCoreFamily> cmdList(true);
251+
size_t size = ((sizeof(uint32_t) * 4) + 1);
252+
cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u);
253+
NEO::MockGraphicsAllocation mockAllocationSrc(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
254+
reinterpret_cast<void *>(0x1234), size, 0, sizeof(uint32_t),
255+
MemoryPool::System4KBPages);
256+
NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
257+
reinterpret_cast<void *>(0x2345), size, 0, sizeof(uint32_t),
258+
MemoryPool::System4KBPages);
259+
cmdList.appendPageFaultCopy(&mockAllocationDst, &mockAllocationSrc, size, false);
260+
EXPECT_EQ(cmdList.appendMemoryCopyBlitCalledTimes, 1u);
261+
}
262+
197263
HWTEST2_F(CommandListCreate, givenCommandListWhen4GBytePageFaultCopyCalledThenPageFaultCopyWithappendMemoryCopyKernelWithGAStatelessCalled, Platforms) {
198264
MockCommandListHw<gfxCoreFamily> cmdList;
199265
size_t size = 0x100000000;

0 commit comments

Comments
 (0)