Skip to content

Commit 73a63c7

Browse files
Fix Read/WriteBuffer for unaligned offsets
Change-Id: I08d33e80243f41174f4629c8a611e286629d2e10
1 parent a31c446 commit 73a63c7

File tree

8 files changed

+200
-9
lines changed

8 files changed

+200
-9
lines changed

runtime/built_ins/built_ins.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -243,14 +243,14 @@ class BuiltInOp<HWFamily, EBuiltInOps::CopyBufferToBuffer> : public BuiltinDispa
243243
} else if (operationParams.srcMemObj) {
244244
kernelSplit1DBuilder.setArg(0, operationParams.srcMemObj);
245245
} else {
246-
kernelSplit1DBuilder.setArgSvm(0, operationParams.size.x, operationParams.srcPtr, nullptr, CL_MEM_READ_ONLY);
246+
kernelSplit1DBuilder.setArgSvm(0, operationParams.size.x + operationParams.srcOffset.x, operationParams.srcPtr, nullptr, CL_MEM_READ_ONLY);
247247
}
248248
if (operationParams.dstSvmAlloc) {
249249
kernelSplit1DBuilder.setArgSvmAlloc(1, operationParams.dstPtr, operationParams.dstSvmAlloc);
250250
} else if (operationParams.dstMemObj) {
251251
kernelSplit1DBuilder.setArg(1, operationParams.dstMemObj);
252252
} else {
253-
kernelSplit1DBuilder.setArgSvm(1, operationParams.size.x, operationParams.dstPtr);
253+
kernelSplit1DBuilder.setArgSvm(1, operationParams.size.x + operationParams.dstOffset.x, operationParams.dstPtr);
254254
}
255255

256256
// Set-up srcOffset

runtime/command_queue/enqueue_read_buffer.h

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -88,21 +88,30 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
8888
BuiltInOwnershipWrapper builtInLock(builder, this->context);
8989

9090
void *dstPtr = ptr;
91+
void *alignedDstPtr = dstPtr;
92+
size_t dstPtrOffset = 0;
93+
94+
if (!isAligned<4>(dstPtr)) {
95+
alignedDstPtr = alignDown(dstPtr, 4);
96+
dstPtrOffset = ptrDiff(dstPtr, alignedDstPtr);
97+
}
9198

9299
MemObjSurface bufferSurf(buffer);
93-
HostPtrSurface hostPtrSurf(dstPtr, size);
100+
HostPtrSurface hostPtrSurf(alignedDstPtr, size + dstPtrOffset);
94101
Surface *surfaces[] = {&bufferSurf, &hostPtrSurf};
95102

96103
if (size != 0) {
97104
bool status = getCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, getDevice(), true);
98105
if (!status) {
99106
return CL_OUT_OF_RESOURCES;
100107
}
101-
dstPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddressToPatch());
108+
109+
hostPtrSurf.getAllocation()->allocationOffset += dstPtrOffset;
102110
}
103111

104112
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
105-
dc.dstPtr = dstPtr;
113+
dc.dstPtr = alignedDstPtr;
114+
dc.dstOffset = {dstPtrOffset, 0, 0};
106115
dc.srcMemObj = buffer;
107116
dc.srcOffset = {offset, 0, 0};
108117
dc.size = {size, 0, 0};

runtime/command_queue/enqueue_write_buffer.h

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -89,8 +89,15 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
8989
BuiltInOwnershipWrapper builtInLock(builder, this->context);
9090

9191
void *srcPtr = const_cast<void *>(ptr);
92+
void *alignedSrcPtr = srcPtr;
93+
size_t srcPtrOffset = 0;
9294

93-
HostPtrSurface hostPtrSurf(srcPtr, size, true);
95+
if (!isAligned<4>(srcPtr)) {
96+
alignedSrcPtr = alignDown(srcPtr, 4);
97+
srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr);
98+
}
99+
100+
HostPtrSurface hostPtrSurf(alignedSrcPtr, size + srcPtrOffset, true);
94101
MemObjSurface bufferSurf(buffer);
95102
Surface *surfaces[] = {&bufferSurf, &hostPtrSurf};
96103

@@ -99,11 +106,13 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
99106
if (!status) {
100107
return CL_OUT_OF_RESOURCES;
101108
}
102-
srcPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddressToPatch());
109+
110+
hostPtrSurf.getAllocation()->allocationOffset += srcPtrOffset;
103111
}
104112

105113
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
106-
dc.srcPtr = srcPtr;
114+
dc.srcPtr = alignedSrcPtr;
115+
dc.srcOffset = {srcPtrOffset, 0, 0};
107116
dc.dstMemObj = buffer;
108117
dc.dstOffset = {offset, 0, 0};
109118
dc.size = {size, 0, 0};

runtime/command_stream/aub_command_stream_receiver_hw.inl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -623,7 +623,7 @@ void AUBCommandStreamReceiverHw<GfxFamily>::writeMemory(uint64_t gpuAddress, voi
623623

624624
template <typename GfxFamily>
625625
bool AUBCommandStreamReceiverHw<GfxFamily>::writeMemory(GraphicsAllocation &gfxAllocation) {
626-
auto cpuAddress = gfxAllocation.getUnderlyingBuffer();
626+
auto cpuAddress = ptrOffset(gfxAllocation.getUnderlyingBuffer(), static_cast<size_t>(gfxAllocation.allocationOffset));
627627
auto gpuAddress = GmmHelper::decanonize(gfxAllocation.getGpuAddress());
628628
auto size = gfxAllocation.getUnderlyingBufferSize();
629629
if (gfxAllocation.gmm && gfxAllocation.gmm->isRenderCompressed) {

unit_tests/aub_tests/command_queue/enqueue_read_buffer_aub_tests.cpp

Lines changed: 63 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -143,3 +143,66 @@ HWTEST_F(AUBReadBuffer, reserveCanonicalGpuAddress) {
143143

144144
AUBCommandStreamFixture::expectMemory<FamilyType>(dstGpuAddress, srcMemory, sizeof(dstMemory));
145145
}
146+
147+
struct AUBReadBufferUnaligned
148+
: public CommandEnqueueAUBFixture,
149+
public ::testing::Test {
150+
151+
void SetUp() override {
152+
CommandEnqueueAUBFixture::SetUp();
153+
}
154+
155+
void TearDown() override {
156+
CommandEnqueueAUBFixture::TearDown();
157+
}
158+
159+
template <typename FamilyType>
160+
void testReadBufferUnaligned(size_t offset, size_t size) {
161+
MockContext context(&pCmdQ->getDevice());
162+
163+
char srcMemory[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
164+
const auto bufferSize = sizeof(srcMemory);
165+
char dstMemory[bufferSize] = {0};
166+
167+
auto retVal = CL_INVALID_VALUE;
168+
169+
auto buffer = std::unique_ptr<Buffer>(Buffer::create(
170+
&context,
171+
CL_MEM_USE_HOST_PTR,
172+
bufferSize,
173+
srcMemory,
174+
retVal));
175+
ASSERT_NE(nullptr, buffer);
176+
177+
buffer->forceDisallowCPUCopy = true;
178+
179+
// Map destination memory to GPU
180+
GraphicsAllocation *allocation = createResidentAllocationAndStoreItInCsr(dstMemory, bufferSize);
181+
auto dstMemoryGPUPtr = reinterpret_cast<char *>(allocation->getGpuAddress());
182+
183+
// Do unaligned read
184+
retVal = pCmdQ->enqueueReadBuffer(
185+
buffer.get(),
186+
CL_TRUE,
187+
offset,
188+
size,
189+
ptrOffset(dstMemory, offset),
190+
0,
191+
nullptr,
192+
nullptr);
193+
EXPECT_EQ(CL_SUCCESS, retVal);
194+
195+
// Check the memory
196+
AUBCommandStreamFixture::expectMemory<FamilyType>(ptrOffset(dstMemoryGPUPtr, offset), ptrOffset(srcMemory, offset), size);
197+
}
198+
};
199+
200+
HWTEST_F(AUBReadBufferUnaligned, all) {
201+
const std::vector<size_t> offsets = {0, 1, 2, 3};
202+
const std::vector<size_t> sizes = {4, 3, 2, 1};
203+
for (auto offset : offsets) {
204+
for (auto size : sizes) {
205+
testReadBufferUnaligned<FamilyType>(offset, size);
206+
}
207+
}
208+
}

unit_tests/aub_tests/command_queue/enqueue_write_buffer_aub_tests.cpp

Lines changed: 60 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -108,3 +108,63 @@ INSTANTIATE_TEST_CASE_P(AUBWriteBuffer_simple,
108108
1 * sizeof(cl_float),
109109
2 * sizeof(cl_float),
110110
3 * sizeof(cl_float)));
111+
112+
struct AUBWriteBufferUnaligned
113+
: public CommandEnqueueAUBFixture,
114+
public ::testing::Test {
115+
116+
void SetUp() override {
117+
CommandEnqueueAUBFixture::SetUp();
118+
}
119+
120+
void TearDown() override {
121+
CommandEnqueueAUBFixture::TearDown();
122+
}
123+
124+
template <typename FamilyType>
125+
void testWriteBufferUnaligned(size_t offset, size_t size) {
126+
MockContext context(&pCmdQ->getDevice());
127+
128+
char srcMemory[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
129+
const auto bufferSize = sizeof(srcMemory);
130+
char dstMemory[bufferSize] = {0};
131+
132+
auto retVal = CL_INVALID_VALUE;
133+
134+
auto buffer = std::unique_ptr<Buffer>(Buffer::create(
135+
&context,
136+
CL_MEM_USE_HOST_PTR,
137+
bufferSize,
138+
dstMemory,
139+
retVal));
140+
ASSERT_NE(nullptr, buffer);
141+
142+
buffer->forceDisallowCPUCopy = true;
143+
144+
// Do unaligned write
145+
retVal = pCmdQ->enqueueWriteBuffer(
146+
buffer.get(),
147+
CL_TRUE,
148+
offset,
149+
size,
150+
ptrOffset(srcMemory, offset),
151+
0,
152+
nullptr,
153+
nullptr);
154+
EXPECT_EQ(CL_SUCCESS, retVal);
155+
156+
// Check the memory
157+
auto bufferGPUPtr = reinterpret_cast<char *>((buffer->getGraphicsAllocation()->getGpuAddress()));
158+
AUBCommandStreamFixture::expectMemory<FamilyType>(ptrOffset(bufferGPUPtr, offset), ptrOffset(srcMemory, offset), size);
159+
}
160+
};
161+
162+
HWTEST_F(AUBWriteBufferUnaligned, all) {
163+
const std::vector<size_t> offsets = {0, 1, 2, 3};
164+
const std::vector<size_t> sizes = {4, 3, 2, 1};
165+
for (auto offset : offsets) {
166+
for (auto size : sizes) {
167+
testWriteBufferUnaligned<FamilyType>(offset, size);
168+
}
169+
}
170+
}

unit_tests/command_queue/enqueue_read_buffer_tests.cpp

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
#include "reg_configs_common.h"
1111
#include "runtime/helpers/cache_policy.h"
1212
#include "runtime/helpers/dispatch_info.h"
13+
#include "runtime/memory_manager/allocations_list.h"
1314
#include "unit_tests/command_queue/enqueue_fixture.h"
1415
#include "unit_tests/gen_common/gen_commands_common_validation.h"
1516
#include "unit_tests/command_queue/enqueue_read_buffer_fixture.h"
@@ -326,6 +327,30 @@ HWTEST_F(EnqueueReadBufferTypeTest, givenNotAlignedPointerAndAlignedSizeWhenRead
326327
EXPECT_FALSE(csr.disableL3Cache);
327328
}
328329

330+
HWTEST_F(EnqueueReadBufferTypeTest, givenNotAlignedPointerAndAlignedSizeWhenReadBufferIsCalledThenHostGraphicsAllocationHasCorrectOffset) {
331+
void *ptr = (void *)0x1039;
332+
333+
cl_int retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(),
334+
CL_FALSE,
335+
0,
336+
MemoryConstants::cacheLineSize,
337+
ptr,
338+
0,
339+
nullptr,
340+
nullptr);
341+
342+
EXPECT_EQ(CL_SUCCESS, retVal);
343+
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
344+
345+
auto allocation = csr.getTemporaryAllocations().peekHead();
346+
while (allocation && allocation->getUnderlyingBuffer() != alignDown(ptr, 4)) {
347+
allocation = allocation->next;
348+
}
349+
350+
ASSERT_NE(allocation, nullptr);
351+
EXPECT_EQ((void *)allocation->getGpuAddressToPatch(), ptr);
352+
}
353+
329354
HWTEST_F(EnqueueReadBufferTypeTest, givenOOQWithEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrAndZeroCopyBufferWhenReadBufferIsExecutedThenTaskLevelNotIncreased) {
330355
DebugManagerStateRestore dbgRestore;
331356
DebugManager.flags.DoCpuCopyOnReadBuffer.set(true);

unit_tests/command_queue/enqueue_write_buffer_tests.cpp

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
#include "runtime/built_ins/builtins_dispatch_builder.h"
1111
#include "reg_configs_common.h"
1212
#include "runtime/helpers/dispatch_info.h"
13+
#include "runtime/memory_manager/allocations_list.h"
1314
#include "unit_tests/command_queue/enqueue_fixture.h"
1415
#include "unit_tests/gen_common/gen_commands_common_validation.h"
1516
#include "unit_tests/helpers/debug_manager_state_restore.h"
@@ -433,3 +434,27 @@ HWTEST_F(NegativeFailAllocationTest, givenEnqueueWriteBufferWhenHostPtrAllocatio
433434

434435
EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal);
435436
}
437+
438+
HWTEST_F(EnqueueWriteBufferTypeTest, givenNotAlignedPointerAndAlignedSizeWhenWriteBufferIsCalledThenHostGraphicsAllocationHasCorrectOffset) {
439+
void *ptr = (void *)0x1039;
440+
441+
cl_int retVal = pCmdQ->enqueueWriteBuffer(srcBuffer.get(),
442+
CL_FALSE,
443+
0,
444+
MemoryConstants::cacheLineSize,
445+
ptr,
446+
0,
447+
nullptr,
448+
nullptr);
449+
450+
EXPECT_EQ(CL_SUCCESS, retVal);
451+
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
452+
453+
auto allocation = csr.getTemporaryAllocations().peekHead();
454+
while (allocation && allocation->getUnderlyingBuffer() != alignDown(ptr, 4)) {
455+
allocation = allocation->next;
456+
}
457+
458+
ASSERT_NE(allocation, nullptr);
459+
EXPECT_EQ((void *)allocation->getGpuAddressToPatch(), ptr);
460+
}

0 commit comments

Comments
 (0)