Skip to content

Commit 4918b54

Browse files
committed
Rename Indices member variable to ArgPointers
1 parent d3fb566 commit 4918b54

File tree

7 files changed

+52
-47
lines changed

7 files changed

+52
-47
lines changed

source/adapters/cuda/command_buffer.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
523523
ThreadsPerBlock, BlocksPerGrid));
524524

525525
// Set node param structure with the kernel related data
526-
auto &ArgIndices = hKernel->getArgIndices();
526+
auto &ArgPointers = hKernel->getArgPointers();
527527
CUDA_KERNEL_NODE_PARAMS NodeParams = {};
528528
NodeParams.func = CuFunc;
529529
NodeParams.gridDimX = BlocksPerGrid[0];
@@ -533,7 +533,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
533533
NodeParams.blockDimY = ThreadsPerBlock[1];
534534
NodeParams.blockDimZ = ThreadsPerBlock[2];
535535
NodeParams.sharedMemBytes = LocalSize;
536-
NodeParams.kernelParams = const_cast<void **>(ArgIndices.data());
536+
NodeParams.kernelParams = const_cast<void **>(ArgPointers.data());
537537

538538
// Create and add an new kernel node to the Cuda graph
539539
UR_CHECK_ERROR(cuGraphAddKernelNode(&GraphNode, hCommandBuffer->CudaGraph,
@@ -1398,7 +1398,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
13981398
Params.blockDimZ = ThreadsPerBlock[2];
13991399
Params.sharedMemBytes = KernelCommandHandle->Kernel->getLocalSize();
14001400
Params.kernelParams =
1401-
const_cast<void **>(KernelCommandHandle->Kernel->getArgIndices().data());
1401+
const_cast<void **>(KernelCommandHandle->Kernel->getArgPointers().data());
14021402

14031403
CUgraphNode Node = KernelCommandHandle->Node;
14041404
CUgraphExec CudaGraphExec = CommandBuffer->CudaGraphExec;

source/adapters/cuda/enqueue.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -492,7 +492,7 @@ enqueueKernelLaunch(ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel,
492492
UR_CHECK_ERROR(RetImplEvent->start());
493493
}
494494

495-
auto &ArgIndices = hKernel->getArgIndices();
495+
auto &ArgIndices = hKernel->getArgPointers();
496496
UR_CHECK_ERROR(cuLaunchKernel(
497497
CuFunc, BlocksPerGrid[0], BlocksPerGrid[1], BlocksPerGrid[2],
498498
ThreadsPerBlock[0], ThreadsPerBlock[1], ThreadsPerBlock[2], LocalSize,
@@ -680,7 +680,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
680680
UR_CHECK_ERROR(RetImplEvent->start());
681681
}
682682

683-
auto &ArgIndices = hKernel->getArgIndices();
683+
auto &ArgPointers = hKernel->getArgPointers();
684684

685685
CUlaunchConfig launch_config;
686686
launch_config.gridDimX = BlocksPerGrid[0];
@@ -696,7 +696,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
696696
launch_config.numAttrs = launch_attribute.size();
697697

698698
UR_CHECK_ERROR(cuLaunchKernelEx(&launch_config, CuFunc,
699-
const_cast<void **>(ArgIndices.data()),
699+
const_cast<void **>(ArgPointers.data()),
700700
nullptr));
701701

702702
if (phEvent) {

source/adapters/cuda/kernel.hpp

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ struct ur_kernel_handle_t_ {
6666
args_t Storage;
6767
/// Aligned size of each parameter, including padding.
6868
args_size_t ParamSizes;
69-
/// Byte offset into /p Storage allocation for each parameter.
70-
args_index_t Indices;
69+
/// Byte offset into /p Storage allocation for each argument.
70+
args_index_t ArgPointers;
7171
/// Position in the Storage array where the next argument should added.
7272
size_t InsertPos = 0;
7373
/// Aligned size in bytes for each local memory parameter after padding has
@@ -92,21 +92,23 @@ struct ur_kernel_handle_t_ {
9292
std::uint32_t ImplicitOffsetArgs[3] = {0, 0, 0};
9393

9494
arguments() {
95-
// Place the implicit offset index at the end of the indicies collection
96-
Indices.emplace_back(&ImplicitOffsetArgs);
95+
// Place the implicit offset index at the end of the ArgPointers
96+
// collection.
97+
ArgPointers.emplace_back(&ImplicitOffsetArgs);
9798
}
9899

99100
/// Add an argument to the kernel.
100101
/// If the argument existed before, it is replaced.
101102
/// Otherwise, it is added.
102103
/// Gaps are filled with empty arguments.
103-
/// Implicit offset argument is kept at the back of the indices collection.
104+
/// Implicit offset argument is kept at the back of the ArgPointers
105+
/// collection.
104106
void addArg(size_t Index, size_t Size, const void *Arg,
105107
size_t LocalSize = 0) {
106108
// Expand storage to accommodate this Index if needed.
107-
if (Index + 2 > Indices.size()) {
109+
if (Index + 2 > ArgPointers.size()) {
108110
// Move implicit offset argument index with the end
109-
Indices.resize(Index + 2, Indices.back());
111+
ArgPointers.resize(Index + 2, ArgPointers.back());
110112
// Ensure enough space for the new argument
111113
ParamSizes.resize(Index + 1);
112114
AlignedLocalMemSize.resize(Index + 1);
@@ -117,13 +119,13 @@ struct ur_kernel_handle_t_ {
117119
if (ParamSizes[Index] == 0) {
118120
ParamSizes[Index] = Size;
119121
std::memcpy(&Storage[InsertPos], Arg, Size);
120-
Indices[Index] = &Storage[InsertPos];
122+
ArgPointers[Index] = &Storage[InsertPos];
121123
AlignedLocalMemSize[Index] = LocalSize;
122124
InsertPos += Size;
123125
}
124126
// Otherwise, update the existing argument.
125127
else {
126-
std::memcpy(Indices[Index], Arg, Size);
128+
std::memcpy(ArgPointers[Index], Arg, Size);
127129
AlignedLocalMemSize[Index] = LocalSize;
128130
assert(Size == ParamSizes[Index]);
129131
}
@@ -138,7 +140,7 @@ struct ur_kernel_handle_t_ {
138140
std::pair<size_t, size_t> calcAlignedLocalArgument(size_t Index,
139141
size_t Size) {
140142
// Store the unpadded size of the local argument
141-
if (Index + 2 > Indices.size()) {
143+
if (Index + 2 > ArgPointers.size()) {
142144
AlignedLocalMemSize.resize(Index + 1);
143145
OriginalLocalMemSize.resize(Index + 1);
144146
}
@@ -168,10 +170,11 @@ struct ur_kernel_handle_t_ {
168170
return std::make_pair(AlignedLocalSize, AlignedLocalOffset);
169171
}
170172

171-
// Iterate over all existing local argument which follows StartIndex
173+
// Iterate over each existing local argument which follows StartIndex
172174
// index, update the offset and pointer into the kernel local memory.
173175
void updateLocalArgOffset(size_t StartIndex) {
174-
const size_t NumArgs = Indices.size() - 1; // Accounts for implicit arg
176+
const size_t NumArgs =
177+
ArgPointers.size() - 1; // Accounts for implicit arg
175178
for (auto SuccIndex = StartIndex; SuccIndex < NumArgs; SuccIndex++) {
176179
const size_t OriginalLocalSize = OriginalLocalMemSize[SuccIndex];
177180
if (OriginalLocalSize == 0) {
@@ -187,7 +190,7 @@ struct ur_kernel_handle_t_ {
187190
AlignedLocalMemSize[SuccIndex] = SuccAlignedLocalSize;
188191

189192
// Store new offset into local data
190-
std::memcpy(Indices[SuccIndex], &SuccAlignedLocalOffset,
193+
std::memcpy(ArgPointers[SuccIndex], &SuccAlignedLocalOffset,
191194
sizeof(size_t));
192195
}
193196
}
@@ -235,7 +238,7 @@ struct ur_kernel_handle_t_ {
235238
std::memcpy(ImplicitOffsetArgs, ImplicitOffset, Size);
236239
}
237240

238-
const args_index_t &getIndices() const noexcept { return Indices; }
241+
const args_index_t &getArgPointers() const noexcept { return ArgPointers; }
239242

240243
uint32_t getLocalSize() const {
241244
return std::accumulate(std::begin(AlignedLocalMemSize),
@@ -306,7 +309,7 @@ struct ur_kernel_handle_t_ {
306309
/// real one required by the kernel, since this cannot be queried from
307310
/// the CUDA Driver API
308311
uint32_t getNumArgs() const noexcept {
309-
return static_cast<uint32_t>(Args.Indices.size() - 1);
312+
return static_cast<uint32_t>(Args.ArgPointers.size() - 1);
310313
}
311314

312315
void setKernelArg(int Index, size_t Size, const void *Arg) {
@@ -321,8 +324,8 @@ struct ur_kernel_handle_t_ {
321324
return Args.setImplicitOffset(Size, ImplicitOffset);
322325
}
323326

324-
const arguments::args_index_t &getArgIndices() const {
325-
return Args.getIndices();
327+
const arguments::args_index_t &getArgPointers() const {
328+
return Args.getArgPointers();
326329
}
327330

328331
void setWorkGroupMemory(size_t MemSize) { Args.setWorkGroupMemory(MemSize); }

source/adapters/hip/command_buffer.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
378378
pLocalWorkSize, hKernel, HIPFunc, ThreadsPerBlock, BlocksPerGrid));
379379

380380
// Set node param structure with the kernel related data
381-
auto &ArgIndices = hKernel->getArgIndices();
381+
auto &ArgPointers = hKernel->getArgPointers();
382382
hipKernelNodeParams NodeParams;
383383
NodeParams.func = HIPFunc;
384384
NodeParams.gridDim.x = BlocksPerGrid[0];
@@ -388,7 +388,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
388388
NodeParams.blockDim.y = ThreadsPerBlock[1];
389389
NodeParams.blockDim.z = ThreadsPerBlock[2];
390390
NodeParams.sharedMemBytes = LocalSize;
391-
NodeParams.kernelParams = const_cast<void **>(ArgIndices.data());
391+
NodeParams.kernelParams = const_cast<void **>(ArgPointers.data());
392392
NodeParams.extra = nullptr;
393393

394394
// Create and add an new kernel node to the HIP graph
@@ -1098,7 +1098,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
10981098
Params.blockDim.z = ThreadsPerBlock[2];
10991099
Params.sharedMemBytes = hCommand->Kernel->getLocalSize();
11001100
Params.kernelParams =
1101-
const_cast<void **>(hCommand->Kernel->getArgIndices().data());
1101+
const_cast<void **>(hCommand->Kernel->getArgPointers().data());
11021102

11031103
hipGraphNode_t Node = hCommand->Node;
11041104
hipGraphExec_t HipGraphExec = CommandBuffer->HIPGraphExec;

source/adapters/hip/enqueue.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
308308
}
309309
}
310310

311-
auto ArgIndices = hKernel->getArgIndices();
311+
auto ArgPointers = hKernel->getArgPointers();
312312

313313
// If migration of mem across buffer is needed, an event must be associated
314314
// with this command, implicitly if phEvent is nullptr
@@ -322,7 +322,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
322322
UR_CHECK_ERROR(hipModuleLaunchKernel(
323323
HIPFunc, BlocksPerGrid[0], BlocksPerGrid[1], BlocksPerGrid[2],
324324
ThreadsPerBlock[0], ThreadsPerBlock[1], ThreadsPerBlock[2],
325-
hKernel->getLocalSize(), HIPStream, ArgIndices.data(), nullptr));
325+
hKernel->getLocalSize(), HIPStream, ArgPointers.data(), nullptr));
326326

327327
if (phEvent) {
328328
UR_CHECK_ERROR(RetImplEvent->record());

source/adapters/hip/kernel.hpp

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ struct ur_kernel_handle_t_ {
6161
args_t Storage;
6262
/// Aligned size of each parameter, including padding.
6363
args_size_t ParamSizes;
64-
/// Byte offset into /p Storage allocation for each parameter.
65-
args_index_t Indices;
64+
/// Byte offset into /p Storage allocation for each argument.
65+
args_index_t ArgPointers;
6666
/// Position in the Storage array where the next argument should added.
6767
size_t InsertPos = 0;
6868
/// Aligned size in bytes for each local memory parameter after padding has
@@ -87,20 +87,21 @@ struct ur_kernel_handle_t_ {
8787

8888
arguments() {
8989
// Place the implicit offset index at the end of the indicies collection
90-
Indices.emplace_back(&ImplicitOffsetArgs);
90+
ArgPointers.emplace_back(&ImplicitOffsetArgs);
9191
}
9292

9393
/// Add an argument to the kernel.
9494
/// If the argument existed before, it is replaced.
9595
/// Otherwise, it is added.
9696
/// Gaps are filled with empty arguments.
97-
/// Implicit offset argument is kept at the back of the indices collection.
97+
/// Implicit offset argument is kept at the back of the ArgPointers
98+
/// collection.
9899
void addArg(size_t Index, size_t Size, const void *Arg,
99100
size_t LocalSize = 0) {
100101
// Expand storage to accommodate this Index if needed.
101-
if (Index + 2 > Indices.size()) {
102+
if (Index + 2 > ArgPointers.size()) {
102103
// Move implicit offset argument index with the end
103-
Indices.resize(Index + 2, Indices.back());
104+
ArgPointers.resize(Index + 2, ArgPointers.back());
104105
// Ensure enough space for the new argument
105106
ParamSizes.resize(Index + 1);
106107
AlignedLocalMemSize.resize(Index + 1);
@@ -111,13 +112,13 @@ struct ur_kernel_handle_t_ {
111112
if (ParamSizes[Index] == 0) {
112113
ParamSizes[Index] = Size;
113114
std::memcpy(&Storage[InsertPos], Arg, Size);
114-
Indices[Index] = &Storage[InsertPos];
115+
ArgPointers[Index] = &Storage[InsertPos];
115116
AlignedLocalMemSize[Index] = LocalSize;
116117
InsertPos += Size;
117118
}
118119
// Otherwise, update the existing argument.
119120
else {
120-
std::memcpy(Indices[Index], Arg, Size);
121+
std::memcpy(ArgPointers[Index], Arg, Size);
121122
AlignedLocalMemSize[Index] = LocalSize;
122123
assert(Size == ParamSizes[Index]);
123124
}
@@ -132,7 +133,7 @@ struct ur_kernel_handle_t_ {
132133
std::pair<size_t, size_t> calcAlignedLocalArgument(size_t Index,
133134
size_t Size) {
134135
// Store the unpadded size of the local argument
135-
if (Index + 2 > Indices.size()) {
136+
if (Index + 2 > ArgPointers.size()) {
136137
AlignedLocalMemSize.resize(Index + 1);
137138
OriginalLocalMemSize.resize(Index + 1);
138139
}
@@ -161,10 +162,11 @@ struct ur_kernel_handle_t_ {
161162
return std::make_pair(AlignedLocalSize, AlignedLocalOffset);
162163
}
163164

164-
// Iterate over all existing local argument which follows StartIndex
165+
// Iterate over each existing local argument which follows StartIndex
165166
// index, update the offset and pointer into the kernel local memory.
166167
void updateLocalArgOffset(size_t StartIndex) {
167-
const size_t NumArgs = Indices.size() - 1; // Accounts for implicit arg
168+
const size_t NumArgs =
169+
ArgPointers.size() - 1; // Accounts for implicit arg
168170
for (auto SuccIndex = StartIndex; SuccIndex < NumArgs; SuccIndex++) {
169171
const size_t OriginalLocalSize = OriginalLocalMemSize[SuccIndex];
170172
if (OriginalLocalSize == 0) {
@@ -180,7 +182,7 @@ struct ur_kernel_handle_t_ {
180182
AlignedLocalMemSize[SuccIndex] = SuccAlignedLocalSize;
181183

182184
// Store new offset into local data
183-
std::memcpy(Indices[SuccIndex], &SuccAlignedLocalOffset,
185+
std::memcpy(ArgPointers[SuccIndex], &SuccAlignedLocalOffset,
184186
sizeof(size_t));
185187
}
186188
}
@@ -219,7 +221,7 @@ struct ur_kernel_handle_t_ {
219221
std::memcpy(ImplicitOffsetArgs, ImplicitOffset, Size);
220222
}
221223

222-
const args_index_t &getIndices() const noexcept { return Indices; }
224+
const args_index_t &getArgPointers() const noexcept { return ArgPointers; }
223225

224226
uint32_t getLocalSize() const {
225227
return std::accumulate(std::begin(AlignedLocalMemSize),
@@ -276,7 +278,7 @@ struct ur_kernel_handle_t_ {
276278
/// offset. Note this only returns the current known number of arguments,
277279
/// not the real one required by the kernel, since this cannot be queried
278280
/// from the HIP Driver API
279-
uint32_t getNumArgs() const noexcept { return Args.Indices.size() - 1; }
281+
uint32_t getNumArgs() const noexcept { return Args.ArgPointers.size() - 1; }
280282

281283
void setKernelArg(int Index, size_t Size, const void *Arg) {
282284
Args.addArg(Index, Size, Arg);
@@ -290,8 +292,8 @@ struct ur_kernel_handle_t_ {
290292
return Args.setImplicitOffset(Size, ImplicitOffset);
291293
}
292294

293-
const arguments::args_index_t &getArgIndices() const {
294-
return Args.getIndices();
295+
const arguments::args_index_t &getArgPointers() const {
296+
return Args.getArgPointers();
295297
}
296298

297299
uint32_t getLocalSize() const noexcept { return Args.getLocalSize(); }

test/adapters/cuda/kernel_tests.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ TEST_P(cudaKernelTest, URKernelArgumentSimple) {
153153

154154
int number = 10;
155155
ASSERT_SUCCESS(urKernelSetArgValue(kernel, 0, sizeof(int), nullptr, &number));
156-
const auto &kernelArgs = kernel->getArgIndices();
156+
const auto &kernelArgs = kernel->getArgPointers();
157157
ASSERT_EQ(kernelArgs.size(), 1 + NumberOfImplicitArgsCUDA);
158158

159159
int storedValue = *static_cast<const int *>(kernelArgs[0]);
@@ -175,15 +175,15 @@ TEST_P(cudaKernelTest, URKernelArgumentSetTwice) {
175175

176176
int number = 10;
177177
ASSERT_SUCCESS(urKernelSetArgValue(kernel, 0, sizeof(int), nullptr, &number));
178-
const auto &kernelArgs = kernel->getArgIndices();
178+
const auto &kernelArgs = kernel->getArgPointers();
179179
ASSERT_EQ(kernelArgs.size(), 1 + NumberOfImplicitArgsCUDA);
180180
int storedValue = *static_cast<const int *>(kernelArgs[0]);
181181
ASSERT_EQ(storedValue, number);
182182

183183
int otherNumber = 934;
184184
ASSERT_SUCCESS(
185185
urKernelSetArgValue(kernel, 0, sizeof(int), nullptr, &otherNumber));
186-
const auto kernelArgs2 = kernel->getArgIndices();
186+
const auto kernelArgs2 = kernel->getArgPointers();
187187
ASSERT_EQ(kernelArgs2.size(), 1 + NumberOfImplicitArgsCUDA);
188188
storedValue = *static_cast<const int *>(kernelArgs2[0]);
189189
ASSERT_EQ(storedValue, otherNumber);

0 commit comments

Comments
 (0)