Skip to content

Commit 8fc6cb4

Browse files
[SYCL] Remove std::function usage from applyFuncOnFilteredArgs (#17202)
`std::function` may perform a heap memory allocation during construction (for example, when the wrapped callable does not fit into its small-object buffer), which we'd rather avoid on the kernel submission path for performance reasons.
1 parent c2c029e commit 8fc6cb4

File tree

2 files changed

+33
-37
lines changed

2 files changed

+33
-37
lines changed

sycl/source/detail/scheduler/commands.cpp

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -145,40 +145,6 @@ static std::string demangleKernelName(std::string Name) {
145145
static std::string demangleKernelName(std::string Name) { return Name; }
146146
#endif
147147

148-
void applyFuncOnFilteredArgs(
149-
const KernelArgMask *EliminatedArgMask, std::vector<ArgDesc> &Args,
150-
std::function<void(detail::ArgDesc &Arg, int NextTrueIndex)> Func) {
151-
if (!EliminatedArgMask || EliminatedArgMask->size() == 0) {
152-
for (ArgDesc &Arg : Args) {
153-
Func(Arg, Arg.MIndex);
154-
}
155-
} else {
156-
// TODO this is not necessary as long as we can guarantee that the
157-
// arguments are already sorted (e. g. handle the sorting in handler
158-
// if necessary due to set_arg(...) usage).
159-
std::sort(Args.begin(), Args.end(), [](const ArgDesc &A, const ArgDesc &B) {
160-
return A.MIndex < B.MIndex;
161-
});
162-
int LastIndex = -1;
163-
size_t NextTrueIndex = 0;
164-
165-
for (ArgDesc &Arg : Args) {
166-
// Handle potential gaps in set arguments (e. g. if some of them are
167-
// set on the user side).
168-
for (int Idx = LastIndex + 1; Idx < Arg.MIndex; ++Idx)
169-
if (!(*EliminatedArgMask)[Idx])
170-
++NextTrueIndex;
171-
LastIndex = Arg.MIndex;
172-
173-
if ((*EliminatedArgMask)[Arg.MIndex])
174-
continue;
175-
176-
Func(Arg, NextTrueIndex);
177-
++NextTrueIndex;
178-
}
179-
}
180-
}
181-
182148
static std::string accessModeToString(access::mode Mode) {
183149
switch (Mode) {
184150
case access::mode::read:

sycl/source/detail/scheduler/commands.hpp

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -748,9 +748,39 @@ void SetArgBasedOnType(
748748
const std::function<void *(Requirement *Req)> &getMemAllocationFunc,
749749
const sycl::context &Context, detail::ArgDesc &Arg, size_t NextTrueIndex);
750750

751-
void applyFuncOnFilteredArgs(
752-
const KernelArgMask *EliminatedArgMask, std::vector<ArgDesc> &Args,
753-
std::function<void(detail::ArgDesc &Arg, int NextTrueIndex)> Func);
751+
template <typename FuncT>
752+
void applyFuncOnFilteredArgs(const KernelArgMask *EliminatedArgMask,
753+
std::vector<ArgDesc> &Args, FuncT Func) {
754+
if (!EliminatedArgMask || EliminatedArgMask->size() == 0) {
755+
for (ArgDesc &Arg : Args) {
756+
Func(Arg, Arg.MIndex);
757+
}
758+
} else {
759+
// TODO this is not necessary as long as we can guarantee that the
760+
// arguments are already sorted (e. g. handle the sorting in handler
761+
// if necessary due to set_arg(...) usage).
762+
std::sort(Args.begin(), Args.end(), [](const ArgDesc &A, const ArgDesc &B) {
763+
return A.MIndex < B.MIndex;
764+
});
765+
int LastIndex = -1;
766+
size_t NextTrueIndex = 0;
767+
768+
for (ArgDesc &Arg : Args) {
769+
// Handle potential gaps in set arguments (e. g. if some of them are
770+
// set on the user side).
771+
for (int Idx = LastIndex + 1; Idx < Arg.MIndex; ++Idx)
772+
if (!(*EliminatedArgMask)[Idx])
773+
++NextTrueIndex;
774+
LastIndex = Arg.MIndex;
775+
776+
if ((*EliminatedArgMask)[Arg.MIndex])
777+
continue;
778+
779+
Func(Arg, NextTrueIndex);
780+
++NextTrueIndex;
781+
}
782+
}
783+
}
754784

755785
void ReverseRangeDimensionsForKernel(NDRDescT &NDR);
756786

0 commit comments

Comments
 (0)