Skip to content

Commit 9012e6d

Browse files
authored
[SYCL] Fix a bug when using no device split and reqd_work_group_size (#16236)
There was a bug (#13523) where a kernel couldn't be launched when `-fsycl-device-code-split=off` was used and multiple kernels with different required work group sizes were present. This issue was fixed by ensuring that the required work group size metadata is not attached to the device image when multiple required work group sizes are detected in a single module. However, there was a similar but related case that was not fixed by that PR, which is now demonstrated in the new test no-split-reqd-wg-size-2.cpp. This issue occurs when there is a single kernel with a required work group size and another kernel without one. In this case, the module doesn't contain multiple required work group sizes, so the required work group size metadata is still attached. As a result of the metadata being attached, the runtime cannot launch the kernel without a required work group size. This PR removes the logic of ensuring metadata is not attached when there are multiple required work group sizes, and instead adds logic that ensures the metadata is not attached when the split mode is `SPLIT_NONE`. This covers the old cases from the previous PR and the new case in this PR.
1 parent 8024711 commit 9012e6d

File tree

5 files changed

+45
-12
lines changed

5 files changed

+45
-12
lines changed

llvm/include/llvm/Support/PropertySetIO.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,9 @@ class PropertySetRegistry {
212212
static constexpr char SYCL_VIRTUAL_FUNCTIONS[] = "SYCL/virtual functions";
213213
static constexpr char SYCL_IMPLICIT_LOCAL_ARG[] = "SYCL/implicit local arg";
214214

215+
static constexpr char PROPERTY_REQD_WORK_GROUP_SIZE[] =
216+
"reqd_work_group_size_uint64_t";
217+
215218
/// Function for bulk addition of an entire property set in the given
216219
/// \p Category .
217220
template <typename MapTy> void add(StringRef Category, const MapTy &Props) {
@@ -231,6 +234,17 @@ class PropertySetRegistry {
231234
PropSet.insert({PropName, PropertyValue(PropVal)});
232235
}
233236

237+
void remove(StringRef Category, StringRef PropName) {
238+
auto PropertySetIt = PropSetMap.find(Category);
239+
if (PropertySetIt == PropSetMap.end())
240+
return;
241+
auto &PropertySet = PropertySetIt->second;
242+
auto PropIt = PropertySet.find(PropName);
243+
if (PropIt == PropertySet.end())
244+
return;
245+
PropertySet.erase(PropIt);
246+
}
247+
234248
/// Parses from the given \p Buf a property set registry.
235249
static Expected<std::unique_ptr<PropertySetRegistry>>
236250
read(const MemoryBuffer *Buf);

llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ SYCLDeviceRequirements
4040
llvm::computeDeviceRequirements(const Module &M,
4141
const SetVector<Function *> &EntryPoints) {
4242
SYCLDeviceRequirements Reqs;
43-
bool MultipleReqdWGSize = false;
4443
// Process all functions in the module
4544
for (const Function &F : M) {
4645
if (auto *MDN = F.getMetadata("sycl_used_aspects")) {
@@ -81,8 +80,6 @@ llvm::computeDeviceRequirements(const Module &M,
8180
ExtractUnsignedIntegerFromMDNodeOperand(MDN, I));
8281
if (!Reqs.ReqdWorkGroupSize.has_value())
8382
Reqs.ReqdWorkGroupSize = NewReqdWorkGroupSize;
84-
if (Reqs.ReqdWorkGroupSize != NewReqdWorkGroupSize)
85-
MultipleReqdWGSize = true;
8683
}
8784

8885
if (auto *MDN = F.getMetadata("sycl_joint_matrix")) {
@@ -119,13 +116,6 @@ llvm::computeDeviceRequirements(const Module &M,
119116
}
120117
}
121118

122-
// Usually, we would only expect one ReqdWGSize, as the module passed to
123-
// this function would be split according to that. However, when splitting
124-
// is disabled, this cannot be guaranteed. In this case, we reset the value,
125-
// which makes so that no value is reqd_work_group_size data is attached in
126-
// in the device image.
127-
if (MultipleReqdWGSize)
128-
Reqs.ReqdWorkGroupSize.reset();
129119
return Reqs;
130120
}
131121

@@ -152,7 +142,8 @@ std::map<StringRef, util::PropertyValue> SYCLDeviceRequirements::asMap() const {
152142
// reqd_work_group_size_uint64_t attribute. At the next ABI-breaking
153143
// window, this can be changed back to reqd_work_group_size.
154144
if (ReqdWorkGroupSize.has_value())
155-
Requirements["reqd_work_group_size_uint64_t"] = *ReqdWorkGroupSize;
145+
Requirements[util::PropertySetRegistry::PROPERTY_REQD_WORK_GROUP_SIZE] =
146+
*ReqdWorkGroupSize;
156147

157148
if (JointMatrix.has_value())
158149
Requirements["joint_matrix"] = *JointMatrix;

llvm/tools/sycl-post-link/sycl-post-link.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,15 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD,
310310
auto PropSet =
311311
computeModuleProperties(MD.getModule(), MD.entries(), GlobProps);
312312

313+
// When the split mode is none, the required work group size will be added
314+
// to the whole module, which will make the runtime unable to
315+
// launch the other kernels in the module that have different
316+
// required work group sizes or no required work group sizes. So we need to
317+
// remove the required work group size metadata in this case.
318+
if (SplitMode == module_split::SPLIT_NONE)
319+
PropSet.remove(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS,
320+
PropSetRegTy::PROPERTY_REQD_WORK_GROUP_SIZE);
321+
313322
std::string NewSuff = Suff.str();
314323
if (!Target.empty()) {
315324
PropSet.add(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS, "compile_target",
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// This test checks that with -fsycl-device-code-split=off, kernels
2+
// with different reqd_work_group_size dimensions can be launched.
3+
4+
// RUN: %{build} -fsycl-device-code-split=off -o %t.out
5+
// RUN: %{run} %t.out
6+
7+
// UNSUPPORTED: hip
8+
9+
#include <sycl/detail/core.hpp>
10+
11+
using namespace sycl;
12+
13+
int main(int argc, char **argv) {
14+
queue q;
15+
q.single_task([] {});
16+
q.parallel_for(range<2>(24, 1),
17+
[=](auto) [[sycl::reqd_work_group_size(24, 1)]] {});
18+
return 0;
19+
}

sycl/test-e2e/Regression/no-split-reqd-wg-size.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// This test checks that with -fsycl-device-code-split=off, kernels
22
// with different reqd_work_group_size dimensions can be launched.
33

4-
// RUN: %{build} -fsycl -fsycl-device-code-split=off -o %t.out
4+
// RUN: %{build} -fsycl-device-code-split=off -o %t.out
55
// RUN: %{run} %t.out
66

77
// UNSUPPORTED: hip

0 commit comments

Comments
 (0)