Skip to content

Commit 6934bcf

Browse files
authored
[SYCL] Do not attach reqd_work_group_size info when multiple are detected (#13523)
1 parent 5c6616c commit 6934bcf

File tree

2 files changed

+39
-0
lines changed

2 files changed

+39
-0
lines changed

llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ static llvm::StringRef ExtractStringFromMDNodeOperand(const MDNode *N,
3838
SYCLDeviceRequirements
3939
llvm::computeDeviceRequirements(const module_split::ModuleDesc &MD) {
4040
SYCLDeviceRequirements Reqs;
41+
bool MultipleReqdWGSize = false;
4142
// Process all functions in the module
4243
for (const Function &F : MD.getModule()) {
4344
if (auto *MDN = F.getMetadata("sycl_used_aspects")) {
@@ -70,6 +71,8 @@ llvm::computeDeviceRequirements(const module_split::ModuleDesc &MD) {
7071
ExtractUnsignedIntegerFromMDNodeOperand(MDN, I));
7172
if (!Reqs.ReqdWorkGroupSize.has_value())
7273
Reqs.ReqdWorkGroupSize = NewReqdWorkGroupSize;
74+
if (Reqs.ReqdWorkGroupSize != NewReqdWorkGroupSize)
75+
MultipleReqdWGSize = true;
7376
}
7477

7578
if (auto *MDN = F.getMetadata("sycl_joint_matrix")) {
@@ -105,6 +108,14 @@ llvm::computeDeviceRequirements(const module_split::ModuleDesc &MD) {
105108
assert(*Reqs.SubGroupSize == static_cast<uint32_t>(MDValue));
106109
}
107110
}
111+
112+
// Usually, we would only expect one ReqdWGSize, as the module passed to
113+
// this function would be split according to that. However, when splitting
114+
// is disabled, this cannot be guaranteed. In this case, we reset the value,
115+
// which makes it so that no reqd_work_group_size data is attached
116+
// in the device image.
117+
if (MultipleReqdWGSize)
118+
Reqs.ReqdWorkGroupSize.reset();
108119
return Reqs;
109120
}
110121

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// This test checks that with -fsycl-device-code-split=off, kernels
2+
// with different reqd_work_group_size dimensions can be launched.
3+
4+
// RUN: %{build} -fsycl -fsycl-device-code-split=off -o %t.out
5+
// RUN: %{run} %t.out
6+
7+
// UNSUPPORTED: hip
8+
9+
#include <sycl/detail/core.hpp>
10+
11+
using namespace sycl;
12+
13+
#define TEST(...) \
14+
{ \
15+
range globalRange(__VA_ARGS__); \
16+
range localRange(__VA_ARGS__); \
17+
nd_range NDRange(globalRange, localRange); \
18+
q.parallel_for(NDRange, \
19+
[=](auto) [[sycl::reqd_work_group_size(__VA_ARGS__)]] {}); \
20+
}
21+
22+
int main(int argc, char **argv) {
23+
queue q;
24+
TEST(4);
25+
TEST(4, 5);
26+
TEST(4, 5, 6);
27+
return 0;
28+
}

0 commit comments

Comments
 (0)