Skip to content

Commit 3ec4594

Browse files
[SYCL] Fix handling of unpacked POD spec constants (#2926)
Fixed calculating of offsets to scalar elements of composite spec constants by taking into account paddings, that might be instered into a struct by the compiler to make it elements aligned.
1 parent f1ab47c commit 3ec4594

File tree

2 files changed

+98
-6
lines changed

2 files changed

+98
-6
lines changed

llvm/tools/sycl-post-link/SpecConstants.cpp

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -213,26 +213,38 @@ getScalarSpecConstMetadata(const Instruction *I) {
213213
/// Recursively iterates over a composite type in order to collect information
214214
/// about its scalar elements.
215215
void collectCompositeElementsInfoRecursive(
216-
const Type *Ty, unsigned &Index, unsigned &Offset,
216+
const Module *M, Type *Ty, unsigned &Index, unsigned &Offset,
217217
std::vector<CompositeSpecConstElementDescriptor> &Result) {
218218
if (auto *ArrTy = dyn_cast<ArrayType>(Ty)) {
219219
for (size_t I = 0; I < ArrTy->getNumElements(); ++I) {
220220
// TODO: this is a spot for potential optimization: for arrays we could
221221
// just make a single recursive call here and use it to populate Result
222222
// in a loop.
223-
collectCompositeElementsInfoRecursive(ArrTy->getElementType(), Index,
223+
collectCompositeElementsInfoRecursive(M, ArrTy->getElementType(), Index,
224224
Offset, Result);
225225
}
226226
} else if (auto *StructTy = dyn_cast<StructType>(Ty)) {
227-
for (Type *ElTy : StructTy->elements()) {
228-
collectCompositeElementsInfoRecursive(ElTy, Index, Offset, Result);
227+
const StructLayout *SL = M->getDataLayout().getStructLayout(StructTy);
228+
for (size_t I = 0, E = StructTy->getNumElements(); I < E; ++I) {
229+
auto *ElTy = StructTy->getElementType(I);
230+
// When handling elements of a structure, we do not use manually
231+
// calculated offsets (which are sum of sizes of all previously
232+
// encountered elements), but instead rely on data provided for us by
233+
// DataLayout, because the structure can be unpacked, i.e. padded in
234+
// order to ensure particular alignment of its elements.
235+
unsigned LocalOffset = Offset + SL->getElementOffset(I);
236+
collectCompositeElementsInfoRecursive(M, ElTy, Index, LocalOffset,
237+
Result);
229238
}
239+
// Update "global" offset according to the total size of a handled struct
240+
// type.
241+
Offset += SL->getSizeInBytes();
230242
} else if (auto *VecTy = dyn_cast<FixedVectorType>(Ty)) {
231243
for (size_t I = 0; I < VecTy->getNumElements(); ++I) {
232244
// TODO: this is a spot for potential optimization: for vectors we could
233245
// just make a single recursive call here and use it to populate Result
234246
// in a loop.
235-
collectCompositeElementsInfoRecursive(VecTy->getElementType(), Index,
247+
collectCompositeElementsInfoRecursive(M, VecTy->getElementType(), Index,
236248
Offset, Result);
237249
}
238250
} else { // Assume that we encountered some scalar element
@@ -256,7 +268,8 @@ getCompositeSpecConstMetadata(const Instruction *I) {
256268
std::vector<CompositeSpecConstElementDescriptor> Result(N->getNumOperands() -
257269
1);
258270
unsigned Index = 0, Offset = 0;
259-
collectCompositeElementsInfoRecursive(I->getType(), Index, Offset, Result);
271+
collectCompositeElementsInfoRecursive(I->getModule(), I->getType(), Index,
272+
Offset, Result);
260273

261274
for (unsigned I = 1; I < N->getNumOperands(); ++I) {
262275
const auto *MDInt = cast<ConstantAsMetadata>(N->getOperand(I));
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
// UNSUPPORTED: cuda
2+
//
3+
// RUN: %clangxx -fsycl %s -o %t.out
4+
// RUN: %RUN_ON_HOST %t.out | FileCheck %s
5+
// RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER
6+
// RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER
7+
//
8+
// This test is intended to check that unpacked composites with elemements of
9+
// various sizes are handled correctly
10+
//
11+
// CHECK: --------> 1
12+
// CHECK: --------> 2
13+
// CHECK: --------> 3
14+
// CHECK: --------> 4
15+
#include <CL/sycl.hpp>
16+
17+
#include <stdint.h>
18+
19+
using namespace cl::sycl;
20+
21+
class sc_kernel_t;
22+
23+
namespace test {
24+
25+
struct pod_t {
26+
int a;
27+
int8_t b;
28+
int c;
29+
int64_t d;
30+
};
31+
32+
template <typename T> class kernel_t {
33+
public:
34+
using sc_t = sycl::ONEAPI::experimental::spec_constant<pod_t, sc_kernel_t>;
35+
36+
kernel_t(const sc_t &sc, cl::sycl::stream &strm) : sc_(sc), strm_(strm) {}
37+
38+
void operator()(cl::sycl::id<1> i) const {
39+
strm_ << "--------> " << sc_.get().a << sycl::endl;
40+
strm_ << "--------> " << sc_.get().b << sycl::endl;
41+
strm_ << "--------> " << sc_.get().c << sycl::endl;
42+
strm_ << "--------> " << sc_.get().d << sycl::endl;
43+
}
44+
45+
sc_t sc_;
46+
cl::sycl::stream strm_;
47+
};
48+
49+
template <typename T> class kernel_driver_t {
50+
public:
51+
void execute(const pod_t &pod) {
52+
device dev = sycl::device(default_selector{});
53+
context ctx = context(dev);
54+
queue q(dev);
55+
56+
cl::sycl::program p(q.get_context());
57+
auto sc = p.set_spec_constant<sc_kernel_t>(pod);
58+
p.build_with_kernel_type<kernel_t<T>>();
59+
60+
q.submit([&](cl::sycl::handler &cgh) {
61+
cl::sycl::stream strm(1024, 256, cgh);
62+
kernel_t<T> func(sc, strm);
63+
64+
auto sycl_kernel = p.get_kernel<kernel_t<T>>();
65+
cgh.parallel_for(sycl_kernel, cl::sycl::range<1>(1), func);
66+
});
67+
q.wait();
68+
}
69+
};
70+
71+
} // namespace test
72+
73+
int main() {
74+
test::pod_t pod = {1, 2, 3, 4};
75+
test::kernel_driver_t<float> kd_float;
76+
kd_float.execute(pod);
77+
78+
return 0;
79+
}

0 commit comments

Comments
 (0)