Skip to content

[SYCL] Fix handling of unpacked POD spec constants #2926

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions llvm/tools/sycl-post-link/SpecConstants.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,26 +213,38 @@ getScalarSpecConstMetadata(const Instruction *I) {
/// Recursively iterates over a composite type in order to collect information
/// about its scalar elements.
void collectCompositeElementsInfoRecursive(
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to have a unit-test for this change in the pass. Technically I can do that by looking into a properties file generated by sycl-post-link, but non-scalar properties are encoded using base64, so, it won't be an expressive test. However, this will be my plan if I didn't think of anything better. Any ideas are welcome

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assumed the fix is needed urgently and the test will follow shortly, so I approved.

My idea would be adding logging to the spec constants pass and verifying the log output is expected. IIRC, some of passes use this approach.

const Type *Ty, unsigned &Index, unsigned &Offset,
const Module *M, Type *Ty, unsigned &Index, unsigned &Offset,
std::vector<CompositeSpecConstElementDescriptor> &Result) {
if (auto *ArrTy = dyn_cast<ArrayType>(Ty)) {
for (size_t I = 0; I < ArrTy->getNumElements(); ++I) {
// TODO: this is a spot for potential optimization: for arrays we could
// just make a single recursive call here and use it to populate Result
// in a loop.
collectCompositeElementsInfoRecursive(ArrTy->getElementType(), Index,
collectCompositeElementsInfoRecursive(M, ArrTy->getElementType(), Index,
Offset, Result);
}
} else if (auto *StructTy = dyn_cast<StructType>(Ty)) {
for (Type *ElTy : StructTy->elements()) {
collectCompositeElementsInfoRecursive(ElTy, Index, Offset, Result);
const StructLayout *SL = M->getDataLayout().getStructLayout(StructTy);
for (size_t I = 0, E = StructTy->getNumElements(); I < E; ++I) {
auto *ElTy = StructTy->getElementType(I);
// When handling elements of a structure, we do not use manually
// calculated offsets (which are sum of sizes of all previously
// encountered elements), but instead rely on data provided for us by
// DataLayout, because the structure can be unpacked, i.e. padded in
// order to ensure particular alignment of its elements.
unsigned LocalOffset = Offset + SL->getElementOffset(I);
collectCompositeElementsInfoRecursive(M, ElTy, Index, LocalOffset,
Result);
}
// Update "global" offset according to the total size of a handled struct
// type.
Offset += SL->getSizeInBytes();
} else if (auto *VecTy = dyn_cast<FixedVectorType>(Ty)) {
for (size_t I = 0; I < VecTy->getNumElements(); ++I) {
// TODO: this is a spot for potential optimization: for vectors we could
// just make a single recursive call here and use it to populate Result
// in a loop.
collectCompositeElementsInfoRecursive(VecTy->getElementType(), Index,
collectCompositeElementsInfoRecursive(M, VecTy->getElementType(), Index,
Offset, Result);
}
} else { // Assume that we encountered some scalar element
Expand All @@ -256,7 +268,8 @@ getCompositeSpecConstMetadata(const Instruction *I) {
std::vector<CompositeSpecConstElementDescriptor> Result(N->getNumOperands() -
1);
unsigned Index = 0, Offset = 0;
collectCompositeElementsInfoRecursive(I->getType(), Index, Offset, Result);
collectCompositeElementsInfoRecursive(I->getModule(), I->getType(), Index,
Offset, Result);

for (unsigned I = 1; I < N->getNumOperands(); ++I) {
const auto *MDInt = cast<ConstantAsMetadata>(N->getOperand(I));
Expand Down
79 changes: 79 additions & 0 deletions sycl/test/on-device/spec_const/unpacked-composite-type.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// UNSUPPORTED: cuda
//
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %RUN_ON_HOST %t.out | FileCheck %s
// RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER
// RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER
//
// This test is intended to check that unpacked composites with elemements of
// various sizes are handled correctly
//
// CHECK: --------> 1
// CHECK: --------> 2
// CHECK: --------> 3
// CHECK: --------> 4
#include <CL/sycl.hpp>

#include <stdint.h>

using namespace cl::sycl;

class sc_kernel_t;

namespace test {

struct pod_t {
int a;
int8_t b;
int c;
int64_t d;
};

template <typename T> class kernel_t {
public:
using sc_t = sycl::ONEAPI::experimental::spec_constant<pod_t, sc_kernel_t>;

kernel_t(const sc_t &sc, cl::sycl::stream &strm) : sc_(sc), strm_(strm) {}

void operator()(cl::sycl::id<1> i) const {
strm_ << "--------> " << sc_.get().a << sycl::endl;
strm_ << "--------> " << sc_.get().b << sycl::endl;
strm_ << "--------> " << sc_.get().c << sycl::endl;
strm_ << "--------> " << sc_.get().d << sycl::endl;
}

sc_t sc_;
cl::sycl::stream strm_;
};

template <typename T> class kernel_driver_t {
public:
void execute(const pod_t &pod) {
device dev = sycl::device(default_selector{});
context ctx = context(dev);
queue q(dev);

cl::sycl::program p(q.get_context());
auto sc = p.set_spec_constant<sc_kernel_t>(pod);
p.build_with_kernel_type<kernel_t<T>>();

q.submit([&](cl::sycl::handler &cgh) {
cl::sycl::stream strm(1024, 256, cgh);
kernel_t<T> func(sc, strm);

auto sycl_kernel = p.get_kernel<kernel_t<T>>();
cgh.parallel_for(sycl_kernel, cl::sycl::range<1>(1), func);
});
q.wait();
}
};

} // namespace test

int main() {
test::pod_t pod = {1, 2, 3, 4};
test::kernel_driver_t<float> kd_float;
kd_float.execute(pod);

return 0;
}