Skip to content

[SYCL] Fix crash when kernel argument is a multi-dimensional array. #2341

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Aug 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 90 additions & 29 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -876,11 +876,12 @@ class KernelObjVisitor {

assert(ElemCount > 0 && "SYCL prohibits 0 sized arrays");
VisitFirstElement(nullptr, FD, ET, handlers...);
(void)std::initializer_list<int>{(handlers.nextElement(ET), 0)...};
(void)std::initializer_list<int>{(handlers.nextElement(ET, 1), 0)...};

for (int64_t Count = 1; Count < ElemCount; Count++) {
VisitNthElement(nullptr, FD, ET, handlers...);
(void)std::initializer_list<int>{(handlers.nextElement(ET), 0)...};
(void)std::initializer_list<int>{
(handlers.nextElement(ET, Count + 1), 0)...};
}

(void)std::initializer_list<int>{
Expand Down Expand Up @@ -1081,7 +1082,7 @@ class SyclKernelFieldHandlerBase {
virtual bool enterField(const CXXRecordDecl *, FieldDecl *) { return true; }
virtual bool leaveField(const CXXRecordDecl *, FieldDecl *) { return true; }
virtual bool enterArray() { return true; }
virtual bool nextElement(QualType) { return true; }
virtual bool nextElement(QualType, uint64_t) { return true; }
virtual bool leaveArray(FieldDecl *, QualType, int64_t) { return true; }

virtual ~SyclKernelFieldHandlerBase() = default;
Expand Down Expand Up @@ -1660,7 +1661,6 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler {
InitializedEntity VarEntity;
const CXXRecordDecl *KernelObj;
llvm::SmallVector<Expr *, 16> MemberExprBases;
uint64_t ArrayIndex;
FunctionDecl *KernelCallerFunc;

// Using the statements/init expressions that we've created, this generates
Expand Down Expand Up @@ -1773,17 +1773,62 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler {
InitExprs.push_back(MemberInit.get());
}

// Counts how many consecutive ArraySubscriptExpr entries sit at the top
// (back) of MemberExprBases. The scan walks from the last entry towards the
// first and stops at the first entry that is not an ArraySubscriptExpr, so
// the result is the array nesting depth of the element currently on top of
// the stack (0 when the top entry is not an array subscript or the stack is
// empty).
int getDims() {
  int Depth = 0;
  int Pos = MemberExprBases.size() - 1;
  while (Pos >= 0 && isa<ArraySubscriptExpr>(MemberExprBases[Pos])) {
    ++Depth;
    --Pos;
  }
  return Depth;
}

int64_t getArrayIndex(int Idx) {
ArraySubscriptExpr *LastArrayRef =
cast<ArraySubscriptExpr>(MemberExprBases[Idx]);
Expr *LastIdx = LastArrayRef->getIdx();
llvm::APSInt Result;
SemaRef.VerifyIntegerConstantExpression(LastIdx, &Result);
return Result.getExtValue();
}

void createExprForScalarElement(FieldDecl *FD) {
InitializedEntity ArrayEntity =
llvm::SmallVector<InitializedEntity, 4> InitEntities;

// For multi-dimensional arrays, an initialized entity needs to be
// generated for each 'dimension'. For example, the initialized entity
// for s.array[x][y][z] is constructed using initialized entities for
// s.array[x][y], s.array[x] and s.array. InitEntities is used to maintain
// this.
InitializedEntity Entity =
InitializedEntity::InitializeMember(FD, &VarEntity);
InitEntities.push_back(Entity);

// Calculate dimension using ArraySubscriptExpressions in MemberExprBases.
// Each dimension has an ArraySubscriptExpression (maintains index)
// in MemberExprBases. For example, if we are currently handling element
// a[0][0][1], the top of stack entries are ArraySubscriptExpressions for
// indices 0,0 and 1, with 1 on top.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do not use the same number more than once in an example; it is too confusing.
We had this issue in the spec...

int Dims = getDims();

// MemberExprBasesIdx is used to get the index of each dimension, in correct
// order, from MemberExprBases. For example for a[0][0][1], getArrayIndex
// will return 0, 0 and then 1.
int MemberExprBasesIdx = MemberExprBases.size() - Dims;
for (int I = 0; I < Dims; ++I) {
InitializedEntity NewEntity = InitializedEntity::InitializeElement(
SemaRef.getASTContext(), getArrayIndex(MemberExprBasesIdx),
InitEntities.back());
InitEntities.push_back(NewEntity);
++MemberExprBasesIdx;
}

InitializationKind InitKind =
InitializationKind::CreateCopy(SourceLocation(), SourceLocation());
Expr *DRE = createInitExpr(FD);
InitializedEntity Entity = InitializedEntity::InitializeElement(
SemaRef.getASTContext(), ArrayIndex, ArrayEntity);
ArrayIndex++;
InitializationSequence InitSeq(SemaRef, Entity, InitKind, DRE);
ExprResult MemberInit = InitSeq.Perform(SemaRef, Entity, InitKind, DRE);
InitializationSequence InitSeq(SemaRef, InitEntities.back(), InitKind, DRE);
ExprResult MemberInit =
InitSeq.Perform(SemaRef, InitEntities.back(), InitKind, DRE);
InitExprs.push_back(MemberInit.get());
}

Expand All @@ -1797,7 +1842,22 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler {
Expr *ILE = new (SemaRef.getASTContext())
InitListExpr(SemaRef.getASTContext(), SourceLocation(), ArrayInitExprs,
SourceLocation());
ILE->setType(FD->getType());

// We need to find the type of the element for which we are generating the
// InitListExpr. For example, for a multi-dimensional array say a[2][3][2],
// the types for InitListExpr of the array and its 'sub-arrays' are -
// int [2][3][2], int [3][2] and int [2]. This loop is used to obtain this
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do not use the same number more than once in an example; it is too confusing.
I have found that using prime numbers is even better for concrete computations.
For example, with int [2] here it is not immediately obvious which [2] we are talking about.
Use, for example, int [5][3][2].

// information from MemberExprBases. MemberExprBases holds
// ArraySubscriptExprs and the top of stack shows how far we have descended
// down the array. getDims() calculates this depth.
QualType ILEType = FD->getType();
for (int I = getDims(); I > 1; I--) {
const ConstantArrayType *CAT =
SemaRef.getASTContext().getAsConstantArrayType(ILEType);
assert(CAT && "Should only be called on constant-size array.");
ILEType = CAT->getElementType();
}
ILE->setType(ILEType);
InitExprs.push_back(ILE);
}

Expand Down Expand Up @@ -2056,20 +2116,18 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler {
ExprResult ElementBase = SemaRef.CreateBuiltinArraySubscriptExpr(
ArrayBase, SourceLocation(), IndexExpr.get(), SourceLocation());
MemberExprBases.push_back(ElementBase.get());
ArrayIndex = 0;
return true;
}

bool nextElement(QualType ET) final {
ArraySubscriptExpr *LastArrayRef =
cast<ArraySubscriptExpr>(MemberExprBases.back());
bool nextElement(QualType ET, uint64_t) final {
// Top of MemberExprBases holds ArraySubscriptExpression of element
// we just handled, or the Array base for the dimension we are
// currently visiting.
int64_t nextIndex = getArrayIndex(MemberExprBases.size() - 1) + 1;
MemberExprBases.pop_back();
Expr *LastIdx = LastArrayRef->getIdx();
llvm::APSInt Result;
SemaRef.VerifyIntegerConstantExpression(LastIdx, &Result);
Expr *ArrayBase = MemberExprBases.back();
ExprResult IndexExpr = SemaRef.ActOnIntegerConstant(
SourceLocation(), Result.getExtValue() + 1);
ExprResult IndexExpr =
SemaRef.ActOnIntegerConstant(SourceLocation(), nextIndex);
ExprResult ElementBase = SemaRef.CreateBuiltinArraySubscriptExpr(
ArrayBase, SourceLocation(), IndexExpr.get(), SourceLocation());
MemberExprBases.push_back(ElementBase.get());
Expand All @@ -2094,6 +2152,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler {
class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler {
SYCLIntegrationHeader &Header;
int64_t CurOffset = 0;
llvm::SmallVector<size_t, 16> ArrayBaseOffsets;
int StructDepth = 0;

void addParam(const FieldDecl *FD, QualType ArgTy,
Expand Down Expand Up @@ -2240,18 +2299,20 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler {
return true;
}

bool nextElement(QualType ET) final {
CurOffset += SemaRef.getASTContext().getTypeSizeInChars(ET).getQuantity();
// Called when the visitor starts an array: remembers the current offset as
// the base offset of this array by pushing it onto ArrayBaseOffsets, so that
// per-element offsets can later be derived from it. Always returns true.
bool enterArray() final {
ArrayBaseOffsets.push_back(CurOffset);
return true;
}

bool leaveArray(FieldDecl *, QualType ET, int64_t Count) final {
int64_t ArraySize =
SemaRef.getASTContext().getTypeSizeInChars(ET).getQuantity();
if (!ET->isArrayType()) {
ArraySize *= Count;
}
CurOffset -= ArraySize;
// Moves CurOffset to the start of element number Index of the array whose
// base offset is on top of ArrayBaseOffsets: base + sizeof(ET) * Index, where
// the element size is measured in chars. Always returns true.
bool nextElement(QualType ET, uint64_t Index) final {
  int64_t ElemSize =
      SemaRef.getASTContext().getTypeSizeInChars(ET).getQuantity();
  CurOffset = ArrayBaseOffsets.back() + ElemSize * Index;
  return true;
}

// Called when the visitor is done with an array: restores CurOffset to the
// base offset that was on top of ArrayBaseOffsets and removes that entry from
// the stack. The element type and count arguments are not used. Always
// returns true.
bool leaveArray(FieldDecl *, QualType ET, int64_t) final {
  CurOffset = ArrayBaseOffsets.pop_back_val();
  return true;
}
using SyclKernelFieldHandler::enterStruct;
Expand Down
44 changes: 42 additions & 2 deletions clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@

// CHECK: static constexpr
// CHECK-NEXT: const char* const kernel_names[] = {
// CHECK-NEXT: "_ZTSZ4mainE8kernel_B"
// CHECK-NEXT: "_ZTSZ4mainE8kernel_B",
// CHECK-NEXT: "_ZTSZ4mainE8kernel_C",
// CHECK-NEXT: "_ZTSZ4mainE8kernel_D"
// CHECK-NEXT: };

// CHECK: static constexpr
Expand All @@ -25,14 +27,40 @@
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 12 },
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 16 },
// CHECK-EMPTY:
// CHECK-NEXT: //--- _ZTSZ4mainE8kernel_C
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 0 },
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 4 },
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 8 },
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 12 },
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 16 },
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 20 },
// CHECK-EMPTY:
// CHECK-NEXT: //--- _ZTSZ4mainE8kernel_D
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 0 },
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 4 },
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 8 },
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 12 },
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 16 },
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 20 },
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 24 },
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 28 },
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 32 },
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 36 },
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 40 },
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 44 },
// CHECK-EMPTY:
// CHECK-NEXT: };

// CHECK: static constexpr
// CHECK-NEXT: const unsigned kernel_signature_start[] = {
// CHECK-NEXT: 0 // _ZTSZ4mainE8kernel_B
// CHECK-NEXT: 0, // _ZTSZ4mainE8kernel_B
// CHECK-NEXT: 6, // _ZTSZ4mainE8kernel_C
// CHECK-NEXT: 13 // _ZTSZ4mainE8kernel_D
// CHECK-NEXT: };

// CHECK: template <> struct KernelInfo<class kernel_B> {
// CHECK: template <> struct KernelInfo<class kernel_C> {
// CHECK: template <> struct KernelInfo<class kernel_D> {

#include <sycl.hpp>

Expand All @@ -46,9 +74,21 @@ __attribute__((sycl_kernel)) void a_kernel(const Func &kernelFunc) {
int main() {

int a[5];
int b[2][3];
int c[2][3][2];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
int c[2][3][2];
int c[2][3][5];


a_kernel<class kernel_B>(
[=]() {
int local = a[3];
});

a_kernel<class kernel_C>(
[=]() {
int local = b[0][1];
});

a_kernel<class kernel_D>(
[=]() {
int local = c[0][1][1];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
int local = c[0][1][1];
int local = c[0][1][2];

});
}
28 changes: 28 additions & 0 deletions clang/test/CodeGenSYCL/kernel-param-pod-array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ struct foo {
int main() {

int a[2];
int array_2D[2][1];
foo struct_array[2];

a_kernel<class kernel_B>(
Expand All @@ -36,6 +37,11 @@ int main() {
[=]() {
foo local = struct_array[1];
});

a_kernel<class kernel_D>(
[=]() {
int local = array_2D[0][0];
});
}

// Check kernel_B parameters
Expand Down Expand Up @@ -151,3 +157,25 @@ int main() {
// CHECK: [[GEP_FOO2_C:%[a-zA-Z0-9_]+]] = getelementptr inbounds %struct.{{.*}}foo.foo, %struct.{{.*}}foo.foo* [[FOO_ARRAY_1]], i32 0, i32 2
// CHECK: [[LOAD_FOO2_C:%[a-zA-Z0-9_]+]] = load i32, i32* [[FOO2_C_LOCAL]], align 4
// CHECK: store i32 [[LOAD_FOO2_C]], i32* [[GEP_FOO2_C]], align 4

// Check kernel_D parameters
// CHECK: define spir_kernel void @{{.*}}kernel_D
// CHECK-SAME: i32 [[ARR_2D_1:%[a-zA-Z0-9_]+]], i32 [[ARR_2D_2:%[a-zA-Z0-9_]+]]

// Check local lambda object alloca
// CHECK: [[LAMBDA_OBJ:%[0-9]+]] = alloca %"class.{{.*}}.anon.1", align 4

// Check local stores
// CHECK: store i32 [[ARR_2D_1]], i32* [[ARR_2D_1_LOCAL:%[a-zA-Z_]+.addr[0-9]*]], align 4
// CHECK: store i32 [[ARR_2D_2]], i32* [[ARR_2D_2_LOCAL:%[a-zA-Z_]+.addr[0-9]*]], align 4

// Check initialization of local array
// CHECK: [[GEP_ARR_2D:%[0-9]*]] = getelementptr inbounds %"class._ZTSZ4mainE3$_0.anon.1", %"class._ZTSZ4mainE3$_0.anon.1"* [[LAMBDA_OBJ]], i32 0, i32 0
// CHECK: [[GEP_ARR_BEGIN1:%[a-zA-Z0-9_.]+]] = getelementptr inbounds [2 x [1 x i32]], [2 x [1 x i32]]* [[GEP_ARR_2D]], i64 0, i64 0
// CHECK: [[GEP_ARR_ELEM0:%[a-zA-Z0-9_.]+]] = getelementptr inbounds [1 x i32], [1 x i32]* [[GEP_ARR_BEGIN1]], i64 0, i64 0
// CHECK: [[ARR_2D_ELEM0:%[0-9]*]] = load i32, i32* [[ARR_2D_1_LOCAL]], align 4
// CHECK: store i32 [[ARR_2D_ELEM0]], i32* [[GEP_ARR_ELEM0]], align 4
// CHECK: [[GEP_ARR_BEGIN2:%[a-zA-Z_.]+]] = getelementptr inbounds [1 x i32], [1 x i32]* [[GEP_ARR_BEGIN1]], i64 1
// CHECK: [[GEP_ARR_ELEM1:%[a-zA-Z0-9_.]+]] = getelementptr inbounds [1 x i32], [1 x i32]* [[GEP_ARR_BEGIN2]], i64 0, i64 0
// CHECK: [[ARR_2D_ELEM1:%[0-9]*]] = load i32, i32* [[ARR_2D_2_LOCAL]], align 4
// CHECK: store i32 [[ARR_2D_ELEM1]], i32* [[GEP_ARR_ELEM1]], align 4
Loading