Skip to content

[SYCL] Do not decompose non-trivial classes with pointers #6886

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Oct 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 48 additions & 78 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1796,20 +1796,10 @@ class SyclKernelDecompMarker : public SyclKernelFieldHandler {
CollectionStack.back() = true;
PointerStack.pop_back();
} else if (PointerStack.pop_back_val()) {
// FIXME: Stop triggering decomposition for non-trivial types with
// pointers
if (RD->isTrivial()) {
PointerStack.back() = true;
if (!RD->hasAttr<SYCLGenerateNewTypeAttr>())
RD->addAttr(
SYCLGenerateNewTypeAttr::CreateImplicit(SemaRef.getASTContext()));
} else {
// We are visiting a non-trivial type with pointer.
CollectionStack.back() = true;
if (!RD->hasAttr<SYCLRequiresDecompositionAttr>())
RD->addAttr(SYCLRequiresDecompositionAttr::CreateImplicit(
SemaRef.getASTContext()));
}
PointerStack.back() = true;
if (!RD->hasAttr<SYCLGenerateNewTypeAttr>())
RD->addAttr(
SYCLGenerateNewTypeAttr::CreateImplicit(SemaRef.getASTContext()));
}
return true;
}
Expand Down Expand Up @@ -2916,6 +2906,18 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler {
Init.get());
}

void addBaseInit(const CXXBaseSpecifier &BS, QualType Ty,
InitializationKind InitKind, MultiExprArg Args) {
InitializedEntity Entity = InitializedEntity::InitializeBase(
SemaRef.Context, &BS, /*IsInheritedVirtualBase*/ false, &VarEntity);
InitializationSequence InitSeq(SemaRef, Entity, InitKind, Args);
ExprResult Init = InitSeq.Perform(SemaRef, Entity, InitKind, Args);

InitListExpr *ParentILE = CollectionInitExprs.back();
ParentILE->updateInit(SemaRef.getASTContext(), ParentILE->getNumInits(),
Init.get());
}

void addSimpleBaseInit(const CXXBaseSpecifier &BS, QualType Ty) {
InitializationKind InitKind =
InitializationKind::CreateCopy(KernelCallerSrcLoc, KernelCallerSrcLoc);
Expand All @@ -2938,85 +2940,53 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler {
addFieldInit(FD, Ty, ParamRef);
}

// Wraps LocalCloneRef in an implicit CK_DerivedToBase cast whose result type
// is "pointer to the base named by BS".
//
// RD            - the derived record; supplies the source type of the cast.
// BS            - base specifier; supplies the destination base type.
// LocalCloneRef - operand expression to be cast.
//
// Returns the newly created cast expression.
Expr *addDerivedToBaseCastExpr(const CXXRecordDecl *RD,
                               const CXXBaseSpecifier &BS,
                               Expr *LocalCloneRef) {
  QualType BaseTy = BS.getType();
  QualType DerivedTy(RD->getTypeForDecl(), 0);

  // The call fills PathToBase with the inheritance path; its own return
  // value is not inspected here.
  CXXCastPath PathToBase;
  SemaRef.CheckDerivedToBaseConversion(DerivedTy, BaseTy, KernelCallerSrcLoc,
                                       SourceRange(), &PathToBase,
                                       /*IgnoreBaseAccess*/ true);

  return ImplicitCastExpr::Create(
      SemaRef.Context, SemaRef.Context.getPointerType(BaseTy),
      CK_DerivedToBase, LocalCloneRef,
      /* CXXCastPath=*/&PathToBase, VK_LValue, FPOptionsOverride());
}

// Creates the expression `&E`: a prvalue UO_AddrOf unary operator whose type
// is a pointer to E's type, located at KernelCallerSrcLoc.
Expr *createGetAddressOf(Expr *E) {
  auto PtrTy = SemaRef.Context.getPointerType(E->getType());
  return UnaryOperator::Create(SemaRef.Context, E, UO_AddrOf, PtrTy,
                               VK_PRValue, OK_Ordinary, KernelCallerSrcLoc,
                               /*CanOverflow=*/false,
                               SemaRef.CurFPFeatureOverrides());
}

Expr *buildMemCpyCall(Expr *From, Expr *To, QualType T) {
// Compute the size of the memory buffer to be copied.
QualType SizeType = SemaRef.Context.getSizeType();
llvm::APInt Size(SemaRef.Context.getTypeSize(SizeType),
SemaRef.Context.getTypeSizeInChars(T).getQuantity());

LookupResult R(SemaRef, &SemaRef.Context.Idents.get("__builtin_memcpy"),
KernelCallerSrcLoc, Sema::LookupOrdinaryName);
SemaRef.LookupName(R, SemaRef.TUScope, true);

FunctionDecl *MemCpy = R.getAsSingle<FunctionDecl>();

assert(MemCpy && "__builtin_memcpy should be found");

ExprResult MemCpyRef =
SemaRef.BuildDeclRefExpr(MemCpy, SemaRef.Context.BuiltinFnTy,
VK_PRValue, KernelCallerSrcLoc, nullptr);

assert(MemCpyRef.isUsable() && "Builtin reference cannot fail");

Expr *CallArgs[] = {To, From,
IntegerLiteral::Create(SemaRef.Context, Size, SizeType,
KernelCallerSrcLoc)};
ExprResult Call =
SemaRef.BuildCallExpr(/*Scope=*/nullptr, MemCpyRef.get(),
KernelCallerSrcLoc, CallArgs, KernelCallerSrcLoc);
// Creates the expression `*E`: an lvalue UO_Deref unary operator whose type is
// the pointee type of E's type, located at KernelCallerSrcLoc.
Expr *createDerefOp(Expr *E) {
  auto PointeeTy = E->getType()->getPointeeType();
  return UnaryOperator::Create(SemaRef.Context, E, UO_Deref, PointeeTy,
                               VK_LValue, OK_Ordinary, KernelCallerSrcLoc,
                               /*CanOverflow=*/false,
                               SemaRef.CurFPFeatureOverrides());
}

assert(!Call.isInvalid() && "Call to __builtin_memcpy cannot fail!");
return Call.getAs<Expr>();
// Creates `reinterpret_cast<To>(E)` as a prvalue CK_BitCast with no base path.
// The cast carries default-constructed (empty) source locations and range.
Expr *createReinterpretCastExpr(Expr *E, QualType To) {
  // Type-source info for the written destination type of the cast.
  auto *ToTSI = SemaRef.Context.CreateTypeSourceInfo(To);
  return CXXReinterpretCastExpr::Create(SemaRef.Context, To, VK_PRValue,
                                        CK_BitCast, E, /*Path=*/nullptr, ToTSI,
                                        SourceLocation(), SourceLocation(),
                                        SourceRange());
}

// Adds default initializer for generated type and creates
// a call to __builtin_memcpy to initialize local clone from
// kernel argument.
void handleGeneratedType(FieldDecl *FD, QualType Ty) {
addFieldInit(FD, Ty, None,
InitializationKind::CreateDefault(KernelCallerSrcLoc));
addFieldMemberExpr(FD, Ty);
Expr *ParamRef = createGetAddressOf(createParamReferenceExpr());
Expr *LocalCloneRef = createGetAddressOf(MemberExprBases.back());
Expr *MemCpyCallExpr = buildMemCpyCall(ParamRef, LocalCloneRef, Ty);
BodyStmts.push_back(MemCpyCallExpr);
removeFieldMemberExpr(FD, Ty);
// Equivalent of the following code is generated here:
// void ocl_kernel(__generated_type GT) {
// Kernel KernelObjClone { *(reinterpret_cast<UsersType*>(&GT)) };
// }

Expr *RCE = createReinterpretCastExpr(
createGetAddressOf(createParamReferenceExpr()),
SemaRef.Context.getPointerType(Ty));
Expr *Initializer = createDerefOp(RCE);
addFieldInit(FD, Ty, Initializer);
}

// Adds default initializer for generated base and creates
// a call to __builtin_memcpy to initialize the base of local clone
// from kernel argument.
void handleGeneratedType(const CXXRecordDecl *RD, const CXXBaseSpecifier &BS,
QualType Ty) {
addBaseInit(BS, Ty, InitializationKind::CreateDefault(KernelCallerSrcLoc));
Expr *ParamRef = createGetAddressOf(createParamReferenceExpr());
Expr *LocalCloneRef = createGetAddressOf(MemberExprBases.back());
LocalCloneRef = addDerivedToBaseCastExpr(RD, BS, LocalCloneRef);
Expr *MemCpyCallExpr = buildMemCpyCall(ParamRef, LocalCloneRef, Ty);
BodyStmts.push_back(MemCpyCallExpr);
// Equivalent of the following code is generated here:
// void ocl_kernel(__generated_type GT) {
// Kernel KernelObjClone { *(reinterpret_cast<UsersType*>(&GT)) };
// }
Expr *RCE = createReinterpretCastExpr(
createGetAddressOf(createParamReferenceExpr()),
SemaRef.Context.getPointerType(Ty));
Expr *Initializer = createDerefOp(RCE);
InitializationKind InitKind =
InitializationKind::CreateCopy(KernelCallerSrcLoc, KernelCallerSrcLoc);
addBaseInit(BS, Ty, InitKind, Initializer);
}

MemberExpr *buildMemberExpr(Expr *Base, ValueDecl *Member) {
Expand Down
11 changes: 6 additions & 5 deletions clang/test/CodeGenSYCL/inheritance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,14 @@ int main() {
// Initialize 'base' subobject
// CHECK: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 8 %[[LOCAL_OBJECT]], ptr addrspace(4) align 4 %[[ARG_BASE]], i64 12, i1 false)

// Initialize field 'a'
// CHECK: %[[GEP_A:[a-zA-Z0-9]+]] = getelementptr inbounds %struct.derived, ptr addrspace(4) %[[LOCAL_OBJECT]], i32 0, i32 3
// CHECK: %[[LOAD_A:[0-9]+]] = load i32, ptr addrspace(4) %[[ARG_A]], align 4
// CHECK: store i32 %[[LOAD_A]], ptr addrspace(4) %[[GEP_A]]

// Initialize 'second_base' subobject
// First, derived-to-base cast with offset:
// CHECK: %[[OFFSET_CALC:.*]] = getelementptr inbounds i8, ptr addrspace(4) %[[LOCAL_OBJECT]], i64 16
// Initialize 'second_base'
// CHECK: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 8 %[[OFFSET_CALC]], ptr addrspace(4) align 8 %[[ARG_BASE1]], i64 8, i1 false)

// Initialize field 'a'
// CHECK: %[[GEP_A:[a-zA-Z0-9]+]] = getelementptr inbounds %struct.derived, ptr addrspace(4) %[[LOCAL_OBJECT]], i32 0, i32 3
// CHECK: %[[LOAD_A:[0-9]+]] = load i32, ptr addrspace(4) %[[ARG_A]], align 4
// CHECK: store i32 %[[LOAD_A]], ptr addrspace(4) %[[GEP_A]]

17 changes: 9 additions & 8 deletions clang/test/CodeGenSYCL/no_opaque_inheritance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,18 @@ int main() {
// CHECK: %[[PARAM_TO_PTR:.*]] = bitcast %struct.base addrspace(4)* %[[ARG_BASE]] to i8 addrspace(4)*
// CHECK: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 8 %[[BASE_TO_PTR]], i8 addrspace(4)* align 4 %[[PARAM_TO_PTR]], i64 12, i1 false)

// Initialize field 'a'
// CHECK: %[[GEP_A:[a-zA-Z0-9]+]] = getelementptr inbounds %struct.derived, %struct.derived addrspace(4)* %[[LOCAL_OBJECT]], i32 0, i32 3
// CHECK: %[[LOAD_A:[0-9]+]] = load i32, i32 addrspace(4)* %[[ARG_A]], align 4
// CHECK: store i32 %[[LOAD_A]], i32 addrspace(4)* %[[GEP_A]]

// Initialize 'second_base' subobject
// First, derived-to-base cast with offset:
// CHECK: %[[DERIVED_PTR:.*]] = bitcast %struct.derived addrspace(4)* %[[LOCAL_OBJECT]] to i8 addrspace(4)*
// CHECK: %[[OFFSET_CALC:.*]] = getelementptr inbounds i8, i8 addrspace(4)* %[[DERIVED_PTR]], i64 16
// CHECK: %[[TO_SECOND_BASE:.*]] = bitcast i8 addrspace(4)* %[[OFFSET_CALC]] to %class.second_base addrspace(4)*
// CHECK: %[[SECOND_BASE_TO_PTR:.*]] = bitcast %class.second_base addrspace(4)* %[[TO_SECOND_BASE]] to i8 addrspace(4)*
// CHECK: %[[SECOND_PARAM_TO_PTR:.*]] = bitcast %class.__generated_second_base addrspace(4)* %[[ARG_BASE1]] to i8 addrspace(4)*
// CHECK: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 8 %[[SECOND_BASE_TO_PTR]], i8 addrspace(4)* align 8 %[[SECOND_PARAM_TO_PTR]], i64 8, i1 false)
// CHECK: %[[GEN_TO_SECOND_BASE:.*]] = bitcast %class.__generated_second_base addrspace(4)* %[[ARG_BASE1]] to %class.second_base addrspace(4)*
// CHECK: %[[TO:.*]] = bitcast %class.second_base addrspace(4)* %[[TO_SECOND_BASE]] to i8 addrspace(4)*
// CHECK: %[[FROM:.*]] = bitcast %class.second_base addrspace(4)* %[[GEN_TO_SECOND_BASE]] to i8 addrspace(4)*
// CHECK: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 8 %[[TO]], i8 addrspace(4)* align 8 %[[FROM]], i64 8, i1 false)


// Initialize field 'a'
// CHECK: %[[GEP_A:[a-zA-Z0-9]+]] = getelementptr inbounds %struct.derived, %struct.derived addrspace(4)* %[[LOCAL_OBJECT]], i32 0, i32 3
// CHECK: %[[LOAD_A:[0-9]+]] = load i32, i32 addrspace(4)* %[[ARG_A]], align 4
// CHECK: store i32 %[[LOAD_A]], i32 addrspace(4)* %[[GEP_A]]
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/no_opaque_pointers-in-structs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,4 @@ int main() {
// CHECK-SAME: %[[GENERATED_A]]* noundef byval(%[[GENERATED_A]]) align 8 %_arg_F3,
// CHECK-SAME: %[[WRAPPER_F4_1]]* noundef byval(%[[WRAPPER_F4_1]]) align 8 %_arg_F4
// CHECK-SAME: %[[WRAPPER_F4_2]]* noundef byval(%[[WRAPPER_F4_2]]) align 8 %_arg_F41
// CHECK: define {{.*}}spir_kernel void @{{.*}}lambdas{{.*}}(%[[WRAPPER_LAMBDA_PTR]]* noundef byval(%[[WRAPPER_LAMBDA_PTR]]) align 8 %_arg_Ptr)
// CHECK: define {{.*}}spir_kernel void @{{.*}}lambdas{{.*}}(%[[WRAPPER_LAMBDA_PTR]]* noundef byval(%[[WRAPPER_LAMBDA_PTR]]) align 8 %_arg_Lambda)
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/pointers-in-structs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,4 @@ int main() {
// CHECK-SAME: ptr noundef byval(%[[GENERATED_A]]) align 8 %_arg_F3,
// CHECK-SAME: ptr noundef byval(%[[WRAPPER_F4_1]]) align 8 %_arg_F4
// CHECK-SAME: ptr noundef byval(%[[WRAPPER_F4_2]]) align 8 %_arg_F41
// CHECK: define {{.*}}spir_kernel void @{{.*}}lambdas{{.*}}(ptr noundef byval(%[[WRAPPER_LAMBDA_PTR]]) align 8 %_arg_Ptr)
// CHECK: define {{.*}}spir_kernel void @{{.*}}lambdas{{.*}}(ptr noundef byval(%[[WRAPPER_LAMBDA_PTR]]) align 8 %_arg_Lambda)
Loading