Skip to content

[clang][OpenCL][CodeGen][AMDGPU] Do not use private as the default AS for when generic is available #112442

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Oct 22, 2024
Merged
6 changes: 3 additions & 3 deletions clang/lib/Basic/Targets/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,9 +260,9 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
TargetInfo::adjust(Diags, Opts);
// ToDo: There are still a few places using default address space as private
// address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
// can be removed from the following line.
setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
// address space in OpenCL, which needs to be cleaned up, then the references
// to OpenCL can be removed from the following line.
setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) ||
!isAMDGCN(getTriple()));
}

Expand Down
3 changes: 2 additions & 1 deletion clang/lib/CodeGen/CGBlocks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1397,7 +1397,8 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D,
DI->setLocation(D->getLocation());
DI->EmitDeclareOfBlockLiteralArgVariable(
*BlockInfo, D->getName(), argNum,
cast<llvm::AllocaInst>(alloc.getPointer()), Builder);
cast<llvm::AllocaInst>(alloc.getPointer()->stripPointerCasts()),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would hope alloc would be the direct alloca reference to begin with

Builder);
}
}

Expand Down
11 changes: 9 additions & 2 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5853,8 +5853,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
/*IndexTypeQuals=*/0);
auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
llvm::Value *TmpPtr = Tmp.getPointer();
// The EmitLifetime* pair expect a naked Alloca as their last argument,
// however for cases where the default AS is not the Alloca AS, Tmp is
// actually the Alloca ascasted to the default AS, hence the
// stripPointerCasts()
llvm::Value *Alloca = TmpPtr->stripPointerCasts();
llvm::Value *TmpSize = EmitLifetimeStart(
CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca);
llvm::Value *ElemPtr;
// Each of the following arguments specifies the size of the corresponding
// argument passed to the enqueued block.
Expand All @@ -5870,7 +5875,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Builder.CreateAlignedStore(
V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
}
return std::tie(ElemPtr, TmpSize, TmpPtr);
// Return the Alloca itself rather than a potential ascast as this is only
// used by the paired EmitLifetimeEnd.
return std::tie(ElemPtr, TmpSize, Alloca);
};

// Could have events and/or varargs.
Expand Down
169 changes: 99 additions & 70 deletions clang/test/CodeGenOpenCL/addr-space-struct-arg.cl

Large diffs are not rendered by default.

36 changes: 20 additions & 16 deletions clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
// CL20-SAME: ptr noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
// CL20-NEXT: [[ENTRY:.*:]]
// CL20-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CL20-NEXT: store ptr [[X]], ptr addrspace(5) [[X_ADDR]], align 8
// CL20-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[X_ADDR]], align 8
// CL20-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
// CL20-NEXT: store ptr [[X]], ptr [[X_ADDR_ASCAST]], align 8
// CL20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
// CL20-NEXT: store i32 1, ptr [[TMP0]], align 4
// CL20-NEXT: ret void
//
Expand Down Expand Up @@ -54,25 +55,27 @@ void func1(int *x) {
// CL20-NEXT: [[LP1:%.*]] = alloca ptr, align 8, addrspace(5)
// CL20-NEXT: [[LP2:%.*]] = alloca ptr, align 8, addrspace(5)
// CL20-NEXT: [[LVC:%.*]] = alloca i32, align 4, addrspace(5)
// CL20-NEXT: store i32 1, ptr addrspace(5) [[LV1]], align 4
// CL20-NEXT: store i32 2, ptr addrspace(5) [[LV2]], align 4
// CL20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0
// CL20-NEXT: store i32 3, ptr addrspace(5) [[ARRAYIDX]], align 4
// CL20-NEXT: [[LV1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV1]] to ptr
// CL20-NEXT: store ptr [[LV1_ASCAST]], ptr addrspace(5) [[LP1]], align 8
// CL20-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0
// CL20-NEXT: [[ARRAYDECAY_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARRAYDECAY]] to ptr
// CL20-NEXT: store ptr [[ARRAYDECAY_ASCAST]], ptr addrspace(5) [[LP2]], align 8
// CL20-NEXT: [[LV1_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[LV1]] to ptr
// CL20-NEXT: call void @func1(ptr noundef [[LV1_ASCAST1]]) #[[ATTR2:[0-9]+]]
// CL20-NEXT: store i32 4, ptr addrspace(5) [[LVC]], align 4
// CL20-NEXT: store i32 4, ptr addrspace(5) [[LV1]], align 4
// CL20-NEXT: [[LV2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV2]] to ptr
// CL20-NEXT: [[LA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LA]] to ptr
// CL20-NEXT: [[LP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP1]] to ptr
// CL20-NEXT: [[LP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP2]] to ptr
// CL20-NEXT: [[LVC_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LVC]] to ptr
// CL20-NEXT: store i32 1, ptr [[LV1_ASCAST]], align 4
// CL20-NEXT: store i32 2, ptr [[LV2_ASCAST]], align 4
// CL20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0
// CL20-NEXT: store i32 3, ptr [[ARRAYIDX]], align 4
// CL20-NEXT: store ptr [[LV1_ASCAST]], ptr [[LP1_ASCAST]], align 8
// CL20-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0
// CL20-NEXT: store ptr [[ARRAYDECAY]], ptr [[LP2_ASCAST]], align 8
// CL20-NEXT: call void @func1(ptr noundef [[LV1_ASCAST]]) #[[ATTR2:[0-9]+]]
// CL20-NEXT: store i32 4, ptr [[LVC_ASCAST]], align 4
// CL20-NEXT: store i32 4, ptr [[LV1_ASCAST]], align 4
// CL20-NEXT: ret void
//
void func2(void) {
int lv1;
lv1 = 1;

int lv2 = 2;

int la[100];
Expand All @@ -99,7 +102,8 @@ void func2(void) {
// CL20-SAME: ) #[[ATTR0]] {
// CL20-NEXT: [[ENTRY:.*:]]
// CL20-NEXT: [[A:%.*]] = alloca [16 x [1 x float]], align 4, addrspace(5)
// CL20-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 4 [[A]], i8 0, i64 64, i1 false)
// CL20-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
// CL20-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A_ASCAST]], i8 0, i64 64, i1 false)
// CL20-NEXT: ret void
//
void func3(void) {
Expand Down
Loading