Skip to content

Commit a11d864

Browse files
authored
clang: Fix broken implicit cast to generic address space (#138863)
This fixes emitting undefined behavior where a 64-bit generic pointer is written to a 32-bit slot allocated for a private pointer. This can be seen in test/CodeGenOpenCL/amdgcn-automatic-variable.cl's wrong_pointer_alloca.
1 parent 5df01ab commit a11d864

13 files changed

+129
-157
lines changed

clang/lib/CodeGen/CGDecl.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1588,7 +1588,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
15881588
// Create the alloca. Note that we set the name separately from
15891589
// building the instruction so that it's there even in no-asserts
15901590
// builds.
1591-
address = CreateTempAlloca(allocaTy, allocaAlignment, D.getName(),
1591+
address = CreateTempAlloca(allocaTy, Ty.getAddressSpace(),
1592+
allocaAlignment, D.getName(),
15921593
/*ArraySize=*/nullptr, &AllocaAddr);
15931594

15941595
// Don't emit lifetime markers for MSVC catch parameters. The lifetime of

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -100,31 +100,30 @@ CodeGenFunction::CreateTempAllocaWithoutCast(llvm::Type *Ty, CharUnits Align,
100100
return RawAddress(Alloca, Ty, Align, KnownNonNull);
101101
}
102102

103-
/// CreateTempAlloca - This creates a alloca and inserts it into the entry
104-
/// block. The alloca is casted to default address space if necessary.
105-
RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align,
106-
const Twine &Name,
103+
RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, LangAS DestLangAS,
104+
CharUnits Align, const Twine &Name,
107105
llvm::Value *ArraySize,
108106
RawAddress *AllocaAddr) {
109-
auto Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize);
107+
RawAddress Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize);
110108
if (AllocaAddr)
111109
*AllocaAddr = Alloca;
112110
llvm::Value *V = Alloca.getPointer();
113111
// Alloca always returns a pointer in alloca address space, which may
114112
// be different from the type defined by the language. For example,
115113
// in C++ the auto variables are in the default address space. Therefore
116114
// cast alloca to the default address space when necessary.
117-
if (getASTAllocaAddressSpace() != LangAS::Default) {
118-
auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default);
115+
116+
unsigned DestAddrSpace = getContext().getTargetAddressSpace(DestLangAS);
117+
if (DestAddrSpace != Alloca.getAddressSpace()) {
119118
llvm::IRBuilderBase::InsertPointGuard IPG(Builder);
120119
// When ArraySize is nullptr, alloca is inserted at AllocaInsertPt,
121120
// otherwise alloca is inserted at the current insertion point of the
122121
// builder.
123122
if (!ArraySize)
124123
Builder.SetInsertPoint(getPostAllocaInsertPoint());
125124
V = getTargetHooks().performAddrSpaceCast(
126-
*this, V, getASTAllocaAddressSpace(), LangAS::Default,
127-
Builder.getPtrTy(DestAddrSpace), /*non-null*/ true);
125+
*this, V, getASTAllocaAddressSpace(), DestLangAS,
126+
Builder.getPtrTy(DestAddrSpace), /*IsNonNull=*/true);
128127
}
129128

130129
return RawAddress(V, Ty, Align, KnownNonNull);

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2861,10 +2861,28 @@ class CodeGenFunction : public CodeGenTypeCache {
28612861
/// more efficient if the caller knows that the address will not be exposed.
28622862
llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty, const Twine &Name = "tmp",
28632863
llvm::Value *ArraySize = nullptr);
2864+
2865+
/// CreateTempAlloca - This creates a alloca and inserts it into the entry
2866+
/// block. The alloca is casted to the address space of \p UseAddrSpace if
2867+
/// necessary.
2868+
RawAddress CreateTempAlloca(llvm::Type *Ty, LangAS UseAddrSpace,
2869+
CharUnits align, const Twine &Name = "tmp",
2870+
llvm::Value *ArraySize = nullptr,
2871+
RawAddress *Alloca = nullptr);
2872+
2873+
/// CreateTempAlloca - This creates a alloca and inserts it into the entry
2874+
/// block. The alloca is casted to default address space if necessary.
2875+
///
2876+
/// FIXME: This version should be removed, and context should provide the
2877+
/// context use address space used instead of default.
28642878
RawAddress CreateTempAlloca(llvm::Type *Ty, CharUnits align,
28652879
const Twine &Name = "tmp",
28662880
llvm::Value *ArraySize = nullptr,
2867-
RawAddress *Alloca = nullptr);
2881+
RawAddress *Alloca = nullptr) {
2882+
return CreateTempAlloca(Ty, LangAS::Default, align, Name, ArraySize,
2883+
Alloca);
2884+
}
2885+
28682886
RawAddress CreateTempAllocaWithoutCast(llvm::Type *Ty, CharUnits align,
28692887
const Twine &Name = "tmp",
28702888
llvm::Value *ArraySize = nullptr);

clang/test/CodeGenOpenCL/addr-space-struct-arg.cl

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -798,10 +798,7 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
798798
// AMDGCN20-SAME: ) #[[ATTR0]] {
799799
// AMDGCN20-NEXT: [[ENTRY:.*:]]
800800
// AMDGCN20-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
801-
// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
802-
// AMDGCN20-NEXT: [[P_S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_S]] to ptr
803-
// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[P_S_ASCAST]], i64 800, i1 false)
804-
// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]]
801+
// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR4]]
805802
// AMDGCN20-NEXT: ret void
806803
//
807804
//

clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl

Lines changed: 19 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -55,22 +55,19 @@ void func1(int *x) {
5555
// CL20-NEXT: [[LP1:%.*]] = alloca ptr, align 8, addrspace(5)
5656
// CL20-NEXT: [[LP2:%.*]] = alloca ptr, align 8, addrspace(5)
5757
// CL20-NEXT: [[LVC:%.*]] = alloca i32, align 4, addrspace(5)
58+
// CL20-NEXT: store i32 1, ptr addrspace(5) [[LV1]], align 4
59+
// CL20-NEXT: store i32 2, ptr addrspace(5) [[LV2]], align 4
60+
// CL20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0
61+
// CL20-NEXT: store i32 3, ptr addrspace(5) [[ARRAYIDX]], align 4
5862
// CL20-NEXT: [[LV1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV1]] to ptr
59-
// CL20-NEXT: [[LV2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV2]] to ptr
60-
// CL20-NEXT: [[LA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LA]] to ptr
61-
// CL20-NEXT: [[LP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP1]] to ptr
62-
// CL20-NEXT: [[LP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP2]] to ptr
63-
// CL20-NEXT: [[LVC_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LVC]] to ptr
64-
// CL20-NEXT: store i32 1, ptr [[LV1_ASCAST]], align 4
65-
// CL20-NEXT: store i32 2, ptr [[LV2_ASCAST]], align 4
66-
// CL20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0
67-
// CL20-NEXT: store i32 3, ptr [[ARRAYIDX]], align 4
68-
// CL20-NEXT: store ptr [[LV1_ASCAST]], ptr [[LP1_ASCAST]], align 8
69-
// CL20-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0
70-
// CL20-NEXT: store ptr [[ARRAYDECAY]], ptr [[LP2_ASCAST]], align 8
71-
// CL20-NEXT: call void @func1(ptr noundef [[LV1_ASCAST]]) #[[ATTR2:[0-9]+]]
72-
// CL20-NEXT: store i32 4, ptr [[LVC_ASCAST]], align 4
73-
// CL20-NEXT: store i32 4, ptr [[LV1_ASCAST]], align 4
63+
// CL20-NEXT: store ptr [[LV1_ASCAST]], ptr addrspace(5) [[LP1]], align 8
64+
// CL20-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0
65+
// CL20-NEXT: [[ARRAYDECAY_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARRAYDECAY]] to ptr
66+
// CL20-NEXT: store ptr [[ARRAYDECAY_ASCAST]], ptr addrspace(5) [[LP2]], align 8
67+
// CL20-NEXT: [[LV1_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[LV1]] to ptr
68+
// CL20-NEXT: call void @func1(ptr noundef [[LV1_ASCAST1]]) #[[ATTR2:[0-9]+]]
69+
// CL20-NEXT: store i32 4, ptr addrspace(5) [[LVC]], align 4
70+
// CL20-NEXT: store i32 4, ptr addrspace(5) [[LV1]], align 4
7471
// CL20-NEXT: ret void
7572
//
7673
void func2(void) {
@@ -102,8 +99,7 @@ void func2(void) {
10299
// CL20-SAME: ) #[[ATTR0]] {
103100
// CL20-NEXT: [[ENTRY:.*:]]
104101
// CL20-NEXT: [[A:%.*]] = alloca [16 x [1 x float]], align 4, addrspace(5)
105-
// CL20-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
106-
// CL20-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A_ASCAST]], i8 0, i64 64, i1 false)
102+
// CL20-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 4 [[A]], i8 0, i64 64, i1 false)
107103
// CL20-NEXT: ret void
108104
//
109105
void func3(void) {
@@ -126,11 +122,9 @@ void func3(void) {
126122
// CL20-NEXT: [[ENTRY:.*:]]
127123
// CL20-NEXT: [[VAR:%.*]] = alloca i64, align 8, addrspace(5)
128124
// CL20-NEXT: [[ALLOCA_ADDR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
129-
// CL20-NEXT: [[VAR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAR]] to ptr
130-
// CL20-NEXT: [[ALLOCA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA_ADDR]] to ptr
131-
// CL20-NEXT: store i64 5, ptr [[VAR_ASCAST]], align 8
132-
// CL20-NEXT: store ptr [[VAR_ASCAST]], ptr [[ALLOCA_ADDR_ASCAST]], align 4
133-
// CL20-NEXT: [[TMP0:%.*]] = load ptr addrspace(5), ptr [[ALLOCA_ADDR_ASCAST]], align 4
125+
// CL20-NEXT: store i64 5, ptr addrspace(5) [[VAR]], align 8
126+
// CL20-NEXT: store ptr addrspace(5) [[VAR]], ptr addrspace(5) [[ALLOCA_ADDR]], align 4
127+
// CL20-NEXT: [[TMP0:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[ALLOCA_ADDR]], align 4
134128
// CL20-NEXT: store i64 8, ptr addrspace(5) [[TMP0]], align 8
135129
// CL20-NEXT: ret void
136130
//
@@ -159,11 +153,10 @@ void wrong_store_type_private_pointer_alloca() {
159153
// CL20-NEXT: [[ENTRY:.*:]]
160154
// CL20-NEXT: [[VAR:%.*]] = alloca i64, align 8, addrspace(5)
161155
// CL20-NEXT: [[ALLOCA_ADDR_AS_GENERIC:%.*]] = alloca ptr, align 8, addrspace(5)
156+
// CL20-NEXT: store i64 5, ptr addrspace(5) [[VAR]], align 8
162157
// CL20-NEXT: [[VAR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAR]] to ptr
163-
// CL20-NEXT: [[ALLOCA_ADDR_AS_GENERIC_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA_ADDR_AS_GENERIC]] to ptr
164-
// CL20-NEXT: store i64 5, ptr [[VAR_ASCAST]], align 8
165-
// CL20-NEXT: store ptr [[VAR_ASCAST]], ptr [[ALLOCA_ADDR_AS_GENERIC_ASCAST]], align 8
166-
// CL20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ALLOCA_ADDR_AS_GENERIC_ASCAST]], align 8
158+
// CL20-NEXT: store ptr [[VAR_ASCAST]], ptr addrspace(5) [[ALLOCA_ADDR_AS_GENERIC]], align 8
159+
// CL20-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[ALLOCA_ADDR_AS_GENERIC]], align 8
167160
// CL20-NEXT: store i64 9, ptr [[TMP0]], align 8
168161
// CL20-NEXT: ret void
169162
//

clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -272,10 +272,7 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
272272
// AMDGCN-SAME: ) #[[ATTR0]] {
273273
// AMDGCN-NEXT: [[ENTRY:.*:]]
274274
// AMDGCN-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
275-
// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
276-
// AMDGCN-NEXT: [[P_S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_S]] to ptr
277-
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[P_S_ASCAST]], i64 800, i1 false)
278-
// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]]
275+
// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR4]]
279276
// AMDGCN-NEXT: ret void
280277
//
281278
//

0 commit comments

Comments
 (0)