Skip to content

Commit 6d9fc1b

Browse files
authored
AMDGPU: Fix producing invalid IR on vector typed getelementptr (#114113)
This did not consider the IR change to allow a scalar base with a vector offset part. Reject any users that are not explicitly handled. In this situation we could handle the vector GEP, but that is a larger change. This just avoids the IR verifier error by rejecting it.
1 parent bb39151 commit 6d9fc1b

File tree

2 files changed

+58
-4
lines changed

2 files changed

+58
-4
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1159,7 +1159,6 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
11591159
if (LoadInst *LI = dyn_cast<LoadInst>(UseInst)) {
11601160
if (LI->isVolatile())
11611161
return false;
1162-
11631162
continue;
11641163
}
11651164

@@ -1170,12 +1169,19 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
11701169
// Reject if the stored value is not the pointer operand.
11711170
if (SI->getPointerOperand() != Val)
11721171
return false;
1173-
} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) {
1172+
continue;
1173+
}
1174+
1175+
if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) {
11741176
if (RMW->isVolatile())
11751177
return false;
1176-
} else if (AtomicCmpXchgInst *CAS = dyn_cast<AtomicCmpXchgInst>(UseInst)) {
1178+
continue;
1179+
}
1180+
1181+
if (AtomicCmpXchgInst *CAS = dyn_cast<AtomicCmpXchgInst>(UseInst)) {
11771182
if (CAS->isVolatile())
11781183
return false;
1184+
continue;
11791185
}
11801186

11811187
// Only promote a select if we know that the other select operand
@@ -1186,6 +1192,7 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
11861192

11871193
// May need to rewrite constant operands.
11881194
WorkList.push_back(ICmp);
1195+
continue;
11891196
}
11901197

11911198
// TODO: If we know the address is only observed through flat pointers, we
@@ -1198,8 +1205,9 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
11981205
if (isa<InsertValueInst>(User) || isa<InsertElementInst>(User))
11991206
return false;
12001207

1208+
// TODO: Handle vectors of pointers.
12011209
if (!User->getType()->isPointerTy())
1202-
continue;
1210+
return false;
12031211

12041212
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UseInst)) {
12051213
// Be conservative if an address could be computed outside the bounds of
@@ -1504,6 +1512,8 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(AllocaInst &I,
15041512

15051513
PointerType *NewTy = PointerType::get(Context, AMDGPUAS::LOCAL_ADDRESS);
15061514

1515+
assert(isa<PointerType>(V->getType()));
1516+
15071517
// FIXME: It doesn't really make sense to try to do this for all
15081518
// instructions.
15091519
V->mutateType(NewTy);
Lines changed: 44 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,44 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s
3+
4+
; Check that invalid IR is not produced on a vector typed
5+
; getelementptr with a scalar alloca pointer base.
6+
7+
define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset() {
8+
; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset() {
9+
; CHECK-NEXT: [[BB:.*:]]
10+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
11+
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ALLOCA]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
12+
; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(5)> [[GETELEMENTPTR]], i64 0
13+
; CHECK-NEXT: store i32 0, ptr addrspace(5) [[EXTRACTELEMENT]], align 4
14+
; CHECK-NEXT: ret void
15+
;
16+
bb:
17+
%alloca = alloca i32, align 4, addrspace(5)
18+
%getelementptr = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
19+
%extractelement = extractelement <4 x ptr addrspace(5)> %getelementptr, i64 0
20+
store i32 0, ptr addrspace(5) %extractelement
21+
ret void
22+
}
23+
24+
define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select(i1 %cond) {
25+
; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select(
26+
; CHECK-SAME: i1 [[COND:%.*]]) {
27+
; CHECK-NEXT: [[BB:.*:]]
28+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
29+
; CHECK-NEXT: [[GETELEMENTPTR0:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ALLOCA]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
30+
; CHECK-NEXT: [[GETELEMENTPTR1:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ALLOCA]], <4 x i64> <i64 3, i64 2, i64 1, i64 0>
31+
; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], <4 x ptr addrspace(5)> [[GETELEMENTPTR0]], <4 x ptr addrspace(5)> [[GETELEMENTPTR1]]
32+
; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(5)> [[SELECT]], i64 1
33+
; CHECK-NEXT: store i32 0, ptr addrspace(5) [[EXTRACTELEMENT]], align 4
34+
; CHECK-NEXT: ret void
35+
;
36+
bb:
37+
%alloca = alloca i32, align 4, addrspace(5)
38+
%getelementptr0 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
39+
%getelementptr1 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 3, i64 2, i64 1, i64 0>
40+
%select = select i1 %cond, <4 x ptr addrspace(5)> %getelementptr0, <4 x ptr addrspace(5)> %getelementptr1
41+
%extractelement = extractelement <4 x ptr addrspace(5)> %select, i64 1
42+
store i32 0, ptr addrspace(5) %extractelement
43+
ret void
44+
}

0 commit comments

Comments (0)