Skip to content

Commit 4c9e14b

Browse files
sgundapa and arsenm authored
[AMDGPU] Update PromoteAlloca to handle GEPs with variable offset. (#122342)
In the case of a GEP with a variable offset that can be optimized out, promote alloca is updated to use the refreshed index to avoid an assertion. Issue found by fuzzer. --------- Co-authored-by: Matt Arsenault <[email protected]>
1 parent 607a1f2 commit 4c9e14b

File tree

2 files changed

+36
-6
lines changed

2 files changed

+36
-6
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -385,16 +385,18 @@ static bool isSupportedMemset(MemSetInst *I, AllocaInst *AI,
385385
match(I->getOperand(2), m_SpecificInt(Size)) && !I->isVolatile();
386386
}
387387

388-
static Value *
389-
calculateVectorIndex(Value *Ptr,
390-
const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
388+
static Value *calculateVectorIndex(
389+
Value *Ptr, const std::map<GetElementPtrInst *, WeakTrackingVH> &GEPIdx) {
391390
auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts());
392391
if (!GEP)
393392
return ConstantInt::getNullValue(Type::getInt32Ty(Ptr->getContext()));
394393

395394
auto I = GEPIdx.find(GEP);
396395
assert(I != GEPIdx.end() && "Must have entry for GEP!");
397-
return I->second;
396+
397+
Value *IndexValue = I->second;
398+
assert(IndexValue && "index value missing from GEP index map");
399+
return IndexValue;
398400
}
399401

400402
static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
@@ -449,7 +451,7 @@ static Value *promoteAllocaUserToVector(
449451
Instruction *Inst, const DataLayout &DL, FixedVectorType *VectorTy,
450452
unsigned VecStoreSize, unsigned ElementSize,
451453
DenseMap<MemTransferInst *, MemTransferInfo> &TransferInfo,
452-
std::map<GetElementPtrInst *, Value *> &GEPVectorIdx, Value *CurVal,
454+
std::map<GetElementPtrInst *, WeakTrackingVH> &GEPVectorIdx, Value *CurVal,
453455
SmallVectorImpl<LoadInst *> &DeferredLoads) {
454456
// Note: we use InstSimplifyFolder because it can leverage the DataLayout
455457
// to do more folding, especially in the case of vector splats.
@@ -757,7 +759,7 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
757759
return false;
758760
}
759761

760-
std::map<GetElementPtrInst *, Value *> GEPVectorIdx;
762+
std::map<GetElementPtrInst *, WeakTrackingVH> GEPVectorIdx;
761763
SmallVector<Instruction *> WorkList;
762764
SmallVector<Instruction *> UsersToRemove;
763765
SmallVector<Instruction *> DeferredInsts;

llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,34 @@ define amdgpu_vs void @promote_load_from_store_aggr() #0 {
122122
ret void
123123
}
124124

125+
%Block4 = type { [2 x i32], i32 }
126+
@block4 = external addrspace(1) global %Block4
127+
%gl_PV = type { <4 x i32>, i32, [1 x i32], [1 x i32] }
128+
@pv1 = external addrspace(1) global %gl_PV
129+
130+
; This should not crash on an aliased variable offset that can be
131+
; optimized out (variable %aliasTofoo3 in the test)
132+
define amdgpu_vs void @promote_load_from_store_aggr_varoff(<4 x i32> %input) {
133+
; CHECK-LABEL: @promote_load_from_store_aggr_varoff(
134+
; CHECK-NEXT: [[FOO3_UNPACK2:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
135+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i32> undef, i32 [[FOO3_UNPACK2]], i32 2
136+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i32> [[TMP1]], i32 [[FOO3_UNPACK2]]
137+
; CHECK-NEXT: [[FOO12:%.*]] = insertelement <4 x i32> %input, i32 [[TMP2]], i64 3
138+
; CHECK-NEXT: store <4 x i32> [[FOO12]], ptr addrspace(1) @pv1, align 16
139+
; CHECK-NEXT: ret void
140+
;
141+
%f1 = alloca [3 x i32], align 4, addrspace(5)
142+
%G1 = getelementptr inbounds i8, ptr addrspace(5) %f1, i32 8
143+
%foo3.unpack2 = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
144+
store i32 %foo3.unpack2, ptr addrspace(5) %G1, align 4
145+
%aliasTofoo3 = load i32, ptr addrspace(5) %G1, align 4
146+
%foo5 = getelementptr [3 x i32], ptr addrspace(5) %f1, i32 0, i32 %aliasTofoo3
147+
%foo6 = load i32, ptr addrspace(5) %foo5, align 4
148+
%foo12 = insertelement <4 x i32> %input, i32 %foo6, i64 3
149+
store <4 x i32> %foo12, ptr addrspace(1) @pv1, align 16
150+
ret void
151+
}
152+
125153
define amdgpu_vs void @promote_memmove_aggr() #0 {
126154
; CHECK-LABEL: @promote_memmove_aggr(
127155
; CHECK-NEXT: store float 1.000000e+00, ptr addrspace(1) @pv, align 4

0 commit comments

Comments
 (0)