[AMDGPU] Update PromoteAlloca to handle GEPs with variable offset. #122342
Conversation
In the case of a variable GEP offset that can be optimized out, promote alloca is updated to use the refreshed index to avoid an assertion. Issue found by fuzzer.
@llvm/pr-subscribers-backend-amdgpu

Author: Sumanth Gundapaneni (sgundapa)

Changes

In the case of a variable GEP offset that can be optimized out, promote alloca is updated to use the refreshed index to avoid an assertion. Issue found by fuzzer.

Full diff: https://github.com/llvm/llvm-project/pull/122342.diff

2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index e27ef71c1c0883..1e32743c3dfeeb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -385,13 +385,42 @@ static bool isSupportedMemset(MemSetInst *I, AllocaInst *AI,
match(I->getOperand(2), m_SpecificInt(Size)) && !I->isVolatile();
}
+static bool hasVariableOffset(GetElementPtrInst *GEP) {
+ // Iterate over all operands starting from the first index (index 0 is the
+ // base pointer).
+ for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i) {
+ Value *Op = GEP->getOperand(i);
+ // Check if the operand is not a constant integer value
+ if (!isa<ConstantInt>(Op)) {
+ return true;
+ }
+ }
+ return false;
+}
+
static Value *
-calculateVectorIndex(Value *Ptr,
- const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
+calculateVectorIndex(Value *Ptr, std::map<GetElementPtrInst *, Value *> &GEPIdx,
+ const DataLayout &DL) {
auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts());
if (!GEP)
return ConstantInt::getNullValue(Type::getInt32Ty(Ptr->getContext()));
+ // If the index of this GEP is a variable that might be deleted,
+ // update the index with its latest value. We've already handled any GEPs
+ // with unsupported index types (in GEPToVectorIndex) at this point.
+ if (hasVariableOffset(GEP)) {
+ unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
+ SmallMapVector<Value *, APInt, 4> VarOffsets;
+ APInt ConstOffset(BW, 0);
+ if (GEP->collectOffset(DL, BW, VarOffsets, ConstOffset)) {
+ if (VarOffsets.size() == 1 && ConstOffset.isZero()) {
+ auto *UpdatedValue = VarOffsets.front().first;
+ GEPIdx[GEP] = UpdatedValue;
+ return UpdatedValue;
+ }
+ }
+ }
+
auto I = GEPIdx.find(GEP);
assert(I != GEPIdx.end() && "Must have entry for GEP!");
return I->second;
@@ -496,7 +525,7 @@ static Value *promoteAllocaUserToVector(
}
Value *Index = calculateVectorIndex(
- cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx);
+ cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx, DL);
// We're loading the full vector.
Type *AccessTy = Inst->getType();
@@ -552,7 +581,8 @@ static Value *promoteAllocaUserToVector(
// to know the current value. If this is a store of a single element, we
// need to know the value.
StoreInst *SI = cast<StoreInst>(Inst);
- Value *Index = calculateVectorIndex(SI->getPointerOperand(), GEPVectorIdx);
+ Value *Index =
+ calculateVectorIndex(SI->getPointerOperand(), GEPVectorIdx, DL);
Value *Val = SI->getValueOperand();
// We're storing the full vector, we can handle this without knowing CurVal.
@@ -850,7 +880,8 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
if (Ptr != &Alloca && !GEPVectorIdx.count(GEP))
return nullptr;
- return dyn_cast<ConstantInt>(calculateVectorIndex(Ptr, GEPVectorIdx));
+ return dyn_cast<ConstantInt>(
+ calculateVectorIndex(Ptr, GEPVectorIdx, *DL));
};
unsigned OpNum = U->getOperandNo();
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
index 05c727201bbf1d..9db416041a5bc0 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
@@ -122,6 +122,34 @@ define amdgpu_vs void @promote_load_from_store_aggr() #0 {
ret void
}
+%Block4 = type { [2 x i32], i32 }
+@block4 = external addrspace(1) global %Block4
+%gl_PV = type { <4 x i32>, i32, [1 x i32], [1 x i32] }
+@pv1 = external addrspace(1) global %gl_PV
+
+; This should not crash on variable offset that can be
+; optimized out (variable foo4 in the test)
+define amdgpu_vs void @promote_load_from_store_aggr_varoff() local_unnamed_addr {
+; CHECK-LABEL: @promote_load_from_store_aggr_varoff(
+; CHECK-NEXT: [[FOO3_UNPACK2:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i32> undef, i32 [[FOO3_UNPACK2]], i32 2
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i32> [[TMP1]], i32 [[FOO3_UNPACK2]]
+; CHECK-NEXT: [[FOO12:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 3
+; CHECK-NEXT: store <4 x i32> [[FOO12]], ptr addrspace(1) @pv1, align 16
+; CHECK-NEXT: ret void
+;
+ %f1 = alloca [3 x i32], align 4, addrspace(5)
+ %G1 = getelementptr inbounds i8, ptr addrspace(5) %f1, i32 8
+ %foo3.unpack2 = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
+ store i32 %foo3.unpack2, ptr addrspace(5) %G1, align 4
+ %foo4 = load i32, ptr addrspace(5) %G1, align 4
+ %foo5 = getelementptr [3 x i32], ptr addrspace(5) %f1, i32 0, i32 %foo4
+ %foo6 = load i32, ptr addrspace(5) %foo5, align 4
+ %foo12 = insertelement <4 x i32> poison, i32 %foo6, i64 3
+ store <4 x i32> %foo12, ptr addrspace(1) @pv1, align 16
+ ret void
+}
+
define amdgpu_vs void @promote_memmove_aggr() #0 {
; CHECK-LABEL: @promote_memmove_aggr(
; CHECK-NEXT: store float 1.000000e+00, ptr addrspace(1) @pv, align 4
You can test this locally with the following command:

git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 3019e49ebfc5d710191712b6d437c56c01e65b87 3e8e15f5a351ca5d380e884ea808ca206aef950a llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll

The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef. You should use poison values for placeholders instead.

In tests, avoid using undef and having tests that trigger undefined behavior. If you need an operand with some unimportant value, you can add a new argument to the function and use that instead.

For example, this is considered a bad practice:

define void @fn() {
  ...
  br i1 undef, ...
}

Please use the following instead:

define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}

Please refer to the Undefined Behavior Manual for more information.
@@ -385,13 +385,42 @@ static bool isSupportedMemset(MemSetInst *I, AllocaInst *AI,
match(I->getOperand(2), m_SpecificInt(Size)) && !I->isVolatile();
}
static bool hasVariableOffset(GetElementPtrInst *GEP) {
This looks like you're pre-filtering for a scenario that could happen in the map lookup. You should directly detect when this delete happens rather than assuming the set of cases it could
It will make the code future-proof. Will make the change.
The deletion of this instruction happens after the transformation is done.
In the attached lit test, the dangling use of the instruction in the cached Index is a result of replaceAllUsesWith() in promoteAllocaUserToVector(). Please advise. I will make this code generic (not tied to a specific scenario).
One possible solution is to track these values with WeakVH. Then you'll just have to check if the value was already deleted before deleting it. This appears to be how SROA handles it (see how DeadInsts is a vector of WeakVH).
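As a rough illustration of that suggestion (not code from this PR; the helper name and worklist are hypothetical), a WeakVH nulls itself out when the instruction it tracks is erased, so a worklist populated during the transformation can be processed safely afterwards, similar to SROA's DeadInsts:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

// Erase every instruction on the worklist that still exists and is dead.
// Assumes callers only push instructions whose uses have been rewritten.
static void deleteDeadWorklist(SmallVectorImpl<WeakVH> &DeadInsts) {
  for (WeakVH &VH : DeadInsts) {
    Value *V = VH; // Reads back as null if the instruction was already erased.
    auto *I = dyn_cast_or_null<Instruction>(V);
    if (!I || !I->use_empty())
      continue;
    I->eraseFromParent();
  }
  DeadInsts.clear();
}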
Thanks Matt. I will look into WeakVH.
WeakVH might not be useful here, as the Value still exists in the code. We could run DCE, but I am not sure it is guaranteed that the Value has no remaining uses in the code.
You can easily check if the operation is dead. This pass is essentially doing the same thing as SROA, which checks isInstructionTriviallyDead.
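A minimal sketch of what that check could look like (illustrative only; the helper name is hypothetical, and the map type mirrors this pass's GEPVectorIdx rather than the PR's actual change): before trusting a cached entry, ask whether the cached index value has become a trivially dead instruction and, if so, recompute it.

#include <map>

#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

// Return the cached index for GEP, or null if the cached value is an
// instruction that has since become trivially dead (no users, no side
// effects), in which case the caller should recompute the index.
static Value *getCachedIndexIfValid(
    GetElementPtrInst *GEP,
    const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
  auto It = GEPIdx.find(GEP);
  if (It == GEPIdx.end())
    return nullptr;
  if (auto *I = dyn_cast<Instruction>(It->second))
    if (isInstructionTriviallyDead(I))
      return nullptr;
  return It->second;
}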
%G1 = getelementptr inbounds i8, ptr addrspace(5) %f1, i32 8
%foo3.unpack2 = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
store i32 %foo3.unpack2, ptr addrspace(5) %G1, align 4
%foo4 = load i32, ptr addrspace(5) %G1, align 4 |
Give a better name to indicate this is the interesting value.
Sure
; This should not crash on variable offset that can be
; optimized out (variable foo4 in the test)
define amdgpu_vs void @promote_load_from_store_aggr_varoff() local_unnamed_addr {
Suggested change:
-define amdgpu_vs void @promote_load_from_store_aggr_varoff() local_unnamed_addr {
+define amdgpu_vs void @promote_load_from_store_aggr_varoff() {
Sure
The PromoteAlloca pass was using outdated cached GEP indices in some cases, leading to an assertion failure when encountering aliased GEP indices that could be optimized out. This commit fixes the issue by refreshing the cached index before use. Issue found by fuzzer.
@@ -459,6 +422,36 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
return ConstantInt::get(GEP->getContext(), Quot);
}
// Function to check if a Value is an operand of a GetElementPtrInst.
static bool isValueInGEP(GetElementPtrInst *GEP, Value *ValueToCheck) {
This still feels like too specific of a check, and the same form of pattern could arise from other instructions. It would be better to detect it as part of the transformation
The PromoteAlloca pass was using outdated cached GEP indices in some cases, leading to an assertion failure. This commit fixes the issue by using ValueHandle to track the change in values. Issue found by fuzzer.
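For context, a minimal sketch of how a ValueHandle-based cache behaves (illustrative only, with a hypothetical lookup helper; see the PR's diff for the actual change): a WeakTrackingVH follows replaceAllUsesWith(), so a map entry read back later yields the replacement value instead of a stale instruction.

#include <cassert>
#include <map>

#include "llvm/IR/Instructions.h"
#include "llvm/IR/ValueHandle.h"

using namespace llvm;

// Cache of per-GEP vector indices that stays valid across RAUW: each
// WeakTrackingVH is updated when its value is replaced, and becomes null
// if the value is deleted outright.
using GEPIndexMap = std::map<GetElementPtrInst *, WeakTrackingVH>;

static Value *lookupVectorIndex(GetElementPtrInst *GEP, GEPIndexMap &GEPIdx) {
  auto It = GEPIdx.find(GEP);
  assert(It != GEPIdx.end() && "Must have entry for GEP!");
  return It->second; // Implicitly converts to the up-to-date Value *.
}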
Co-authored-by: Matt Arsenault <[email protected]>
✅ With the latest revision this PR passed the C/C++ code formatter.
In the case of a variable GEP offset that can be optimized out, promote alloca is updated to use the refreshed index to avoid an assertion.
Issue found by fuzzer.