[AMDGPU] Update PromoteAlloca to handle GEPs with variable offset. #122342

sgundapa · 2025-01-09T19:04:23Z

When a GEP has a variable offset that can be optimized out, PromoteAlloca is updated to use the refreshed index to avoid an assertion failure.

Issue found by fuzzer.

When a GEP has a variable offset that can be optimized out, PromoteAlloca is updated to use the refreshed index to avoid an assertion failure. Issue found by fuzzer.

llvmbot · 2025-01-09T19:04:57Z

@llvm/pr-subscribers-backend-amdgpu

Author: Sumanth Gundapaneni (sgundapa)

Changes

When a GEP has a variable offset that can be optimized out, PromoteAlloca is updated to use the refreshed index to avoid an assertion failure.

Issue found by fuzzer.

Full diff: https://github.com/llvm/llvm-project/pull/122342.diff

2 Files Affected:

(modified) llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp (+36-5)
(modified) llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll (+28)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index e27ef71c1c0883..1e32743c3dfeeb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -385,13 +385,42 @@ static bool isSupportedMemset(MemSetInst *I, AllocaInst *AI,
          match(I->getOperand(2), m_SpecificInt(Size)) && !I->isVolatile();
 }
 
+static bool hasVariableOffset(GetElementPtrInst *GEP) {
+  // Iterate over all operands starting from the first index (index 0 is the
+  // base pointer).
+  for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i) {
+    Value *Op = GEP->getOperand(i);
+    // Check if the operand is not a constant integer value
+    if (!isa<ConstantInt>(Op)) {
+      return true;
+    }
+  }
+  return false;
+}
+
 static Value *
-calculateVectorIndex(Value *Ptr,
-                     const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
+calculateVectorIndex(Value *Ptr, std::map<GetElementPtrInst *, Value *> &GEPIdx,
+                     const DataLayout &DL) {
   auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts());
   if (!GEP)
     return ConstantInt::getNullValue(Type::getInt32Ty(Ptr->getContext()));
 
+  // If the index of this GEP is a variable that might be deleted,
+  // update the index with its latest value. We've already handled any GEPs
+  // with unsupported index types(in GEPToVectorIndex) at this point.
+  if (hasVariableOffset(GEP)) {
+    unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
+    SmallMapVector<Value *, APInt, 4> VarOffsets;
+    APInt ConstOffset(BW, 0);
+    if (GEP->collectOffset(DL, BW, VarOffsets, ConstOffset)) {
+      if (VarOffsets.size() == 1 && ConstOffset.isZero()) {
+        auto *UpdatedValue = VarOffsets.front().first;
+        GEPIdx[GEP] = UpdatedValue;
+        return UpdatedValue;
+      }
+    }
+  }
+
   auto I = GEPIdx.find(GEP);
   assert(I != GEPIdx.end() && "Must have entry for GEP!");
   return I->second;
@@ -496,7 +525,7 @@ static Value *promoteAllocaUserToVector(
     }
 
     Value *Index = calculateVectorIndex(
-        cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx);
+        cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx, DL);
 
     // We're loading the full vector.
     Type *AccessTy = Inst->getType();
@@ -552,7 +581,8 @@ static Value *promoteAllocaUserToVector(
     // to know the current value. If this is a store of a single element, we
     // need to know the value.
     StoreInst *SI = cast<StoreInst>(Inst);
-    Value *Index = calculateVectorIndex(SI->getPointerOperand(), GEPVectorIdx);
+    Value *Index =
+        calculateVectorIndex(SI->getPointerOperand(), GEPVectorIdx, DL);
     Value *Val = SI->getValueOperand();
 
     // We're storing the full vector, we can handle this without knowing CurVal.
@@ -850,7 +880,8 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
         if (Ptr != &Alloca && !GEPVectorIdx.count(GEP))
           return nullptr;
 
-        return dyn_cast<ConstantInt>(calculateVectorIndex(Ptr, GEPVectorIdx));
+        return dyn_cast<ConstantInt>(
+            calculateVectorIndex(Ptr, GEPVectorIdx, *DL));
       };
 
       unsigned OpNum = U->getOperandNo();
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
index 05c727201bbf1d..9db416041a5bc0 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
@@ -122,6 +122,34 @@ define amdgpu_vs void @promote_load_from_store_aggr() #0 {
   ret void
 }
 
+%Block4 = type { [2 x i32], i32 }
+@block4 = external addrspace(1) global %Block4
+%gl_PV = type { <4 x i32>, i32, [1 x i32], [1 x i32] }
+@pv1 = external addrspace(1) global %gl_PV
+
+; This should should not crash on variable offset that can be
+; optimized out (variable foo4 in the test)
+define amdgpu_vs void @promote_load_from_store_aggr_varoff() local_unnamed_addr {
+; CHECK-LABEL: @promote_load_from_store_aggr_varoff(
+; CHECK-NEXT:    [[FOO3_UNPACK2:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <3 x i32> undef, i32 [[FOO3_UNPACK2]], i32 2
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <3 x i32> [[TMP1]], i32 [[FOO3_UNPACK2]]
+; CHECK-NEXT:    [[FOO12:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 3
+; CHECK-NEXT:    store <4 x i32> [[FOO12]], ptr addrspace(1) @pv1, align 16
+; CHECK-NEXT:    ret void
+;
+  %f1 = alloca [3 x i32], align 4, addrspace(5)
+  %G1 = getelementptr inbounds i8, ptr addrspace(5) %f1, i32 8
+  %foo3.unpack2 = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
+  store i32 %foo3.unpack2, ptr addrspace(5) %G1, align 4
+  %foo4 = load i32, ptr addrspace(5) %G1, align 4
+  %foo5 = getelementptr [3 x i32], ptr addrspace(5) %f1, i32 0, i32 %foo4
+  %foo6 = load i32, ptr addrspace(5) %foo5, align 4
+  %foo12 = insertelement <4 x i32> poison, i32 %foo6, i64 3
+  store <4 x i32> %foo12, ptr addrspace(1) @pv1, align 16
+  ret void
+}
+
 define amdgpu_vs void @promote_memmove_aggr() #0 {
 ; CHECK-LABEL: @promote_memmove_aggr(
 ; CHECK-NEXT:    store float 1.000000e+00, ptr addrspace(1) @pv, align 4

github-actions · 2025-01-09T19:07:46Z

⚠️ undef deprecator found issues in your code. ⚠️

You can test this locally with the following command:

git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 3019e49ebfc5d710191712b6d437c56c01e65b87 3e8e15f5a351ca5d380e884ea808ca206aef950a llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll

The following files introduce new uses of undef:

llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll

Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef. You should use poison values for placeholders instead.

In tests, avoid using undef and having tests that trigger undefined behavior. If you need an operand with some unimportant value, you can add a new argument to the function and use that instead.

For example, this is considered a bad practice:

define void @fn() {
  ...
  br i1 undef, ...
}

Please use the following instead:

define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}

Please refer to the Undefined Behavior Manual for more information.

arsenm · 2025-01-10T02:30:33Z

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

@@ -385,13 +385,42 @@ static bool isSupportedMemset(MemSetInst *I, AllocaInst *AI,
         match(I->getOperand(2), m_SpecificInt(Size)) && !I->isVolatile();
 }

+static bool hasVariableOffset(GetElementPtrInst *GEP) {


This looks like you're pre-filtering for a scenario that could happen in the map lookup. You should directly detect when this delete happens rather than assuming the set of cases it could

It will make the code future proof. Will make the change

This looks like you're pre-filtering for a scenario that could happen in the map lookup. You should directly detect when this delete happens rather than assuming the set of cases it could

The deletion of this instruction happens after the transformation is done.
In the attached lit test, the dangling use of the instruction in the cached index is the result of replaceAllUsesWith() in promoteAllocaUserToVector(). Please advise. I will make this code generic (not for a specific scenario).

One possible solution is to track these values with WeakVH. Then you'll just have to check if the value was already deleted before deleting it. This appears to be how SROA handles it (see how DeadInsts is a vector of WeakVH).

Thanks, Matt. I will look into WeakVH.

WeakVH might not be useful here, as the Value still exists in the code. We can do a DCE, but I am not sure whether it is guaranteed that the Value has no uses in the code.

You can easily check if the operation is dead. This pass is essentially doing the same thing as SROA, which checks isInstructionTriviallyDead

arsenm · 2025-01-10T02:38:38Z

llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll

+  %G1 = getelementptr inbounds i8, ptr addrspace(5) %f1, i32 8
+  %foo3.unpack2 = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
+  store i32 %foo3.unpack2, ptr addrspace(5) %G1, align 4
+  %foo4 = load i32, ptr addrspace(5) %G1, align 4


give a better name to indicate this is the interesting value

arsenm · 2025-01-10T02:38:56Z

llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll

+
+; This should should not crash on variable offset that can be
+; optimized out (variable foo4 in the test)
+define amdgpu_vs void @promote_load_from_store_aggr_varoff() local_unnamed_addr {


Suggested change

define amdgpu_vs void @promote_load_from_store_aggr_varoff() local_unnamed_addr {

define amdgpu_vs void @promote_load_from_store_aggr_varoff() {

The PromoteAlloca pass was using outdated cached GEP indices in some cases, leading to an assertion failure when encountering aliased GEP indices that could be optimized out. This commit fixes the issue by refreshing the cached index before use. Issue found by fuzzer.

arsenm · 2025-01-31T04:54:17Z

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

@@ -459,6 +422,36 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
  return ConstantInt::get(GEP->getContext(), Quot);
 }

+// Function to check if a Value is an operand of a GetElementPtrInst.
+static bool isValueInGEP(GetElementPtrInst *GEP, Value *ValueToCheck) {


This still feels like too specific of a check, and the same form of pattern could arise from other instructions. It would be better to detect it as part of the transformation

The PromoteAlloca pass was using outdated cached GEP indices in some cases, leading to an assertion failure. This commit fixes the issue by using ValueHandle to track the change in values. Issue found by fuzzer.

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Co-authored-by: Matt Arsenault <[email protected]>

github-actions · 2025-02-24T15:25:14Z

✅ With the latest revision this PR passed the C/C++ code formatter.

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

[AMDGPU] Update PromoteAlloca to handle GEPs with variable offset.

1093642

When a GEP has a variable offset that can be optimized out, PromoteAlloca is updated to use the refreshed index to avoid an assertion failure. Issue found by fuzzer.

sgundapa requested review from arsenm and bcahoon January 9, 2025 19:04

llvmbot added the backend:AMDGPU label Jan 9, 2025

arsenm reviewed Jan 10, 2025

View reviewed changes

arsenm mentioned this pull request Jan 16, 2025

[AMDGPU] Fix AMDGPUPromoteAlloca handling certain loads incorrectly #123173

Closed

arsenm reviewed Jan 31, 2025

View reviewed changes

sgundapa added 2 commits February 10, 2025 09:24

Merge branch 'llvm:main' into pro_alloc_crash

d443b33

[AMDGPU] Update PromoteAlloca to handle changed GEP Indexes.

13a43bd

The PromoteAlloca pass was using outdated cached GEP indices in some cases, leading to an assertion failure. This commit fixes the issue by using ValueHandle to track the change in values. Issue found by fuzzer.

sgundapa requested a review from arsenm February 10, 2025 23:25

arsenm approved these changes Feb 20, 2025

View reviewed changes

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp Outdated Show resolved Hide resolved

Update llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

bf8a22d

Co-authored-by: Matt Arsenault <[email protected]>

arsenm reviewed Feb 24, 2025

View reviewed changes

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp Outdated Show resolved Hide resolved

clang-format fix

3e8e15f

sgundapa merged commit 4c9e14b into llvm:main Feb 24, 2025
10 of 11 checks passed

	define amdgpu_vs void @promote_load_from_store_aggr_varoff() local_unnamed_addr {
	define amdgpu_vs void @promote_load_from_store_aggr_varoff() {

[AMDGPU] Update PromoteAlloca to handle GEPs with variable offset. #122342

[AMDGPU] Update PromoteAlloca to handle GEPs with variable offset. #122342

Uh oh!

Conversation

sgundapa commented Jan 9, 2025

Uh oh!

llvmbot commented Jan 9, 2025

Uh oh!

github-actions bot commented Jan 9, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

sgundapa Jan 14, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

github-actions bot commented Feb 24, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

github-actions bot commented Jan 9, 2025 •

edited

Loading

sgundapa Jan 14, 2025 •

edited

Loading

github-actions bot commented Feb 24, 2025 •

edited

Loading