-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[SCEVExpander] Clear flags when reusing GEP #109293
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-backend-webassembly Author: Nikita Popov (nikic) ChangesAs pointed out in the review of #102133, SCEVExpander currently incorrectly reuses GEP instructions that have poison-generating flags set. Fix this by clearing the flags on the reused instruction. Full diff: https://github.com/llvm/llvm-project/pull/109293.diff 5 Files Affected:
diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
index 62c1e15a9a60e1..0af3efeacd040c 100644
--- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -47,6 +47,7 @@ struct PoisonFlags {
unsigned Exact : 1;
unsigned Disjoint : 1;
unsigned NNeg : 1;
+ GEPNoWrapFlags GEPNW;
PoisonFlags(const Instruction *I);
void apply(Instruction *I);
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 0927a3015818fd..1088547e1f3efe 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -49,6 +49,7 @@ PoisonFlags::PoisonFlags(const Instruction *I) {
Exact = false;
Disjoint = false;
NNeg = false;
+ GEPNW = GEPNoWrapFlags::none();
if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) {
NUW = OBO->hasNoUnsignedWrap();
NSW = OBO->hasNoSignedWrap();
@@ -63,6 +64,8 @@ PoisonFlags::PoisonFlags(const Instruction *I) {
NUW = TI->hasNoUnsignedWrap();
NSW = TI->hasNoSignedWrap();
}
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
+ GEPNW = GEP->getNoWrapFlags();
}
void PoisonFlags::apply(Instruction *I) {
@@ -80,6 +83,8 @@ void PoisonFlags::apply(Instruction *I) {
I->setHasNoUnsignedWrap(NUW);
I->setHasNoSignedWrap(NSW);
}
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
+ GEP->setNoWrapFlags(GEPNW);
}
/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
@@ -370,11 +375,15 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Value *V) {
// generated code.
if (isa<DbgInfoIntrinsic>(IP))
ScanLimit++;
- if (IP->getOpcode() == Instruction::GetElementPtr &&
- IP->getOperand(0) == V && IP->getOperand(1) == Idx &&
- cast<GEPOperator>(&*IP)->getSourceElementType() ==
- Builder.getInt8Ty())
- return &*IP;
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(IP)) {
+ if (GEP->getPointerOperand() == V &&
+ GEP->getSourceElementType() == Builder.getInt8Ty() &&
+ GEP->getOperand(1) == Idx) {
+ rememberFlags(GEP);
+ GEP->setNoWrapFlags(GEPNoWrapFlags::none());
+ return &*IP;
+ }
+ }
if (IP == BlockBegin) break;
}
}
diff --git a/llvm/test/CodeGen/WebAssembly/simd-shift-in-loop.ll b/llvm/test/CodeGen/WebAssembly/simd-shift-in-loop.ll
index d4518d40e42986..75612ba645ca4d 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-shift-in-loop.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-shift-in-loop.ll
@@ -9,19 +9,21 @@ target triple = "wasm32-unknown-unknown"
define void @shl_loop(ptr %a, i8 %shift, i32 %count) {
; CHECK-LABEL: shl_loop:
; CHECK: .functype shl_loop (i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: .LBB0_1: # %body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: loop # label0:
; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.tee 3
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0:p2align=0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i8x16.shl
-; CHECK-NEXT: v128.store 16
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i32.const 16
-; CHECK-NEXT: i32.add
+; CHECK-NEXT: v128.store 0
+; CHECK-NEXT: local.get 3
; CHECK-NEXT: local.set 0
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i32.const -1
@@ -56,23 +58,25 @@ exit:
define void @shl_phi_loop(ptr %a, i8 %shift, i32 %count) {
; CHECK-LABEL: shl_phi_loop:
; CHECK: .functype shl_phi_loop (i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: .LBB1_1: # %body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: loop # label1:
; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.tee 3
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0:p2align=0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i8x16.shl
-; CHECK-NEXT: v128.store 16
+; CHECK-NEXT: v128.store 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: local.set 1
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i32.const 16
-; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 3
; CHECK-NEXT: local.set 0
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i32.const -1
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll
index 5e72e13a26edb9..8f1c95fd4a330b 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll
@@ -79,11 +79,11 @@ define void @lsr_crash_preserve_addrspace_unknown_type2(ptr addrspace(5) %array,
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[ADD:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[IDX:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ARRAY]], i32 [[J]]
-; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[ARRAY2]], i32 [[J]]
-; CHECK-NEXT: [[T:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ARRAY]], i32 [[J]]
+; CHECK-NEXT: [[IDX:%.*]] = getelementptr i8, ptr addrspace(5) [[ARRAY]], i32 [[J]]
+; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i8, ptr addrspace(3) [[ARRAY2]], i32 [[J]]
+; CHECK-NEXT: [[T:%.*]] = getelementptr i8, ptr addrspace(5) [[ARRAY]], i32 [[J]]
; CHECK-NEXT: [[N8:%.*]] = load i8, ptr addrspace(5) [[T]], align 4
-; CHECK-NEXT: [[N7:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[T]], i32 42
+; CHECK-NEXT: [[N7:%.*]] = getelementptr i8, ptr addrspace(5) [[T]], i32 42
; CHECK-NEXT: [[N9:%.*]] = load i8, ptr addrspace(5) [[N7]], align 4
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[J]], 42
; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN17:.*]], label %[[FOR_INC]]
diff --git a/llvm/test/Transforms/LoopStrengthReduce/ARM/illegal-addr-modes.ll b/llvm/test/Transforms/LoopStrengthReduce/ARM/illegal-addr-modes.ll
index 745b54e2bdc642..1709ec1086042f 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/ARM/illegal-addr-modes.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/ARM/illegal-addr-modes.ll
@@ -25,7 +25,7 @@ define ptr @negativeOneCase(ptr returned %a, ptr nocapture readonly %b, i32 %n)
; CHECK-NEXT: br label [[WHILE_COND:%.*]]
; CHECK: while.cond:
; CHECK-NEXT: [[P_0:%.*]] = phi ptr [ [[ADD_PTR]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[P_0]], i32 1
+; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr i8, ptr [[P_0]], i32 1
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[TMP0]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_COND2_PREHEADER:%.*]], label [[WHILE_COND]]
|
@llvm/pr-subscribers-llvm-transforms Author: Nikita Popov (nikic) ChangesAs pointed out in the review of #102133, SCEVExpander currently incorrectly reuses GEP instructions that have poison-generating flags set. Fix this by clearing the flags on the reused instruction. Full diff: https://github.com/llvm/llvm-project/pull/109293.diff 5 Files Affected:
diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
index 62c1e15a9a60e1..0af3efeacd040c 100644
--- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -47,6 +47,7 @@ struct PoisonFlags {
unsigned Exact : 1;
unsigned Disjoint : 1;
unsigned NNeg : 1;
+ GEPNoWrapFlags GEPNW;
PoisonFlags(const Instruction *I);
void apply(Instruction *I);
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 0927a3015818fd..1088547e1f3efe 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -49,6 +49,7 @@ PoisonFlags::PoisonFlags(const Instruction *I) {
Exact = false;
Disjoint = false;
NNeg = false;
+ GEPNW = GEPNoWrapFlags::none();
if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) {
NUW = OBO->hasNoUnsignedWrap();
NSW = OBO->hasNoSignedWrap();
@@ -63,6 +64,8 @@ PoisonFlags::PoisonFlags(const Instruction *I) {
NUW = TI->hasNoUnsignedWrap();
NSW = TI->hasNoSignedWrap();
}
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
+ GEPNW = GEP->getNoWrapFlags();
}
void PoisonFlags::apply(Instruction *I) {
@@ -80,6 +83,8 @@ void PoisonFlags::apply(Instruction *I) {
I->setHasNoUnsignedWrap(NUW);
I->setHasNoSignedWrap(NSW);
}
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
+ GEP->setNoWrapFlags(GEPNW);
}
/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
@@ -370,11 +375,15 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Value *V) {
// generated code.
if (isa<DbgInfoIntrinsic>(IP))
ScanLimit++;
- if (IP->getOpcode() == Instruction::GetElementPtr &&
- IP->getOperand(0) == V && IP->getOperand(1) == Idx &&
- cast<GEPOperator>(&*IP)->getSourceElementType() ==
- Builder.getInt8Ty())
- return &*IP;
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(IP)) {
+ if (GEP->getPointerOperand() == V &&
+ GEP->getSourceElementType() == Builder.getInt8Ty() &&
+ GEP->getOperand(1) == Idx) {
+ rememberFlags(GEP);
+ GEP->setNoWrapFlags(GEPNoWrapFlags::none());
+ return &*IP;
+ }
+ }
if (IP == BlockBegin) break;
}
}
diff --git a/llvm/test/CodeGen/WebAssembly/simd-shift-in-loop.ll b/llvm/test/CodeGen/WebAssembly/simd-shift-in-loop.ll
index d4518d40e42986..75612ba645ca4d 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-shift-in-loop.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-shift-in-loop.ll
@@ -9,19 +9,21 @@ target triple = "wasm32-unknown-unknown"
define void @shl_loop(ptr %a, i8 %shift, i32 %count) {
; CHECK-LABEL: shl_loop:
; CHECK: .functype shl_loop (i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: .LBB0_1: # %body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: loop # label0:
; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.tee 3
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0:p2align=0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i8x16.shl
-; CHECK-NEXT: v128.store 16
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i32.const 16
-; CHECK-NEXT: i32.add
+; CHECK-NEXT: v128.store 0
+; CHECK-NEXT: local.get 3
; CHECK-NEXT: local.set 0
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i32.const -1
@@ -56,23 +58,25 @@ exit:
define void @shl_phi_loop(ptr %a, i8 %shift, i32 %count) {
; CHECK-LABEL: shl_phi_loop:
; CHECK: .functype shl_phi_loop (i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: .LBB1_1: # %body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: loop # label1:
; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.tee 3
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0:p2align=0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i8x16.shl
-; CHECK-NEXT: v128.store 16
+; CHECK-NEXT: v128.store 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: local.set 1
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i32.const 16
-; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 3
; CHECK-NEXT: local.set 0
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i32.const -1
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll
index 5e72e13a26edb9..8f1c95fd4a330b 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll
@@ -79,11 +79,11 @@ define void @lsr_crash_preserve_addrspace_unknown_type2(ptr addrspace(5) %array,
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[ADD:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[IDX:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ARRAY]], i32 [[J]]
-; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[ARRAY2]], i32 [[J]]
-; CHECK-NEXT: [[T:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ARRAY]], i32 [[J]]
+; CHECK-NEXT: [[IDX:%.*]] = getelementptr i8, ptr addrspace(5) [[ARRAY]], i32 [[J]]
+; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i8, ptr addrspace(3) [[ARRAY2]], i32 [[J]]
+; CHECK-NEXT: [[T:%.*]] = getelementptr i8, ptr addrspace(5) [[ARRAY]], i32 [[J]]
; CHECK-NEXT: [[N8:%.*]] = load i8, ptr addrspace(5) [[T]], align 4
-; CHECK-NEXT: [[N7:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[T]], i32 42
+; CHECK-NEXT: [[N7:%.*]] = getelementptr i8, ptr addrspace(5) [[T]], i32 42
; CHECK-NEXT: [[N9:%.*]] = load i8, ptr addrspace(5) [[N7]], align 4
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[J]], 42
; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN17:.*]], label %[[FOR_INC]]
diff --git a/llvm/test/Transforms/LoopStrengthReduce/ARM/illegal-addr-modes.ll b/llvm/test/Transforms/LoopStrengthReduce/ARM/illegal-addr-modes.ll
index 745b54e2bdc642..1709ec1086042f 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/ARM/illegal-addr-modes.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/ARM/illegal-addr-modes.ll
@@ -25,7 +25,7 @@ define ptr @negativeOneCase(ptr returned %a, ptr nocapture readonly %b, i32 %n)
; CHECK-NEXT: br label [[WHILE_COND:%.*]]
; CHECK: while.cond:
; CHECK-NEXT: [[P_0:%.*]] = phi ptr [ [[ADD_PTR]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[P_0]], i32 1
+; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr i8, ptr [[P_0]], i32 1
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[TMP0]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_COND2_PREHEADER:%.*]], label [[WHILE_COND]]
|
GEP->getSourceElementType() == Builder.getInt8Ty() && | ||
GEP->getOperand(1) == Idx) { | ||
rememberFlags(GEP); | ||
GEP->setNoWrapFlags(GEPNoWrapFlags::none()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In #102133 this will become an intersect with SCEV nuw flag instead.
@@ -9,19 +9,21 @@ target triple = "wasm32-unknown-unknown" | |||
define void @shl_loop(ptr %a, i8 %shift, i32 %count) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A bit unfortunate that this regresses, but I think we'll recover this in the future (after #102133 + inference for gep nuw in instcombine, at which point we should be able to preserve the nuw flag here, which is all wasm cares about).
ping |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
It might be good to add a dedicated test for clearing the flags, rather than relying on AMDGPU/ARM LSR tests.
As pointed out in the review of llvm#102133, SCEVExpander currently incorrectly reuses GEP instructions that have poison-generating flags set. Fix this by clearing the flags on the reused instruction.
e04122a
to
4c37dc3
Compare
As pointed out in the review of llvm#102133, SCEVExpander currently incorrectly reuses GEP instructions that have poison-generating flags set. Fix this by clearing the flags on the reused instruction.
As pointed out in the review of #102133, SCEVExpander currently incorrectly reuses GEP instructions that have poison-generating flags set. Fix this by clearing the flags on the reused instruction.