Skip to content

Commit 054a3c5

Browse files
committed
[SCEVExpander] Clear flags when reusing GEP
As pointed out in the review of llvm#102133, SCEVExpander currently incorrectly reuses GEP instructions that have poison-generating flags set. Fix this by clearing the flags on the reused instruction.
1 parent: d50d961 · commit: 054a3c5

File tree

5 files changed: 32 additions (+32) and 18 deletions (-18).

5 files changed: 32 additions (+32) and 18 deletions (-18).

llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@ struct PoisonFlags {
4747
unsigned Exact : 1;
4848
unsigned Disjoint : 1;
4949
unsigned NNeg : 1;
50+
GEPNoWrapFlags GEPNW;
5051

5152
PoisonFlags(const Instruction *I);
5253
void apply(Instruction *I);

llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,7 @@ PoisonFlags::PoisonFlags(const Instruction *I) {
4949
Exact = false;
5050
Disjoint = false;
5151
NNeg = false;
52+
GEPNW = GEPNoWrapFlags::none();
5253
if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) {
5354
NUW = OBO->hasNoUnsignedWrap();
5455
NSW = OBO->hasNoSignedWrap();
@@ -63,6 +64,8 @@ PoisonFlags::PoisonFlags(const Instruction *I) {
6364
NUW = TI->hasNoUnsignedWrap();
6465
NSW = TI->hasNoSignedWrap();
6566
}
67+
if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
68+
GEPNW = GEP->getNoWrapFlags();
6669
}
6770

6871
void PoisonFlags::apply(Instruction *I) {
@@ -80,6 +83,8 @@ void PoisonFlags::apply(Instruction *I) {
8083
I->setHasNoUnsignedWrap(NUW);
8184
I->setHasNoSignedWrap(NSW);
8285
}
86+
if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
87+
GEP->setNoWrapFlags(GEPNW);
8388
}
8489

8590
/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
@@ -370,11 +375,15 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Value *V) {
370375
// generated code.
371376
if (isa<DbgInfoIntrinsic>(IP))
372377
ScanLimit++;
373-
if (IP->getOpcode() == Instruction::GetElementPtr &&
374-
IP->getOperand(0) == V && IP->getOperand(1) == Idx &&
375-
cast<GEPOperator>(&*IP)->getSourceElementType() ==
376-
Builder.getInt8Ty())
377-
return &*IP;
378+
if (auto *GEP = dyn_cast<GetElementPtrInst>(IP)) {
379+
if (GEP->getPointerOperand() == V &&
380+
GEP->getSourceElementType() == Builder.getInt8Ty() &&
381+
GEP->getOperand(1) == Idx) {
382+
rememberFlags(GEP);
383+
GEP->setNoWrapFlags(GEPNoWrapFlags::none());
384+
return &*IP;
385+
}
386+
}
378387
if (IP == BlockBegin) break;
379388
}
380389
}

llvm/test/CodeGen/WebAssembly/simd-shift-in-loop.ll

Lines changed: 12 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -9,19 +9,21 @@ target triple = "wasm32-unknown-unknown"
99
define void @shl_loop(ptr %a, i8 %shift, i32 %count) {
1010
; CHECK-LABEL: shl_loop:
1111
; CHECK: .functype shl_loop (i32, i32, i32) -> ()
12+
; CHECK-NEXT: .local i32
1213
; CHECK-NEXT: # %bb.0: # %entry
1314
; CHECK-NEXT: .LBB0_1: # %body
1415
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1516
; CHECK-NEXT: loop # label0:
1617
; CHECK-NEXT: local.get 0
18+
; CHECK-NEXT: i32.const 16
19+
; CHECK-NEXT: i32.add
20+
; CHECK-NEXT: local.tee 3
1721
; CHECK-NEXT: local.get 0
1822
; CHECK-NEXT: v128.load 0:p2align=0
1923
; CHECK-NEXT: local.get 1
2024
; CHECK-NEXT: i8x16.shl
21-
; CHECK-NEXT: v128.store 16
22-
; CHECK-NEXT: local.get 0
23-
; CHECK-NEXT: i32.const 16
24-
; CHECK-NEXT: i32.add
25+
; CHECK-NEXT: v128.store 0
26+
; CHECK-NEXT: local.get 3
2527
; CHECK-NEXT: local.set 0
2628
; CHECK-NEXT: local.get 2
2729
; CHECK-NEXT: i32.const -1
@@ -56,23 +58,25 @@ exit:
5658
define void @shl_phi_loop(ptr %a, i8 %shift, i32 %count) {
5759
; CHECK-LABEL: shl_phi_loop:
5860
; CHECK: .functype shl_phi_loop (i32, i32, i32) -> ()
61+
; CHECK-NEXT: .local i32
5962
; CHECK-NEXT: # %bb.0: # %entry
6063
; CHECK-NEXT: .LBB1_1: # %body
6164
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
6265
; CHECK-NEXT: loop # label1:
6366
; CHECK-NEXT: local.get 0
67+
; CHECK-NEXT: i32.const 16
68+
; CHECK-NEXT: i32.add
69+
; CHECK-NEXT: local.tee 3
6470
; CHECK-NEXT: local.get 0
6571
; CHECK-NEXT: v128.load 0:p2align=0
6672
; CHECK-NEXT: local.get 1
6773
; CHECK-NEXT: i8x16.shl
68-
; CHECK-NEXT: v128.store 16
74+
; CHECK-NEXT: v128.store 0
6975
; CHECK-NEXT: local.get 1
7076
; CHECK-NEXT: i32.const 1
7177
; CHECK-NEXT: i32.and
7278
; CHECK-NEXT: local.set 1
73-
; CHECK-NEXT: local.get 0
74-
; CHECK-NEXT: i32.const 16
75-
; CHECK-NEXT: i32.add
79+
; CHECK-NEXT: local.get 3
7680
; CHECK-NEXT: local.set 0
7781
; CHECK-NEXT: local.get 2
7882
; CHECK-NEXT: i32.const -1

llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -79,11 +79,11 @@ define void @lsr_crash_preserve_addrspace_unknown_type2(ptr addrspace(5) %array,
7979
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
8080
; CHECK: [[FOR_BODY]]:
8181
; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[ADD:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[ENTRY]] ]
82-
; CHECK-NEXT: [[IDX:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ARRAY]], i32 [[J]]
83-
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[ARRAY2]], i32 [[J]]
84-
; CHECK-NEXT: [[T:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ARRAY]], i32 [[J]]
82+
; CHECK-NEXT: [[IDX:%.*]] = getelementptr i8, ptr addrspace(5) [[ARRAY]], i32 [[J]]
83+
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i8, ptr addrspace(3) [[ARRAY2]], i32 [[J]]
84+
; CHECK-NEXT: [[T:%.*]] = getelementptr i8, ptr addrspace(5) [[ARRAY]], i32 [[J]]
8585
; CHECK-NEXT: [[N8:%.*]] = load i8, ptr addrspace(5) [[T]], align 4
86-
; CHECK-NEXT: [[N7:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[T]], i32 42
86+
; CHECK-NEXT: [[N7:%.*]] = getelementptr i8, ptr addrspace(5) [[T]], i32 42
8787
; CHECK-NEXT: [[N9:%.*]] = load i8, ptr addrspace(5) [[N7]], align 4
8888
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[J]], 42
8989
; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN17:.*]], label %[[FOR_INC]]

llvm/test/Transforms/LoopStrengthReduce/ARM/illegal-addr-modes.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ define ptr @negativeOneCase(ptr returned %a, ptr nocapture readonly %b, i32 %n)
2525
; CHECK-NEXT: br label [[WHILE_COND:%.*]]
2626
; CHECK: while.cond:
2727
; CHECK-NEXT: [[P_0:%.*]] = phi ptr [ [[ADD_PTR]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR:%.*]], [[WHILE_COND]] ]
28-
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[P_0]], i32 1
28+
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr i8, ptr [[P_0]], i32 1
2929
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1
3030
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[TMP0]], 0
3131
; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_COND2_PREHEADER:%.*]], label [[WHILE_COND]]

0 commit comments

Comments
 (0)