Skip to content

Commit 19a34dd

Browse files
[SLP]Do not account external uses in EH block and in non-returning blocks
There is no need to account for the cost of external uses in EH and non-returning basic blocks.

Reviewers: RKSimon
Reviewed By: RKSimon
Pull Request: #112045
1 parent b9d7117 commit 19a34dd

File tree

3 files changed

+18
-34
lines changed

3 files changed

+18
-34
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11977,11 +11977,13 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
1197711977
if (EphValues.count(EU.User))
1197811978
continue;
1197911979

11980-
// Used in unreachable blocks or in landing pads (rarely executed).
11980+
// Used in unreachable blocks, in EH pads (rarely executed), or in blocks
11981+
// terminated with an unreachable instruction.
1198111982
if (BasicBlock *UserParent =
1198211983
EU.User ? cast<Instruction>(EU.User)->getParent() : nullptr;
1198311984
UserParent &&
11984-
(!DT->isReachableFromEntry(UserParent) || UserParent->isLandingPad()))
11985+
(!DT->isReachableFromEntry(UserParent) || UserParent->isEHPad() ||
11986+
isa_and_present<UnreachableInst>(UserParent->getTerminator())))
1198511987
continue;
1198611988

1198711989
// We only add extract cost once for the same scalar.

llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ define void @test1(ptr %p) personality ptr @__CxxFrameHandler3 {
99
; CHECK-LABEL: @test1(
1010
; CHECK-NEXT: invoke.cont:
1111
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[P:%.*]], align 8
12-
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, ptr [[P]], align 8
12+
; CHECK-NEXT: [[LOAD1:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0
1313
; CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[P]], align 8
1414
; CHECK-NEXT: invoke void @throw()
1515
; CHECK-NEXT: to label [[UNREACHABLE:%.*]] unwind label [[CATCH_DISPATCH:%.*]]

llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll

Lines changed: 13 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -11,41 +11,23 @@ define void @hoge() {
1111
; CHECK-NEXT: ret void
1212
; CHECK: bb2:
1313
; CHECK-NEXT: [[T:%.*]] = select i1 undef, i16 undef, i16 15
14-
; CHECK-NEXT: [[T3:%.*]] = sext i16 undef to i32
15-
; CHECK-NEXT: [[T4:%.*]] = sext i16 [[T]] to i32
16-
; CHECK-NEXT: [[T5:%.*]] = sub nsw i32 undef, [[T4]]
17-
; CHECK-NEXT: [[T6:%.*]] = sub i32 [[T5]], undef
18-
; CHECK-NEXT: [[T7:%.*]] = sub nsw i32 63, [[T3]]
19-
; CHECK-NEXT: [[T8:%.*]] = sub i32 [[T7]], undef
20-
; CHECK-NEXT: [[T9:%.*]] = add i32 [[T8]], undef
21-
; CHECK-NEXT: [[T10:%.*]] = add nsw i32 [[T6]], 15
22-
; CHECK-NEXT: [[T11:%.*]] = icmp sgt i32 [[T9]], [[T10]]
23-
; CHECK-NEXT: [[T12:%.*]] = select i1 [[T11]], i32 [[T9]], i32 [[T10]]
24-
; CHECK-NEXT: [[T13:%.*]] = add nsw i32 [[T6]], 31
25-
; CHECK-NEXT: [[T14:%.*]] = icmp sgt i32 [[T12]], [[T13]]
26-
; CHECK-NEXT: [[T15:%.*]] = select i1 [[T14]], i32 [[T12]], i32 [[T13]]
27-
; CHECK-NEXT: [[T16:%.*]] = add nsw i32 [[T6]], 47
28-
; CHECK-NEXT: [[T17:%.*]] = icmp sgt i32 [[T15]], [[T16]]
29-
; CHECK-NEXT: [[T18:%.*]] = select i1 [[T17]], i32 [[T15]], i32 [[T16]]
14+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> <i16 poison, i16 undef>, i16 [[T]], i32 0
15+
; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i16> [[TMP0]] to <2 x i32>
16+
; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <2 x i32> <i32 undef, i32 63>, [[TMP1]]
17+
; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], undef
18+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
19+
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], <i32 undef, i32 15, i32 31, i32 47>
20+
; CHECK-NEXT: [[T18:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]])
3021
; CHECK-NEXT: [[T19:%.*]] = select i1 undef, i32 [[T18]], i32 undef
3122
; CHECK-NEXT: [[T20:%.*]] = icmp sgt i32 [[T19]], 63
32-
; CHECK-NEXT: [[T21:%.*]] = sub nsw i32 undef, [[T3]]
33-
; CHECK-NEXT: [[T22:%.*]] = sub i32 [[T21]], undef
34-
; CHECK-NEXT: [[T23:%.*]] = sub nsw i32 undef, [[T4]]
35-
; CHECK-NEXT: [[T24:%.*]] = sub i32 [[T23]], undef
36-
; CHECK-NEXT: [[T25:%.*]] = add nsw i32 [[T24]], -49
37-
; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T22]], -33
38-
; CHECK-NEXT: [[T35:%.*]] = add nsw i32 [[T24]], -33
39-
; CHECK-NEXT: [[T40:%.*]] = add nsw i32 [[T22]], -17
23+
; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <2 x i32> undef, [[TMP1]]
24+
; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP7]], undef
25+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
26+
; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP9]], <i32 -49, i32 -33, i32 -33, i32 -17>
27+
; CHECK-NEXT: [[T25:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP10]])
4028
; CHECK-NEXT: [[OP_RDX:%.*]] = icmp slt i32 undef, [[T25]]
4129
; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 undef, i32 [[T25]]
42-
; CHECK-NEXT: [[OP_RDX2:%.*]] = icmp slt i32 [[T30]], [[T35]]
43-
; CHECK-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[T30]], i32 [[T35]]
44-
; CHECK-NEXT: [[OP_RDX4:%.*]] = icmp slt i32 [[OP_RDX1]], [[OP_RDX3]]
45-
; CHECK-NEXT: [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]]
46-
; CHECK-NEXT: [[OP_RDX6:%.*]] = icmp slt i32 [[OP_RDX5]], [[T40]]
47-
; CHECK-NEXT: [[OP_RDX7:%.*]] = select i1 [[OP_RDX6]], i32 [[OP_RDX5]], i32 [[T40]]
48-
; CHECK-NEXT: [[T45:%.*]] = icmp sgt i32 undef, [[OP_RDX7]]
30+
; CHECK-NEXT: [[T45:%.*]] = icmp sgt i32 undef, [[OP_RDX1]]
4931
; CHECK-NEXT: unreachable
5032
;
5133
bb:

0 commit comments

Comments
 (0)