Skip to content

Commit c28b7eb

Browse files
committed
[SLP]Fix handling of -slp-vectorize-hor-store for values with many uses.
1 parent 68da743 commit c28b7eb

File tree

2 files changed

+6
-11
lines changed

2 files changed

+6
-11
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15822,8 +15822,8 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
15822 15822
// to investigate if we can safely turn on slp-vectorize-hor-store
15823 15823
// instead to allow lookup for reduction chains in all non-vectorized
15824 15824
// stores (need to check side effects and compile time).
15825-
TryToVectorizeRoot = (I == Stores.end() || I->second.size() == 1) &&
15826-
SI->getValueOperand()->hasOneUse();
15825+
TryToVectorizeRoot |= (I == Stores.end() || I->second.size() == 1) &&
15826+
SI->getValueOperand()->hasOneUse();
15827 15827
}
15828 15828
if (TryToVectorizeRoot) {
15829 15829
for (auto *V : it->operand_values()) {

llvm/test/Transforms/SLPVectorizer/X86/horizontal-store-many-uses.ll

Lines changed: 4 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -6,15 +6,10 @@ define void @test(ptr noalias %pl, ptr noalias %res, ptr noalias %p2) {
6 6
; CHECK-LABEL: define void @test(
7 7
; CHECK-SAME: ptr noalias [[PL:%.*]], ptr noalias [[RES:%.*]], ptr noalias [[P2:%.*]]) {
8 8
; CHECK-NEXT: entry:
9-
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @arr_i32, align 16
10-
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr_i32, i64 0, i64 1), align 4
11-
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
12-
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr_i32, i64 0, i64 2), align 8
13-
; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
14-
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr_i32, i64 0, i64 3), align 4
15-
; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]]
16-
; CHECK-NEXT: store i32 [[ADD_2]], ptr [[P2]], align 16
17-
; CHECK-NEXT: store i32 [[ADD_2]], ptr [[RES]], align 16
9+
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @arr_i32, align 16
10+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP0]])
11+
; CHECK-NEXT: store i32 [[TMP1]], ptr [[P2]], align 16
12+
; CHECK-NEXT: store i32 [[TMP1]], ptr [[RES]], align 16
18 13
; CHECK-NEXT: ret void
19 14
;
20 15
entry:

0 commit comments

Comments
 (0)