Skip to content

Commit 1d6b5b6

Browse files
committed
[SLP] Fix PR61050: Assertion `I->use_empty() && "trying to erase instruction with users."' failed.
When gathering the counter for reused scalars, the lookup must use the reduced value (RdxVal), not the original value (OrigV). The same-values counter (SameValuesCounter) is keyed by the reduced values, not by the original ones.
1 parent 5193d19 commit 1d6b5b6

File tree

2 files changed

+33
-1
lines changed

2 files changed

+33
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13157,7 +13157,7 @@ class HorizontalReduction {
1315713157
for (Value *RdxVal : VL) {
1315813158
Value *OrigV = TrackedToOrig.find(RdxVal)->second;
1315913159
if (IsSupportedHorRdxIdentityOp) {
13160-
VectorizedVals.try_emplace(OrigV, SameValuesCounter[OrigV]);
13160+
VectorizedVals.try_emplace(OrigV, SameValuesCounter[RdxVal]);
1316113161
continue;
1316213162
}
1316313163
++VectorizedVals.try_emplace(OrigV, 0).first->getSecond();
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
; Regression test for PR61050. Each lane extracted from %0 feeds the scalar add
; chain twice: once directly and once through `or i32 <lane>, 0`.
; The expected output is two vector add-reductions (one over the vectorized
; `or`, one over %0 itself) whose results are combined by a single scalar add.
define i32 @test() {
5+
; CHECK-LABEL: @test(
6+
; CHECK-NEXT: bb:
7+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
8+
; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> [[TMP0]], zeroinitializer
9+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
10+
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP0]])
11+
; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP2]], [[TMP3]]
12+
; CHECK-NEXT: ret i32 [[OP_RDX]]
13+
;
14+
bb:
15+
%0 = shufflevector <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
16+
%1 = extractelement <4 x i32> %0, i32 3
17+
%2 = extractelement <4 x i32> %0, i32 2
18+
%3 = extractelement <4 x i32> %0, i32 1
19+
%4 = extractelement <4 x i32> %0, i32 0
20+
%inst514 = or i32 %4, 0
21+
%inst494 = or i32 %3, 0
22+
%inst474 = or i32 %2, 0
23+
%inst454 = or i32 %1, 0
24+
%inst458 = add i32 %1, %inst454
25+
%inst477 = add i32 %inst458, %2
26+
%inst478 = add i32 %inst477, %inst474
27+
%inst497 = add i32 %inst478, %3
28+
%inst498 = add i32 %inst497, %inst494
29+
%inst517 = add i32 %inst498, %4
30+
%inst518 = add i32 %inst517, %inst514
31+
ret i32 %inst518
32+
}

0 commit comments

Comments
 (0)