Skip to content

Commit f564a48

Browse files
committed
[SLP]Fix PR108700: correctly identify id of the operand node
If the operand node for a trunc is not created during tree construction because one of the previously built nodes is reused instead, we need to correctly identify the index of that reused node in order to emit the correct code. Fixes #108700
1 parent 4c6f313 commit f564a48

File tree

3 files changed

+86
-10
lines changed

3 files changed

+86
-10
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7481,7 +7481,16 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
74817481
PrevMaxBW),
74827482
std::min<unsigned>(DL->getTypeSizeInBits(VL0->getType()),
74837483
PrevMinBW));
7484-
ExtraBitWidthNodes.insert(VectorizableTree.size() + 1);
7484+
}
7485+
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
7486+
ReuseShuffleIndices);
7487+
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
7488+
7489+
TE->setOperandsInOrder();
7490+
for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
7491+
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
7492+
if (ShuffleOrOp == Instruction::Trunc) {
7493+
ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
74857494
} else if (ShuffleOrOp == Instruction::SIToFP ||
74867495
ShuffleOrOp == Instruction::UIToFP) {
74877496
unsigned NumSignBits =
@@ -7492,15 +7501,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
74927501
}
74937502
if (NumSignBits * 2 >=
74947503
DL->getTypeSizeInBits(VL0->getOperand(0)->getType()))
7495-
ExtraBitWidthNodes.insert(VectorizableTree.size() + 1);
7504+
ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
74967505
}
7497-
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
7498-
ReuseShuffleIndices);
7499-
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
7500-
7501-
TE->setOperandsInOrder();
7502-
for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
7503-
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
75047506
return;
75057507
}
75067508
case Instruction::ICmp:

llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-user-not-min.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@ define void @test(ptr %block, ptr noalias %pixels, i1 %b) {
66
; CHECK-SAME: ptr [[BLOCK:%.*]], ptr noalias [[PIXELS:%.*]], i1 [[B:%.*]]) {
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i1> <i1 true, i1 poison, i1 false, i1 false>, i1 [[B]], i32 1
9+
; CHECK-NEXT: [[TMP1:%.*]] = sext <4 x i1> [[TMP0]] to <4 x i8>
910
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[BLOCK]], align 2
1011
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i16> [[TMP2]], zeroinitializer
1112
; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i16> [[TMP2]] to <4 x i8>
12-
; CHECK-NEXT: [[TMP1:%.*]] = sext <4 x i1> [[TMP0]] to <4 x i8>
1313
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP3]], <4 x i8> [[TMP4]], <4 x i8> [[TMP1]]
1414
; CHECK-NEXT: store <4 x i8> [[TMP5]], ptr [[PIXELS]], align 1
1515
; CHECK-NEXT: ret void
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define i16 @test() {
5+
; CHECK-LABEL: define i16 @test() {
6+
; CHECK-NEXT: [[ENTRY:.*:]]
7+
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> <i1 false, i1 false, i1 poison, i1 poison>, <2 x i1> zeroinitializer, i64 2)
8+
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> zeroinitializer, [[TMP0]]
9+
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> zeroinitializer, [[TMP1]]
10+
; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP2]], zeroinitializer
11+
; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i1> [[TMP3]] to <4 x i64>
12+
; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i64> [[TMP4]], zeroinitializer
13+
; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i1> zeroinitializer, [[TMP1]]
14+
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> zeroinitializer, <4 x i1> [[TMP6]]
15+
; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i1> [[TMP7]] to <4 x i16>
16+
; CHECK-NEXT: [[TMP9:%.*]] = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> [[TMP8]])
17+
; CHECK-NEXT: ret i16 [[TMP9]]
18+
;
19+
entry:
20+
%conv73 = xor i64 0, 0
21+
%and.i = and i64 0, 0
22+
%xor2.i = or i64 %and.i, 0
23+
%sub.i = or i64 %xor2.i, 0
24+
%xor3.i = xor i64 %sub.i, %conv73
25+
%and4.i = and i64 %xor3.i, 0
26+
%cmp.i = icmp slt i64 %and4.i, 0
27+
%0 = trunc i64 %conv73 to i16
28+
%1 = or i16 0, %0
29+
%conv73i = xor i64 0, 0
30+
%andi.i = and i64 0, 0
31+
%xor2i.i = or i64 %andi.i, 0
32+
%subi.i = or i64 %xor2i.i, 0
33+
%xor3i.i = xor i64 %subi.i, %conv73i
34+
%and4i.i = and i64 %xor3i.i, 0
35+
%cmpi.i = icmp slt i64 %and4i.i, 0
36+
%2 = trunc i64 %conv73i to i16
37+
%3 = or i16 0, %2
38+
%4 = select i1 %cmpi.i, i16 0, i16 %3
39+
%5 = select i1 %cmp.i, i16 0, i16 %1
40+
%6 = zext i32 0 to i64
41+
%add.ip = or i64 %6, 0
42+
%orp = or i64 %add.ip, 0
43+
%conv72p = shl i64 %orp, 0
44+
%sextp = ashr i64 %conv72p, 0
45+
%conv73p = xor i64 %sextp, 0
46+
%and.ip = and i64 0, 0
47+
%xor2.ip = or i64 %and.ip, 0
48+
%sub.ip = or i64 %xor2.ip, 0
49+
%xor3.ip = xor i64 %sub.ip, %conv73p
50+
%and4.ip = and i64 %xor3.ip, 0
51+
%cmp.ip = icmp slt i64 %and4.ip, 0
52+
%7 = trunc i64 %conv73p to i16
53+
%8 = or i16 0, %7
54+
%9 = select i1 %cmp.ip, i16 0, i16 %8
55+
%conv76i = and i16 %4, %5
56+
%conv76p = and i16 %conv76i, %9
57+
%10 = zext i32 0 to i64
58+
%add.ip1 = or i64 %10, 0
59+
%orp1 = or i64 %add.ip1, 0
60+
%conv72p1 = shl i64 %orp1, 0
61+
%sextp1 = ashr i64 %conv72p1, 0
62+
%conv73p1 = xor i64 %sextp1, 0
63+
%and.ip1 = and i64 0, 0
64+
%xor2.ip1 = or i64 %and.ip1, 0
65+
%sub.ip1 = or i64 %xor2.ip1, 0
66+
%xor3.ip1 = xor i64 %sub.ip1, %conv73p1
67+
%and4.ip1 = and i64 %xor3.ip1, 0
68+
%cmp.ip1 = icmp slt i64 %and4.ip1, 0
69+
%11 = trunc i64 %conv73p1 to i16
70+
%12 = or i16 0, %11
71+
%13 = select i1 %cmp.ip1, i16 0, i16 %12
72+
%conv76p2 = and i16 %conv76p, %13
73+
ret i16 %conv76p2
74+
}

0 commit comments

Comments
 (0)