Skip to content

Commit d77dc87

Browse files
authored
[SLP][REVEC] Fix type comparison and mask transformation for REVEC. (#135310)
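When REVEC is enabled, ScalarTy may be a FixedVectorType rather than a scalar type. Compare the operand's element type against ScalarTy's scalar (element) type to decide whether an integer cast is needed. Also expand the shuffle mask from scalar indices to vector indices and scale the insert index by the number of elements in ScalarTy.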
1 parent 8be4bd8 commit d77dc87

File tree

2 files changed: 78 additions and 3 deletions.

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17003,7 +17003,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
1700317003
});
1700417004
return IsSigned;
1700517005
};
17006-
if (cast<VectorType>(Op1->getType())->getElementType() != ScalarTy) {
17006+
if (cast<VectorType>(Op1->getType())->getElementType() !=
17007+
ScalarTy->getScalarType()) {
1700717008
assert(ScalarTy->isIntegerTy() && "Expected item in MinBWs.");
1700817009
Op1 = Builder.CreateIntCast(
1700917010
Op1,
@@ -17012,7 +17013,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
1701217013
cast<FixedVectorType>(Op1->getType())->getNumElements()),
1701317014
GetOperandSignedness(&OpTE1));
1701417015
}
17015-
if (cast<VectorType>(Op2->getType())->getElementType() != ScalarTy) {
17016+
if (cast<VectorType>(Op2->getType())->getElementType() !=
17017+
ScalarTy->getScalarType()) {
1701617018
assert(ScalarTy->isIntegerTy() && "Expected item in MinBWs.");
1701717019
Op2 = Builder.CreateIntCast(
1701817020
Op2,
@@ -17027,9 +17029,15 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
1702717029
Mask.begin(),
1702817030
std::next(Mask.begin(), E->CombinedEntriesWithIndices.back().second),
1702917031
0);
17032+
unsigned ScalarTyNumElements = getNumElements(ScalarTy);
17033+
if (ScalarTyNumElements != 1) {
17034+
assert(SLPReVec && "Only supported by REVEC.");
17035+
transformScalarShuffleIndiciesToVector(ScalarTyNumElements, Mask);
17036+
}
1703017037
Value *Vec = Builder.CreateShuffleVector(Op1, Mask);
1703117038
Vec = createInsertVector(Builder, Vec, Op2,
17032-
E->CombinedEntriesWithIndices.back().second);
17039+
E->CombinedEntriesWithIndices.back().second *
17040+
ScalarTyNumElements);
1703317041
E->VectorizedValue = Vec;
1703417042
return Vec;
1703517043
}
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx -passes=slp-vectorizer -S -slp-revec < %s | FileCheck %s
3+
4+
define void @test() {
5+
; CHECK-LABEL: @test(
6+
; CHECK-NEXT: entry:
7+
; CHECK-NEXT: [[TMP0:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> poison, <4 x i32> zeroinitializer, i64 0)
8+
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP0]], <4 x i32> zeroinitializer, i64 4)
9+
; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP1]], <4 x i32> zeroinitializer, i64 8)
10+
; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP2]], <4 x i32> zeroinitializer, i64 12)
11+
; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP3]], <4 x i32> zeroinitializer, i64 16)
12+
; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP4]], <4 x i32> zeroinitializer, i64 20)
13+
; CHECK-NEXT: [[TMP6:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP5]], <4 x i32> zeroinitializer, i64 24)
14+
; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP6]], <4 x i32> zeroinitializer, i64 28)
15+
; CHECK-NEXT: [[TMP8:%.*]] = trunc <32 x i32> [[TMP7]] to <32 x i1>
16+
; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i1> @llvm.vector.insert.v16i1.v4i1(<16 x i1> poison, <4 x i1> zeroinitializer, i64 0)
17+
; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i1> @llvm.vector.insert.v16i1.v4i1(<16 x i1> [[TMP9]], <4 x i1> zeroinitializer, i64 4)
18+
; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i1> @llvm.vector.insert.v16i1.v4i1(<16 x i1> [[TMP10]], <4 x i1> zeroinitializer, i64 8)
19+
; CHECK-NEXT: [[TMP12:%.*]] = call <16 x i1> @llvm.vector.insert.v16i1.v4i1(<16 x i1> [[TMP11]], <4 x i1> zeroinitializer, i64 12)
20+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
21+
; CHECK: vector.body:
22+
; CHECK-NEXT: [[TMP13:%.*]] = phi <32 x i1> [ [[TMP8]], [[ENTRY:%.*]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
23+
; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP12]], <16 x i1> [[TMP12]], <16 x i1> [[TMP12]]
24+
; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i1> [[TMP12]], [[TMP12]]
25+
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x i1> [[TMP14]], <16 x i1> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
26+
; CHECK-NEXT: [[TMP17:%.*]] = call <32 x i1> @llvm.vector.insert.v32i1.v16i1(<32 x i1> [[TMP16]], <16 x i1> [[TMP15]], i64 16)
27+
; CHECK-NEXT: [[TMP18]] = or <32 x i1> [[TMP13]], [[TMP17]]
28+
; CHECK-NEXT: br label [[VECTOR_BODY]]
29+
;
30+
entry:
31+
br label %vector.body
32+
33+
vector.body: ; preds = %vector.body, %entry
34+
%vec.phi30 = phi <4 x i32> [ zeroinitializer, %entry ], [ %predphi, %vector.body ]
35+
%vec.phi31 = phi <4 x i32> [ zeroinitializer, %entry ], [ %predphi40, %vector.body ]
36+
%vec.phi32 = phi <4 x i32> [ zeroinitializer, %entry ], [ %predphi41, %vector.body ]
37+
%vec.phi33 = phi <4 x i32> [ zeroinitializer, %entry ], [ %predphi42, %vector.body ]
38+
%vec.phi = phi <4 x i32> [ zeroinitializer, %entry ], [ %predphi43, %vector.body ]
39+
%vec.phi27 = phi <4 x i32> [ zeroinitializer, %entry ], [ %predphi44, %vector.body ]
40+
%vec.phi28 = phi <4 x i32> [ zeroinitializer, %entry ], [ %predphi45, %vector.body ]
41+
%vec.phi29 = phi <4 x i32> [ zeroinitializer, %entry ], [ %predphi46, %vector.body ]
42+
%narrow = select <4 x i1> zeroinitializer, <4 x i1> zeroinitializer, <4 x i1> zeroinitializer
43+
%narrow66 = select <4 x i1> zeroinitializer, <4 x i1> zeroinitializer, <4 x i1> zeroinitializer
44+
%narrow67 = select <4 x i1> zeroinitializer, <4 x i1> zeroinitializer, <4 x i1> zeroinitializer
45+
%narrow68 = select <4 x i1> zeroinitializer, <4 x i1> zeroinitializer, <4 x i1> zeroinitializer
46+
%not. = xor <4 x i1> zeroinitializer, zeroinitializer
47+
%not.69 = xor <4 x i1> zeroinitializer, zeroinitializer
48+
%not.70 = xor <4 x i1> zeroinitializer, zeroinitializer
49+
%not.71 = xor <4 x i1> zeroinitializer, zeroinitializer
50+
%0 = zext <4 x i1> %narrow to <4 x i32>
51+
%1 = zext <4 x i1> %narrow66 to <4 x i32>
52+
%2 = zext <4 x i1> %narrow67 to <4 x i32>
53+
%3 = zext <4 x i1> %narrow68 to <4 x i32>
54+
%4 = zext <4 x i1> %not. to <4 x i32>
55+
%5 = zext <4 x i1> %not.69 to <4 x i32>
56+
%6 = zext <4 x i1> %not.70 to <4 x i32>
57+
%7 = zext <4 x i1> %not.71 to <4 x i32>
58+
%predphi = or <4 x i32> %vec.phi30, %0
59+
%predphi40 = or <4 x i32> %vec.phi31, %1
60+
%predphi41 = or <4 x i32> %vec.phi32, %2
61+
%predphi42 = or <4 x i32> %vec.phi33, %3
62+
%predphi43 = or <4 x i32> %vec.phi, %4
63+
%predphi44 = or <4 x i32> %vec.phi27, %5
64+
%predphi45 = or <4 x i32> %vec.phi28, %6
65+
%predphi46 = or <4 x i32> %vec.phi29, %7
66+
br label %vector.body
67+
}

0 commit comments

Comments
 (0)