Skip to content

Commit 60ac347

Browse files
authored
[SLP][REVEC] Make getAltInstrMask and getGatherCost vectorize vector instructions. (#99461)
1 parent 9d315bc commit 60ac347

File tree

2 files changed: 70 additions and 5 deletions.

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1047,10 +1047,13 @@ static void fixupOrderingIndices(MutableArrayRef<unsigned> Order) {
10471047
/// Opcode1.
10481048
SmallBitVector getAltInstrMask(ArrayRef<Value *> VL, unsigned Opcode0,
10491049
unsigned Opcode1) {
1050-
SmallBitVector OpcodeMask(VL.size(), false);
1050+
Type *ScalarTy = VL[0]->getType();
1051+
unsigned ScalarTyNumElements = getNumElements(ScalarTy);
1052+
SmallBitVector OpcodeMask(VL.size() * ScalarTyNumElements, false);
10511053
for (unsigned Lane : seq<unsigned>(VL.size()))
10521054
if (cast<Instruction>(VL[Lane])->getOpcode() == Opcode1)
1053-
OpcodeMask.set(Lane);
1055+
OpcodeMask.set(Lane * ScalarTyNumElements,
1056+
Lane * ScalarTyNumElements + ScalarTyNumElements);
10541057
return OpcodeMask;
10551058
}
10561059

@@ -11491,7 +11494,8 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
1149111494
// Find the cost of inserting/extracting values from the vector.
1149211495
// Check if the same elements are inserted several times and count them as
1149311496
// shuffle candidates.
11494-
APInt ShuffledElements = APInt::getZero(VL.size());
11497+
unsigned ScalarTyNumElements = getNumElements(ScalarTy);
11498+
APInt ShuffledElements = APInt::getZero(VecTy->getNumElements());
1149511499
DenseMap<Value *, unsigned> UniqueElements;
1149611500
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1149711501
InstructionCost Cost;
@@ -11511,7 +11515,8 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
1151111515
Value *V = VL[I];
1151211516
// No need to shuffle duplicates for constants.
1151311517
if ((ForPoisonSrc && isConstant(V)) || isa<UndefValue>(V)) {
11514-
ShuffledElements.setBit(I);
11518+
ShuffledElements.setBits(I * ScalarTyNumElements,
11519+
I * ScalarTyNumElements + ScalarTyNumElements);
1151511520
ShuffleMask[I] = isa<PoisonValue>(V) ? PoisonMaskElem : I;
1151611521
continue;
1151711522
}
@@ -11524,7 +11529,8 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
1152411529
}
1152511530

1152611531
DuplicateNonConst = true;
11527-
ShuffledElements.setBit(I);
11532+
ShuffledElements.setBits(I * ScalarTyNumElements,
11533+
I * ScalarTyNumElements + ScalarTyNumElements);
1152811534
ShuffleMask[I] = Res.first->second;
1152911535
}
1153011536
if (ForPoisonSrc)
Lines changed: 59 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,59 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt < %s -passes=slp-vectorizer -slp-revec -mtriple=x86_64 -S | FileCheck %s
3+
4+
define i1 @logical_and_icmp_diff_preds(<4 x i32> %x) {
5+
; CHECK-LABEL: @logical_and_icmp_diff_preds(
6+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, <4 x i32> <i32 1, i32 3, i32 6, i32 0>
7+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
8+
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[TMP1]], [[TMP2]]
9+
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult <4 x i32> [[TMP1]], [[TMP2]]
10+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
11+
; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
12+
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]])
13+
; CHECK-NEXT: ret i1 [[TMP7]]
14+
;
15+
%x0 = extractelement <4 x i32> %x, i32 0
16+
%x1 = extractelement <4 x i32> %x, i32 1
17+
%x2 = extractelement <4 x i32> %x, i32 2
18+
%x3 = extractelement <4 x i32> %x, i32 3
19+
%c0 = icmp ult i32 %x0, 0
20+
%c1 = icmp slt i32 %x1, 0
21+
%c2 = icmp sgt i32 %x2, 0
22+
%c3 = icmp slt i32 %x3, 0
23+
%s1 = select i1 %c0, i1 %c1, i1 false
24+
%s2 = select i1 %s1, i1 %c2, i1 false
25+
%s3 = select i1 %s2, i1 %c3, i1 false
26+
ret i1 %s3
27+
}
28+
29+
define i1 @logical_and_icmp_clamp(<4 x i32> %x) {
30+
; CHECK-LABEL: @logical_and_icmp_clamp(
31+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
32+
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
33+
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
34+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
35+
; CHECK-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]]
36+
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]])
37+
; CHECK-NEXT: ret i1 [[TMP6]]
38+
;
39+
%x0 = extractelement <4 x i32> %x, i32 0
40+
%x1 = extractelement <4 x i32> %x, i32 1
41+
%x2 = extractelement <4 x i32> %x, i32 2
42+
%x3 = extractelement <4 x i32> %x, i32 3
43+
%c0 = icmp slt i32 %x0, 42
44+
%c1 = icmp slt i32 %x1, 42
45+
%c2 = icmp slt i32 %x2, 42
46+
%c3 = icmp slt i32 %x3, 42
47+
%d0 = icmp sgt i32 %x0, 17
48+
%d1 = icmp sgt i32 %x1, 17
49+
%d2 = icmp sgt i32 %x2, 17
50+
%d3 = icmp sgt i32 %x3, 17
51+
%s1 = select i1 %c0, i1 %c1, i1 false
52+
%s2 = select i1 %s1, i1 %c2, i1 false
53+
%s3 = select i1 %s2, i1 %c3, i1 false
54+
%s4 = select i1 %s3, i1 %d0, i1 false
55+
%s5 = select i1 %s4, i1 %d1, i1 false
56+
%s6 = select i1 %s5, i1 %d2, i1 false
57+
%s7 = select i1 %s6, i1 %d3, i1 false
58+
ret i1 %s7
59+
}

0 commit comments

Comments
 (0)