Skip to content

Commit d04b0f8

Browse files
committed
[SLP][REVEC] Make getGatherCost support vector instructions.
Fix "Vector size mismatch".
1 parent 2d8e0b2 commit d04b0f8

File tree

2 files changed

+15
-3
lines changed

2 files changed

+15
-3
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11361,7 +11361,8 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
11361 11361
// Find the cost of inserting/extracting values from the vector.
11362 11362
// Check if the same elements are inserted several times and count them as
11363 11363
// shuffle candidates.
11364-
APInt ShuffledElements = APInt::getZero(VL.size());
11364+
unsigned ScalarTyNumElements = getNumElements(ScalarTy);
11365+
APInt ShuffledElements = APInt::getZero(VecTy->getNumElements());
11365 11366
DenseMap<Value *, unsigned> UniqueElements;
11366 11367
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
11367 11368
InstructionCost Cost;
@@ -11381,7 +11382,8 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
11381 11382
Value *V = VL[I];
11382 11383
// No need to shuffle duplicates for constants.
11383 11384
if ((ForPoisonSrc && isConstant(V)) || isa<UndefValue>(V)) {
11384-
ShuffledElements.setBit(I);
11385+
ShuffledElements.setBits(I * ScalarTyNumElements,
11386+
I * ScalarTyNumElements + ScalarTyNumElements);
11385 11387
ShuffleMask[I] = isa<PoisonValue>(V) ? PoisonMaskElem : I;
11386 11388
continue;
11387 11389
}
@@ -11394,7 +11396,8 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
11394 11396
}
11395 11397

11396 11398
DuplicateNonConst = true;
11397-
ShuffledElements.setBit(I);
11399+
ShuffledElements.setBits(I * ScalarTyNumElements,
11400+
I * ScalarTyNumElements + ScalarTyNumElements);
11398 11401
ShuffleMask[I] = Res.first->second;
11399 11402
}
11400 11403
if (ForPoisonSrc)

llvm/test/Transforms/SLPVectorizer/X86/revec-reduction-logical.ll

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,15 @@ define i1 @logical_and_icmp_diff_preds(<4 x i32> %x) {
27 27
}
28 28

29 29
define i1 @logical_and_icmp_clamp(<4 x i32> %x) {
30+
; CHECK-LABEL: @logical_and_icmp_clamp(
31+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
32+
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
33+
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
34+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
35+
; CHECK-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]]
36+
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]])
37+
; CHECK-NEXT: ret i1 [[TMP6]]
38+
;
30 39
%x0 = extractelement <4 x i32> %x, i32 0
31 40
%x1 = extractelement <4 x i32> %x, i32 1
32 41
%x2 = extractelement <4 x i32> %x, i32 2

0 commit comments

Comments
 (0)