Skip to content

Commit d02a704

Browse files
authored
[SLP][REVEC] Make getExtractWithExtendCost support FixedVectorType as Dst. (#134822)
1 parent 64b5e8f commit d02a704

File tree

2 files changed

+50
-3
lines changed

2 files changed

+50
-3
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 23 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5410,6 +5410,24 @@ static InstructionCost getVectorInstrCost(
54105410
ScalarUserAndIdx);
54115411
}
54125412

5413+
/// This is similar to TargetTransformInfo::getExtractWithExtendCost, but if Dst
5414+
/// is a FixedVectorType, a vector will be extracted instead of a scalar.
5415+
static InstructionCost getExtractWithExtendCost(
5416+
const TargetTransformInfo &TTI, unsigned Opcode, Type *Dst,
5417+
VectorType *VecTy, unsigned Index,
5418+
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) {
5419+
if (auto *ScalarTy = dyn_cast<FixedVectorType>(Dst)) {
5420+
assert(SLPReVec && "Only supported by REVEC.");
5421+
auto *SubTp =
5422+
getWidenedType(VecTy->getElementType(), ScalarTy->getNumElements());
5423+
return getShuffleCost(TTI, TTI::SK_ExtractSubvector, VecTy, {}, CostKind,
5424+
Index * ScalarTy->getNumElements(), SubTp) +
5425+
TTI.getCastInstrCost(Opcode, Dst, SubTp, TTI::CastContextHint::None,
5426+
CostKind);
5427+
}
5428+
return TTI.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
5429+
}
5430+
54135431
/// Correctly creates insert_subvector, checking that the index is multiple of
54145432
/// the subvectors length. Otherwise, generates shuffle using \p Generator or
54155433
/// using default shuffle.
@@ -14155,13 +14173,15 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
1415514173
const TreeEntry *Entry = &EU.E;
1415614174
auto It = MinBWs.find(Entry);
1415714175
if (It != MinBWs.end()) {
14158-
auto *MinTy = IntegerType::get(F->getContext(), It->second.first);
14176+
Type *MinTy = IntegerType::get(F->getContext(), It->second.first);
14177+
if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy))
14178+
MinTy = getWidenedType(MinTy, VecTy->getNumElements());
1415914179
unsigned Extend = isKnownNonNegative(EU.Scalar, SimplifyQuery(*DL))
1416014180
? Instruction::ZExt
1416114181
: Instruction::SExt;
1416214182
VecTy = getWidenedType(MinTy, BundleWidth);
14163-
ExtraCost = TTI->getExtractWithExtendCost(Extend, EU.Scalar->getType(),
14164-
VecTy, EU.Lane);
14183+
ExtraCost =
14184+
getExtractWithExtendCost(*TTI, Extend, ScalarTy, VecTy, EU.Lane);
1416514185
} else {
1416614186
ExtraCost =
1416714187
getVectorInstrCost(*TTI, ScalarTy, Instruction::ExtractElement, VecTy,
Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,27 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx10.2-512 -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s
3+
4+
; Regression input for the SLP vectorizer run with -slp-revec (see the RUN
; line). The autogenerated assertions list the same instruction sequence as the
; body, i.e. the pass is expected to leave this function unchanged.
; NOTE(review): %2 is otherwise unused, so the `or` only adds an extra user of
; %0 - presumably to exercise the extract-with-extend cost path; confirm.
define void @test() {
5+
; CHECK-LABEL: @test(
6+
; CHECK-NEXT: entry:
7+
; CHECK-NEXT: [[TMP0:%.*]] = sub <8 x i64> zeroinitializer, splat (i64 1)
8+
; CHECK-NEXT: [[TMP1:%.*]] = sub <8 x i64> zeroinitializer, zeroinitializer
9+
; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i64> [[TMP0]], zeroinitializer
10+
; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i64> [[TMP0]] to <8 x i32>
11+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32>
12+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr null, i64 32
13+
; CHECK-NEXT: store <8 x i32> [[TMP3]], ptr null, align 4
14+
; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr [[TMP5]], align 4
15+
; CHECK-NEXT: ret void
16+
;
17+
entry:
18+
%0 = sub <8 x i64> zeroinitializer, splat (i64 1)
19+
%1 = sub <8 x i64> zeroinitializer, zeroinitializer
20+
%2 = or <8 x i64> %0, zeroinitializer
21+
%3 = trunc <8 x i64> %0 to <8 x i32>
22+
%4 = trunc <8 x i64> %1 to <8 x i32>
23+
%5 = getelementptr i8, ptr null, i64 32
24+
store <8 x i32> %3, ptr null, align 4
25+
store <8 x i32> %4, ptr %5, align 4
26+
ret void
27+
}

0 commit comments

Comments
 (0)