Skip to content

Commit 04e554c

Browse files
committed
[AArch64] Set MaxInterleaving to 4 for Neoverse V2
1 parent acfa8a0 commit 04e554c

File tree

10 files changed

+110
-2
lines changed

10 files changed

+110
-2
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -626,6 +626,10 @@ class TargetTransformInfo {
626626
AssumptionCache &AC, TargetLibraryInfo *LibInfo,
627627
HardwareLoopInfo &HWLoopInfo) const;
628628

629+
/// Query the target for the minimum vectorization factor at which epilogue
630+
/// vectorization should be considered.
631+
unsigned getEpilogueVectorizationMinVF() const;
632+
629633
/// Query the target whether it would be preferred to create a predicated
630634
/// vector loop, which can avoid the need to emit a scalar epilogue loop.
631635
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const;
@@ -1865,6 +1869,7 @@ class TargetTransformInfo::Concept {
18651869
AssumptionCache &AC,
18661870
TargetLibraryInfo *LibInfo,
18671871
HardwareLoopInfo &HWLoopInfo) = 0;
1872+
virtual unsigned getEpilogueVectorizationMinVF() = 0;
18681873
virtual bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) = 0;
18691874
virtual TailFoldingStyle
18701875
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) = 0;
@@ -2319,6 +2324,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
23192324
HardwareLoopInfo &HWLoopInfo) override {
23202325
return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
23212326
}
2327+
unsigned getEpilogueVectorizationMinVF() override {
2328+
return Impl.getEpilogueVectorizationMinVF();
2329+
}
23222330
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override {
23232331
return Impl.preferPredicateOverEpilogue(TFI);
23242332
}

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,8 @@ class TargetTransformInfoImplBase {
192192
return false;
193193
}
194194

195+
unsigned getEpilogueVectorizationMinVF() const { return 16; }
196+
195197
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const { return false; }
196198

197199
TailFoldingStyle

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -667,6 +667,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
667667
return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
668668
}
669669

670+
unsigned getEpilogueVectorizationMinVF() {
671+
return BaseT::getEpilogueVectorizationMinVF();
672+
}
673+
670674
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) {
671675
return BaseT::preferPredicateOverEpilogue(TFI);
672676
}

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,10 @@ bool TargetTransformInfo::isHardwareLoopProfitable(
352352
return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
353353
}
354354

355+
unsigned TargetTransformInfo::getEpilogueVectorizationMinVF() const {
356+
return TTIImpl->getEpilogueVectorizationMinVF();
357+
}
358+
355359
bool TargetTransformInfo::preferPredicateOverEpilogue(
356360
TailFoldingInfo *TFI) const {
357361
return TTIImpl->preferPredicateOverEpilogue(TFI);

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,8 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
234234
MaxBytesForLoopAlignment = 16;
235235
break;
236236
case NeoverseV2:
237+
EpilogueVectorizationMinVF = 8;
238+
MaxInterleaveFactor = 4;
237239
// Specialize cost for Neoverse-V2.
238240
ScatterOverhead = 13;
239241
LLVM_FALLTHROUGH;

llvm/lib/Target/AArch64/AArch64Subtarget.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
5656
bool ATTRIBUTE = DEFAULT;
5757
#include "AArch64GenSubtargetInfo.inc"
5858

59+
unsigned EpilogueVectorizationMinVF = 16;
5960
uint8_t MaxInterleaveFactor = 2;
6061
uint8_t VectorInsertExtractBaseCost = 2;
6162
uint16_t CacheLineSize = 0;
@@ -225,6 +226,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
225226
hasFuseAdrpAdd() || hasFuseLiterals();
226227
}
227228

229+
unsigned getEpilogueVectorizationMinVF() const { return EpilogueVectorizationMinVF; }
228230
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
229231
unsigned getVectorInsertExtractBaseCost() const;
230232
unsigned getCacheLineSize() const override { return CacheLineSize; }

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4581,6 +4581,10 @@ static bool containsDecreasingPointers(Loop *TheLoop,
45814581
return false;
45824582
}
45834583

4584+
unsigned AArch64TTIImpl::getEpilogueVectorizationMinVF() const {
4585+
return ST->getEpilogueVectorizationMinVF();
4586+
}
4587+
45844588
bool AArch64TTIImpl::preferPredicateOverEpilogue(TailFoldingInfo *TFI) {
45854589
if (!ST->hasSVE())
45864590
return false;

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
376376
return ST->useFixedOverScalableIfEqualCost();
377377
}
378378

379+
unsigned getEpilogueVectorizationMinVF() const;
380+
379381
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI);
380382

381383
bool supportsScalableVectors() const {

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ static cl::opt<unsigned> EpilogueVectorizationForceVF(
185185
"loops."));
186186

187187
static cl::opt<unsigned> EpilogueVectorizationMinVF(
188-
"epilogue-vectorization-minimum-VF", cl::init(16), cl::Hidden,
188+
"epilogue-vectorization-minimum-VF", cl::Hidden,
189189
cl::desc("Only loops with vectorization factor equal to or larger than "
190190
"the specified value are considered for epilogue vectorization."));
191191

@@ -4644,7 +4644,10 @@ bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable(
46444644
if (TTI.getMaxInterleaveFactor(VF) <= 1)
46454645
return false;
46464646

4647-
if ((Multiplier * VF.getKnownMinValue()) >= EpilogueVectorizationMinVF)
4647+
unsigned MinVFTreshold = EpilogueVectorizationMinVF.getNumOccurrences() > 0 ?
4648+
EpilogueVectorizationMinVF : TTI.getEpilogueVectorizationMinVF();
4649+
4650+
if ((Multiplier * VF.getKnownMinValue()) >= MinVFTreshold)
46484651
return true;
46494652
return false;
46504653
}
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
; RUN: opt -passes=loop-vectorize -S < %s | FileCheck %s
2+
3+
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
4+
target triple = "aarch64-unknown-linux-gnu"
5+
6+
define noundef i32 @V1(ptr noalias nocapture noundef %0, ptr noalias nocapture noundef readonly %1, i32 noundef %2) #0 {
7+
; CHECK-LABEL: @V1(
8+
; CHECK-NOT: vec.epilog.ph:
9+
; CHECK-NOT: vec.epilog.vector.body:
10+
; CHECK-NOT: vec.epilog.middle.block:
11+
; CHECK-NOT: vec.epilog.scalar.ph:
12+
;
13+
entry:
14+
%4 = icmp sgt i32 %2, 0
15+
br i1 %4, label %5, label %8
16+
17+
5:
18+
%6 = zext nneg i32 %2 to i64
19+
br label %9
20+
21+
7:
22+
br label %8
23+
24+
8:
25+
ret i32 42
26+
27+
9:
28+
%10 = phi i64 [ 0, %5 ], [ %16, %9 ]
29+
%11 = getelementptr inbounds double, ptr %0, i64 %10
30+
%12 = load double, ptr %11, align 8
31+
%13 = getelementptr inbounds double, ptr %1, i64 %10
32+
%14 = load double, ptr %13, align 8
33+
%15 = fadd fast double %14, %12
34+
store double %15, ptr %11, align 8
35+
%16 = add nuw nsw i64 %10, 1
36+
%17 = icmp eq i64 %16, %6
37+
br i1 %17, label %7, label %9
38+
}
39+
40+
define noundef i32 @V2(ptr noalias nocapture noundef %0, ptr noalias nocapture noundef readonly %1, i32 noundef %2) #1 {
41+
;
42+
; CHECK-LABEL: @V2(
43+
; CHECK: vec.epilog.ph:
44+
; CHECK: vec.epilog.vector.body:
45+
; CHECK: vec.epilog.middle.block:
46+
; CHECK: vec.epilog.scalar.ph:
47+
;
48+
entry:
49+
%4 = icmp sgt i32 %2, 0
50+
br i1 %4, label %5, label %8
51+
52+
5:
53+
%6 = zext nneg i32 %2 to i64
54+
br label %9
55+
56+
7:
57+
br label %8
58+
59+
8:
60+
ret i32 42
61+
62+
9:
63+
%10 = phi i64 [ 0, %5 ], [ %16, %9 ]
64+
%11 = getelementptr inbounds double, ptr %0, i64 %10
65+
%12 = load double, ptr %11, align 8
66+
%13 = getelementptr inbounds double, ptr %1, i64 %10
67+
%14 = load double, ptr %13, align 8
68+
%15 = fadd fast double %14, %12
69+
store double %15, ptr %11, align 8
70+
%16 = add nuw nsw i64 %10, 1
71+
%17 = icmp eq i64 %16, %6
72+
br i1 %17, label %7, label %9
73+
}
74+
75+
attributes #0 = { mustprogress nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable vscale_range(1,16) "approx-func-fp-math"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "target-cpu"="neoverse-v1" "target-features"="+sve2" "unsafe-fp-math"="true" }
76+
77+
attributes #1 = { mustprogress nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable vscale_range(1,16) "approx-func-fp-math"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "target-cpu"="neoverse-v2" "target-features"="+sve2" "unsafe-fp-math"="true" }

0 commit comments

Comments
 (0)