Skip to content

Commit 2f782c9

Browse files
committed
[RISCV] Don't vectorize loops with a small trip count
Inspired by https://reviews.llvm.org/D130755. I don't know the logic behind the value 5; it is copied from AArch64. For some tests, I had to change the trip count so that we don't break what they are testing.
1 parent 7eccafc commit 2f782c9

File tree

7 files changed

+232
-187
lines changed

7 files changed

+232
-187
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,12 @@ static cl::opt<unsigned> SLPMaxVF(
3737
"exclusively by SLP vectorizer."),
3838
cl::Hidden);
3939

40+
static cl::opt<unsigned>
41+
RVVMinTripCount("riscv-v-min-trip-count",
42+
cl::desc("Set the lower bound of a trip count to decide on "
43+
"vectorization while tail-folding."),
44+
cl::init(5), cl::Hidden);
45+
4046
InstructionCost
4147
RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
4248
TTI::TargetCostKind CostKind) {
@@ -2598,6 +2604,10 @@ unsigned RISCVTTIImpl::getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
25982604
return std::max<unsigned>(1U, RegWidth.getFixedValue() / ElemWidth);
25992605
}
26002606

2607+
unsigned RISCVTTIImpl::getMinTripCountTailFoldingThreshold() const {
2608+
return RVVMinTripCount;
2609+
}
2610+
26012611
TTI::AddressingModeKind
26022612
RISCVTTIImpl::getPreferredAddressingMode(const Loop *L,
26032613
ScalarEvolution *SE) const {

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -388,6 +388,8 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
388388

389389
bool enableInterleavedAccessVectorization() { return true; }
390390

391+
unsigned getMinTripCountTailFoldingThreshold() const;
392+
391393
enum RISCVRegisterClass { GPRRC, FPRRC, VRRC };
392394
unsigned getNumberOfRegisters(unsigned ClassID) const {
393395
switch (ClassID) {

llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll

Lines changed: 13 additions & 69 deletions
Original file line number | Diff line number | Diff line change
@@ -45,47 +45,19 @@ for.end: ; preds = %for.body
4545
define void @trip3_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
4646
; CHECK-LABEL: @trip3_i8(
4747
; CHECK-NEXT: entry:
48-
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
49-
; CHECK: vector.ph:
50-
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
51-
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
52-
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
53-
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 3, [[TMP2]]
54-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
55-
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
56-
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
57-
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
58-
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
59-
; CHECK: vector.body:
60-
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 3)
61-
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 0
62-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
63-
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP9]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i8> poison)
64-
; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 2 x i8> [[WIDE_MASKED_LOAD]], splat (i8 1)
65-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST1:%.*]], i64 0
66-
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
67-
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP12]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i8> poison)
68-
; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 2 x i8> [[TMP10]], [[WIDE_MASKED_LOAD1]]
69-
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
70-
; CHECK-NEXT: call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP13]], ptr [[TMP14]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
71-
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
72-
; CHECK: middle.block:
73-
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
74-
; CHECK: scalar.ph:
75-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
7648
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
7749
; CHECK: for.body:
78-
; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
79-
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
50+
; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
51+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[I_08]]
8052
; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
8153
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP15]], 1
82-
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST1]], i64 [[I_08]]
54+
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST1:%.*]], i64 [[I_08]]
8355
; CHECK-NEXT: [[TMP16:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
8456
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP16]]
8557
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
8658
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
8759
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 3
88-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
60+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
8961
; CHECK: for.end:
9062
; CHECK-NEXT: ret void
9163
;
@@ -112,47 +84,19 @@ for.end: ; preds = %for.body
11284
define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
11385
; CHECK-LABEL: @trip5_i8(
11486
; CHECK-NEXT: entry:
115-
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
116-
; CHECK: vector.ph:
117-
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
118-
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
119-
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
120-
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 5, [[TMP2]]
121-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
122-
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
123-
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
124-
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
125-
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
126-
; CHECK: vector.body:
127-
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 5)
128-
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 0
129-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
130-
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP9]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
131-
; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 4 x i8> [[WIDE_MASKED_LOAD]], splat (i8 1)
132-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST1:%.*]], i64 0
133-
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
134-
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP12]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
135-
; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i8> [[TMP10]], [[WIDE_MASKED_LOAD1]]
136-
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
137-
; CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP13]], ptr [[TMP14]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
138-
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
139-
; CHECK: middle.block:
140-
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
141-
; CHECK: scalar.ph:
142-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
14387
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
14488
; CHECK: for.body:
145-
; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
146-
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
89+
; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
90+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[I_08]]
14791
; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
14892
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP15]], 1
149-
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST1]], i64 [[I_08]]
93+
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST1:%.*]], i64 [[I_08]]
15094
; CHECK-NEXT: [[TMP16:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
15195
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP16]]
15296
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
15397
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
15498
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 5
155-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
99+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
156100
; CHECK: for.end:
157101
; CHECK-NEXT: ret void
158102
;
@@ -219,7 +163,7 @@ define void @trip8_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
219163
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
220164
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
221165
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 8
222-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
166+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
223167
; CHECK: for.end:
224168
; CHECK-NEXT: ret void
225169
;
@@ -277,7 +221,7 @@ define void @trip16_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
277221
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
278222
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
279223
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 16
280-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
224+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
281225
; CHECK: for.end:
282226
; CHECK-NEXT: ret void
283227
;
@@ -336,7 +280,7 @@ define void @trip32_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
336280
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
337281
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
338282
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 32
339-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
283+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
340284
; CHECK: for.end:
341285
; CHECK-NEXT: ret void
342286
;
@@ -379,7 +323,7 @@ define void @trip24_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
379323
; CHECK-NEXT: store <8 x i8> [[TMP6]], ptr [[TMP5]], align 1
380324
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
381325
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24
382-
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
326+
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
383327
; CHECK: middle.block:
384328
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
385329
; CHECK: scalar.ph:
@@ -396,7 +340,7 @@ define void @trip24_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
396340
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
397341
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
398342
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 24
399-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
343+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
400344
; CHECK: for.end:
401345
; CHECK-NEXT: ret void
402346
;

0 commit comments

Comments
 (0)