Skip to content

Commit 41f1b46

Browse files
authored
[RISCV] Account for zvfhmin and zvfbfmin promotion in register usage (#108370)
A half with only zvfhmin, or a bfloat, will end up getting promoted to an f32 for most instructions. Unless the loop consists only of memory ops and permutation instructions, which don't need to be promoted (is this common?), we'll end up using double the LMUL of what's currently being returned by getRegUsageForType. Since this is used by the loop vectorizer, it seems better to be conservative and assume that any usage of a zvfhmin half/bfloat will end up being widened to an f32.
1 parent 6af2f22 commit 41f1b46

File tree

3 files changed

+76
-1
lines changed

3 files changed

+76
-1
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2030,8 +2030,15 @@ void RISCVTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
20302030
}
20312031

20322032
unsigned RISCVTTIImpl::getRegUsageForType(Type *Ty) {
2033-
TypeSize Size = DL.getTypeSizeInBits(Ty);
20342033
if (Ty->isVectorTy()) {
2034+
// f16 with only zvfhmin and bf16 will be promoted to f32
2035+
Type *EltTy = cast<VectorType>(Ty)->getElementType();
2036+
if ((EltTy->isHalfTy() && !ST->hasVInstructionsF16()) ||
2037+
EltTy->isBFloatTy())
2038+
Ty = VectorType::get(Type::getFloatTy(Ty->getContext()),
2039+
cast<VectorType>(Ty));
2040+
2041+
TypeSize Size = DL.getTypeSizeInBits(Ty);
20352042
if (Size.isScalable() && ST->hasVInstructions())
20362043
return divideCeil(Size.getKnownMinValue(), RISCV::RVVBitsPerBlock);
20372044

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfbfmin -debug-only=loop-vectorize -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s
2+
3+
; Element-wise bfloat add: result[i] = src1[i] + src2[i] for i in [0, size).
; The FileCheck lines below pin the loop vectorizer's register-usage debug
; output for this loop; the vector register class is expected to report
; 4 registers.
define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
4+
; CHECK-LABEL: add
5+
; CHECK: LV(REG): Found max usage: 2 item
6+
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
7+
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
8+
; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
9+
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
10+
11+
; Skip the loop entirely when the trip count (%size) is zero.
entry:
12+
%conv = zext i32 %size to i64
13+
%cmp10.not = icmp eq i32 %size, 0
14+
br i1 %cmp10.not, label %for.cond.cleanup, label %for.body
15+
16+
for.cond.cleanup:
17+
ret void
18+
19+
; Loop body: load one bfloat from each source, add them, store to %result,
; then advance the i64 induction variable until it equals the zero-extended size.
for.body:
20+
%i.011 = phi i64 [ %add4, %for.body ], [ 0, %entry ]
21+
%arrayidx = getelementptr inbounds bfloat, ptr %src1, i64 %i.011
22+
%0 = load bfloat, ptr %arrayidx, align 4
23+
%arrayidx2 = getelementptr inbounds bfloat, ptr %src2, i64 %i.011
24+
%1 = load bfloat, ptr %arrayidx2, align 4
25+
%add = fadd bfloat %0, %1
26+
%arrayidx3 = getelementptr inbounds bfloat, ptr %result, i64 %i.011
27+
store bfloat %add, ptr %arrayidx3, align 4
28+
%add4 = add nuw nsw i64 %i.011, 1
29+
%exitcond.not = icmp eq i64 %add4, %conv
30+
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
31+
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfh -debug-only=loop-vectorize -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFH
2+
; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfhmin -debug-only=loop-vectorize -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFHMIN
3+
4+
; Element-wise half add: result[i] = src1[i] + src2[i] for i in [0, size).
; The FileCheck lines below pin the loop vectorizer's register-usage debug
; output for this loop under two configurations: with +zvfh the vector
; register class is expected to report 2 registers, with only +zvfhmin it is
; expected to report 4.
define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
5+
; The label is spelled once per check prefix selected by the RUN lines; a
; label written with an unselected prefix would be silently ignored.
; ZVFH-LABEL: add
; ZVFHMIN-LABEL: add
6+
; ZVFH: LV(REG): Found max usage: 2 item
7+
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
8+
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
9+
; ZVFH-NEXT: LV(REG): Found invariant usage: 1 item
10+
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
11+
; ZVFHMIN: LV(REG): Found max usage: 2 item
12+
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
13+
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
14+
; ZVFHMIN-NEXT: LV(REG): Found invariant usage: 1 item
15+
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
16+
17+
; Skip the loop entirely when the trip count (%size) is zero.
entry:
18+
%conv = zext i32 %size to i64
19+
%cmp10.not = icmp eq i32 %size, 0
20+
br i1 %cmp10.not, label %for.cond.cleanup, label %for.body
21+
22+
for.cond.cleanup:
23+
ret void
24+
25+
; Loop body: load one half from each source, add them, store to %result,
; then advance the i64 induction variable until it equals the zero-extended size.
for.body:
26+
%i.011 = phi i64 [ %add4, %for.body ], [ 0, %entry ]
27+
%arrayidx = getelementptr inbounds half, ptr %src1, i64 %i.011
28+
%0 = load half, ptr %arrayidx, align 4
29+
%arrayidx2 = getelementptr inbounds half, ptr %src2, i64 %i.011
30+
%1 = load half, ptr %arrayidx2, align 4
31+
%add = fadd half %0, %1
32+
%arrayidx3 = getelementptr inbounds half, ptr %result, i64 %i.011
33+
store half %add, ptr %arrayidx3, align 4
34+
%add4 = add nuw nsw i64 %i.011, 1
35+
%exitcond.not = icmp eq i64 %add4, %conv
36+
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
37+
}

0 commit comments

Comments
 (0)