Skip to content

Commit 748ae69

Browse files
committed
[RISCV] Add fastcc support for bf16 with Zfbfmin.
1 parent 9e1e36d commit 748ae69

File tree

2 files changed

+74
-2
lines changed

2 files changed

+74
-2
lines changed

llvm/lib/Target/RISCV/RISCVCallingConv.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -531,7 +531,8 @@ bool llvm::CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
531531
}
532532
}
533533

534-
if (LocVT == MVT::f16 && Subtarget.hasStdExtZfhmin()) {
534+
if ((LocVT == MVT::f16 && Subtarget.hasStdExtZfhmin()) ||
535+
(LocVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())) {
535536
static const MCPhysReg FPR16List[] = {
536537
RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
537538
RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
@@ -584,7 +585,7 @@ bool llvm::CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
584585
}
585586
}
586587

587-
if (LocVT == MVT::f16) {
588+
if (LocVT == MVT::f16 || LocVT == MVT::bf16) {
588589
unsigned Offset2 = State.AllocateStack(2, Align(2));
589590
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
590591
return false;
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv32 -mattr=+zfbfmin -target-abi=ilp32 -verify-machineinstrs < %s \
3+
; RUN: | FileCheck %s
4+
5+
define fastcc bfloat @callee(<32 x bfloat> %A) nounwind {
6+
; CHECK-LABEL: callee:
7+
; CHECK: # %bb.0:
8+
; CHECK-NEXT: fmv.x.h a0, fa0
9+
; CHECK-NEXT: ret
10+
%B = extractelement <32 x bfloat> %A, i32 0
11+
ret bfloat %B
12+
}
13+
14+
; With fastcc, bf16 arguments are passed in fa0-fa7 and ft0-ft11.
15+
; The remaining arguments are passed on the stack.
16+
define bfloat @caller(<32 x bfloat> %A) nounwind {
17+
; CHECK-LABEL: caller:
18+
; CHECK: # %bb.0:
19+
; CHECK-NEXT: addi sp, sp, -32
20+
; CHECK-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
21+
; CHECK-NEXT: fmv.h.x fa0, a0
22+
; CHECK-NEXT: fmv.h.x fa1, a1
23+
; CHECK-NEXT: fmv.h.x fa2, a2
24+
; CHECK-NEXT: fmv.h.x fa3, a3
25+
; CHECK-NEXT: fmv.h.x fa4, a4
26+
; CHECK-NEXT: flh ft0, 32(sp)
27+
; CHECK-NEXT: flh ft1, 36(sp)
28+
; CHECK-NEXT: flh ft2, 40(sp)
29+
; CHECK-NEXT: flh ft3, 44(sp)
30+
; CHECK-NEXT: flh ft4, 48(sp)
31+
; CHECK-NEXT: flh ft5, 52(sp)
32+
; CHECK-NEXT: flh ft6, 56(sp)
33+
; CHECK-NEXT: flh ft7, 60(sp)
34+
; CHECK-NEXT: flh ft8, 64(sp)
35+
; CHECK-NEXT: flh ft9, 68(sp)
36+
; CHECK-NEXT: flh ft10, 72(sp)
37+
; CHECK-NEXT: flh ft11, 76(sp)
38+
; CHECK-NEXT: flh fs0, 80(sp)
39+
; CHECK-NEXT: flh fs1, 84(sp)
40+
; CHECK-NEXT: flh fs2, 88(sp)
41+
; CHECK-NEXT: flh fs3, 92(sp)
42+
; CHECK-NEXT: flh fs4, 96(sp)
43+
; CHECK-NEXT: flh fs5, 100(sp)
44+
; CHECK-NEXT: flh fs6, 104(sp)
45+
; CHECK-NEXT: flh fs7, 108(sp)
46+
; CHECK-NEXT: flh fs8, 112(sp)
47+
; CHECK-NEXT: flh fs9, 116(sp)
48+
; CHECK-NEXT: flh fs10, 120(sp)
49+
; CHECK-NEXT: flh fs11, 124(sp)
50+
; CHECK-NEXT: fmv.h.x fa5, a5
51+
; CHECK-NEXT: fmv.h.x fa6, a6
52+
; CHECK-NEXT: fmv.h.x fa7, a7
53+
; CHECK-NEXT: fsh fs11, 22(sp)
54+
; CHECK-NEXT: fsh fs10, 20(sp)
55+
; CHECK-NEXT: fsh fs9, 18(sp)
56+
; CHECK-NEXT: fsh fs8, 16(sp)
57+
; CHECK-NEXT: fsh fs7, 14(sp)
58+
; CHECK-NEXT: fsh fs6, 12(sp)
59+
; CHECK-NEXT: fsh fs5, 10(sp)
60+
; CHECK-NEXT: fsh fs4, 8(sp)
61+
; CHECK-NEXT: fsh fs3, 6(sp)
62+
; CHECK-NEXT: fsh fs2, 4(sp)
63+
; CHECK-NEXT: fsh fs1, 2(sp)
64+
; CHECK-NEXT: fsh fs0, 0(sp)
65+
; CHECK-NEXT: call callee
66+
; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
67+
; CHECK-NEXT: addi sp, sp, 32
68+
; CHECK-NEXT: ret
69+
%C = call fastcc bfloat @callee(<32 x bfloat> %A)
70+
ret bfloat %C
71+
}

0 commit comments

Comments
 (0)