Skip to content

Commit fd534e5

Browse files
authored
[AArch64] Do not split bfloat HFA args between regs and stack (#128909)
In AAPCS64, __fp16 and __bf16 share the same machine type, so they should be treated the same way for argument passing. In particular, arrays of either type need to be treated as homogeneous floating-point aggregates (HFAs), which must be passed either entirely in registers or entirely on the stack, never split between the two.
1 parent 56762b7 commit fd534e5

File tree

2 files changed

+8
-1
lines changed

2 files changed

+8
-1
lines changed

llvm/lib/Target/AArch64/AArch64CallingConvention.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -142,7 +142,7 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
142142
ArrayRef<MCPhysReg> RegList;
143143
if (LocVT.SimpleTy == MVT::i64 || (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32))
144144
RegList = XRegList;
145-
else if (LocVT.SimpleTy == MVT::f16)
145+
else if (LocVT.SimpleTy == MVT::f16 || LocVT.SimpleTy == MVT::bf16)
146146
RegList = HRegList;
147147
else if (LocVT.SimpleTy == MVT::f32 || LocVT.is32BitVector())
148148
RegList = SRegList;

llvm/test/CodeGen/AArch64/argument-blocks.ll

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -195,3 +195,10 @@ define half @test_f16_blocked([7 x double], [2 x half] %in) {
195195
%val = extractvalue [2 x half] %in, 0
196196
ret half %val
197197
}
198+
199+
define bfloat @test_bf16_blocked([7 x double], [2 x bfloat] %in) {
200+
; CHECK-LABEL: test_bf16_blocked:
201+
; CHECK: ldr h0, [sp]
202+
%val = extractvalue [2 x bfloat] %in, 0
203+
ret bfloat %val
204+
}

0 commit comments

Comments
 (0)