Skip to content

Commit 23bc5b6

Browse files
committed
[AArch64] Mark bf16 as custom for truncating stores & add a comment
While we don't use SVE2 as a fallback for missing NEON instructions for BF16, it is confusing to break symmetry with fp16. While we are here, add a comment explaining how BF16 immediates work.
1 parent aa6ebf9 commit 23bc5b6

File tree

1 file changed

+11
-2
lines changed

1 file changed

+11
-2
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1571,7 +1571,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
15711571
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
15721572
}
15731573

1574-
if (!Subtarget->isNeonAvailable()) {// TODO(majnemer)
1574+
if (!Subtarget->isNeonAvailable()) {
1575+
setTruncStoreAction(MVT::v2f32, MVT::v2bf16, Custom);
1576+
setTruncStoreAction(MVT::v4f32, MVT::v4bf16, Custom);
1577+
setTruncStoreAction(MVT::v8f32, MVT::v8bf16, Custom);
1578+
setTruncStoreAction(MVT::v2f64, MVT::v2bf16, Custom);
1579+
setTruncStoreAction(MVT::v4f64, MVT::v4bf16, Custom);
15751580
setTruncStoreAction(MVT::v2f32, MVT::v2f16, Custom);
15761581
setTruncStoreAction(MVT::v4f32, MVT::v4f16, Custom);
15771582
setTruncStoreAction(MVT::v8f32, MVT::v8f16, Custom);
@@ -10385,13 +10390,17 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
1038510390
bool IsLegal = false;
1038610391
// We can materialize #0.0 as fmov $Rd, XZR for 64-bit, 32-bit cases, and
1038710392
// 16-bit case when target has full fp16 support.
10393+
// We encode bf16 bit patterns as if they were fp16. This results in very
10394+
// strange looking assembly but should populate the register with appropriate
10395+
// values. Let's say we wanted to encode 0x3FC0 which is 1.5 in BF16. We will
10396+
// end up encoding this as the imm8 0x7f. This imm8 will be expanded to the
10397+
// FP16 1.9375 which shares the same bit pattern as BF16 1.5.
1038810398
// FIXME: We should be able to handle f128 as well with a clever lowering.
1038910399
const APInt ImmInt = Imm.bitcastToAPInt();
1039010400
if (VT == MVT::f64)
1039110401
IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
1039210402
else if (VT == MVT::f32)
1039310403
IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
10394-
// TODO(majnemer): double check this...
1039510404
else if (VT == MVT::f16 || VT == MVT::bf16)
1039610405
IsLegal =
1039710406
(Subtarget->hasFullFP16() && AArch64_AM::getFP16Imm(ImmInt) != -1) ||

0 commit comments

Comments
 (0)