Skip to content

Commit f87a9db

Browse files
committed
[ARM] Expand fp64 bf16 converts similarly to f32
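This helps on +fp64 targets, where f64 is a legal type and the bf16 <-> f64 conversions were not previously lowered. An fpext from bf16 can be treated as a shift followed by a convert (vcvt.f64.f32), and an fptrunc to bf16 can use a libcall (__truncdfbf2).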
1 parent 579ced4 commit f87a9db

File tree

2 files changed

+41
-26
lines changed

2 files changed

+41
-26
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -806,7 +806,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
806806
setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
807807
} else {
808808
setOperationAction(ISD::BF16_TO_FP, MVT::f32, Expand);
809+
setOperationAction(ISD::BF16_TO_FP, MVT::f64, Expand);
809810
setOperationAction(ISD::FP_TO_BF16, MVT::f32, Custom);
811+
setOperationAction(ISD::FP_TO_BF16, MVT::f64, Custom);
810812
}
811813

812814
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {

llvm/test/CodeGen/Thumb2/bf16-instructions.ll

Lines changed: 39 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc < %s -mtriple thumbv8.1m.main-none-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP
3-
; RUN: llc < %s -mtriple thumbv8.1m.main-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP
3+
; RUN: llc < %s -mtriple thumbv8.1m.main-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP,CHECK-FPNO64
4+
; RUN: llc < %s -mtriple thumbv8.1m.main-none-eabihf -mattr=+fullfp16,+fp64 | FileCheck %s --check-prefixes=CHECK,CHECK-FP,CHECK-FP64
45

56
define bfloat @test_fadd(bfloat %a, bfloat %b) {
67
; CHECK-NOFP-LABEL: test_fadd:
@@ -259,9 +260,8 @@ define void @test_truncstore64(double %a, ptr %b) {
259260
; CHECK-FP-NEXT: .save {r4, lr}
260261
; CHECK-FP-NEXT: push {r4, lr}
261262
; CHECK-FP-NEXT: mov r4, r0
262-
; CHECK-FP-NEXT: vmov r0, r1, d0
263-
; CHECK-FP-NEXT: bl __aeabi_d2f
264-
; CHECK-FP-NEXT: lsrs r0, r0, #16
263+
; CHECK-FP-NEXT: bl __truncdfbf2
264+
; CHECK-FP-NEXT: vmov r0, s0
265265
; CHECK-FP-NEXT: strh r0, [r4]
266266
; CHECK-FP-NEXT: pop {r4, pc}
267267
%r = fptrunc double %a to bfloat
@@ -312,15 +312,23 @@ define double @test_loadext64(ptr %a) {
312312
; CHECK-NOFP-NEXT: bl __aeabi_f2d
313313
; CHECK-NOFP-NEXT: pop {r7, pc}
314314
;
315-
; CHECK-FP-LABEL: test_loadext64:
316-
; CHECK-FP: @ %bb.0:
317-
; CHECK-FP-NEXT: .save {r7, lr}
318-
; CHECK-FP-NEXT: push {r7, lr}
319-
; CHECK-FP-NEXT: ldrh r0, [r0]
320-
; CHECK-FP-NEXT: lsls r0, r0, #16
321-
; CHECK-FP-NEXT: bl __aeabi_f2d
322-
; CHECK-FP-NEXT: vmov d0, r0, r1
323-
; CHECK-FP-NEXT: pop {r7, pc}
315+
; CHECK-FPNO64-LABEL: test_loadext64:
316+
; CHECK-FPNO64: @ %bb.0:
317+
; CHECK-FPNO64-NEXT: .save {r7, lr}
318+
; CHECK-FPNO64-NEXT: push {r7, lr}
319+
; CHECK-FPNO64-NEXT: ldrh r0, [r0]
320+
; CHECK-FPNO64-NEXT: lsls r0, r0, #16
321+
; CHECK-FPNO64-NEXT: bl __aeabi_f2d
322+
; CHECK-FPNO64-NEXT: vmov d0, r0, r1
323+
; CHECK-FPNO64-NEXT: pop {r7, pc}
324+
;
325+
; CHECK-FP64-LABEL: test_loadext64:
326+
; CHECK-FP64: @ %bb.0:
327+
; CHECK-FP64-NEXT: ldrh r0, [r0]
328+
; CHECK-FP64-NEXT: lsls r0, r0, #16
329+
; CHECK-FP64-NEXT: vmov s0, r0
330+
; CHECK-FP64-NEXT: vcvt.f64.f32 d0, s0
331+
; CHECK-FP64-NEXT: bx lr
324332
%r = load bfloat, ptr %a
325333
%d = fpext bfloat %r to double
326334
ret double %d
@@ -1374,10 +1382,7 @@ define bfloat @test_fptrunc_double(double %a) {
13741382
; CHECK-FP: @ %bb.0:
13751383
; CHECK-FP-NEXT: .save {r7, lr}
13761384
; CHECK-FP-NEXT: push {r7, lr}
1377-
; CHECK-FP-NEXT: vmov r0, r1, d0
1378-
; CHECK-FP-NEXT: bl __aeabi_d2f
1379-
; CHECK-FP-NEXT: lsrs r0, r0, #16
1380-
; CHECK-FP-NEXT: vmov.f16 s0, r0
1385+
; CHECK-FP-NEXT: bl __truncdfbf2
13811386
; CHECK-FP-NEXT: vmov.f16 r0, s0
13821387
; CHECK-FP-NEXT: vmov s0, r0
13831388
; CHECK-FP-NEXT: pop {r7, pc}
@@ -1410,15 +1415,23 @@ define double @test_fpext_double(bfloat %a) {
14101415
; CHECK-NOFP-NEXT: bl __aeabi_f2d
14111416
; CHECK-NOFP-NEXT: pop {r7, pc}
14121417
;
1413-
; CHECK-FP-LABEL: test_fpext_double:
1414-
; CHECK-FP: @ %bb.0:
1415-
; CHECK-FP-NEXT: .save {r7, lr}
1416-
; CHECK-FP-NEXT: push {r7, lr}
1417-
; CHECK-FP-NEXT: vmov r0, s0
1418-
; CHECK-FP-NEXT: lsls r0, r0, #16
1419-
; CHECK-FP-NEXT: bl __aeabi_f2d
1420-
; CHECK-FP-NEXT: vmov d0, r0, r1
1421-
; CHECK-FP-NEXT: pop {r7, pc}
1418+
; CHECK-FPNO64-LABEL: test_fpext_double:
1419+
; CHECK-FPNO64: @ %bb.0:
1420+
; CHECK-FPNO64-NEXT: .save {r7, lr}
1421+
; CHECK-FPNO64-NEXT: push {r7, lr}
1422+
; CHECK-FPNO64-NEXT: vmov r0, s0
1423+
; CHECK-FPNO64-NEXT: lsls r0, r0, #16
1424+
; CHECK-FPNO64-NEXT: bl __aeabi_f2d
1425+
; CHECK-FPNO64-NEXT: vmov d0, r0, r1
1426+
; CHECK-FPNO64-NEXT: pop {r7, pc}
1427+
;
1428+
; CHECK-FP64-LABEL: test_fpext_double:
1429+
; CHECK-FP64: @ %bb.0:
1430+
; CHECK-FP64-NEXT: vmov r0, s0
1431+
; CHECK-FP64-NEXT: lsls r0, r0, #16
1432+
; CHECK-FP64-NEXT: vmov s0, r0
1433+
; CHECK-FP64-NEXT: vcvt.f64.f32 d0, s0
1434+
; CHECK-FP64-NEXT: bx lr
14221435
%r = fpext bfloat %a to double
14231436
ret double %r
14241437
}

0 commit comments

Comments
 (0)